trie.ts 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536
  1. /**
  2. * Hostbane-Optimized Trie based on Mnemonist Trie
  3. */
  4. import { fastStringArrayJoin } from './misc';
  5. import util from 'node:util';
  6. import { noop } from 'foxact/noop';
  /**
   * A trie node, stored as a fixed-shape tuple:
   *
   * - `end`: a stored hostname ends at this node
   * - `includeAllSubdomain`: the entry also covers every subdomain
   *   (`.example.org`, `||example.com`)
   * - `parent`: the node this one hangs off, or `null`
   * - `children`: child nodes keyed by hostname label
   * - `meta`: caller-supplied payload attached to the entry
   */
  type TrieNode<Meta = any> = [
    end: boolean,
    includeAllSubdomain: boolean,
    parent: TrieNode | null,
    children: Map<string, TrieNode>,
    meta: Meta
  ];
  /**
   * Recursively converts a trie node and everything below it into a plain
   * object (used by `inspect` for debugging output).
   *
   * Reserved keys on every level:
   * - `[start]`: only present when the node's end flag (`node[0]`) is set
   * - `[subdomain]`: the node's subdomain flag (`node[1]`)
   * - `[meta]`: only present when the node's meta (`node[4]`) is not null/undefined
   *
   * Every other key is a child label mapping to that child's converted object.
   *
   * @param node the node to convert
   * @param unpackMeta optional transform applied to the meta before it is stored
   * @returns the plain-object representation of `node`
   */
  function deepTrieNodeToJSON(
    node: TrieNode,
    unpackMeta: ((meta?: any) => string) | undefined
  ) {
    const obj: Record<string, any> = {};

    if (node[0]) {
      obj['[start]'] = node[0];
    }
    obj['[subdomain]'] = node[1];

    // The meta lives at index 4. Index 3 is the children Map, which must not
    // be emitted as the meta.
    const meta = node[4];
    if (meta != null) {
      obj['[meta]'] = unpackMeta ? unpackMeta(meta) : meta;
    }

    node[3].forEach((value, key) => {
      obj[key] = deepTrieNodeToJSON(value, unpackMeta);
    });

    return obj;
  }
  /**
   * Builds a fresh trie node: not an end node, no children, `null` meta.
   *
   * @param allSubdomain initial value of the subdomain flag
   * @param parent the node the new node hangs off, or `null`
   */
  const createNode = <Meta = any>(allSubdomain = false, parent: TrieNode | null = null): TrieNode => {
    const children = new Map<string, TrieNode>();
    return [false, allSubdomain, parent, children, null] as TrieNode<Meta>;
  };
  /**
   * Splits a hostname on `.` into its labels, left-to-right, dropping empty
   * labels (produced by leading, trailing or doubled dots).
   */
  export function hostnameToTokens(hostname: string): string[] {
    return hostname.split('.').filter((label) => label.length > 0);
  }
  /**
   * Feeds the labels of `hostname` to `onToken` from right to left
   * (`blog.skk.moe` yields `moe`, `skk`, `blog`), skipping empty labels.
   *
   * The callback steers the walk:
   * - `null` aborts the walk, and `null` is returned
   * - a truthy value stops the walk, and `true` is returned
   * - `false` continues with the next label
   *
   * @returns `false` when every label was visited without stopping
   */
  function walkHostnameTokens(hostname: string, onToken: (token: string) => boolean | null): boolean | null {
    const labels = hostname.split('.');

    let idx = labels.length;
    while (idx-- > 0) {
      const label = labels[idx];
      if (label.length === 0) {
        continue;
      }

      const verdict = onToken(label);
      if (verdict === null) {
        return null;
      }
      if (verdict) {
        return true;
      }
    }

    return false;
  }
  /** Outcome of locating a node together with the information needed to prune it. */
  interface FindSingleChildLeafResult<Meta> {
    /** The node reached at the end of the walk. */
    node: TrieNode<Meta>;
    /** Node whose children map holds the prunable branch, or `null` when nothing may be pruned. */
    toPrune: TrieNode<Meta> | null;
    /** Key inside `toPrune`'s children map that removes the branch, or `null`. */
    tokenToPrune: string | null;
    /** The node visited immediately before `node`. */
    parent: TrieNode<Meta>;
  }
  /**
   * Base class shared by the hostname tries in this file.
   *
   * Hostnames are stored label by label from right to left, so `blog.skk.moe`
   * is reached through `moe` -> `skk` -> `blog`. Every node is a `TrieNode`
   * tuple: `[end, includeAllSubdomain, parent, children, meta]`.
   *
   * Subclasses provide `add`; the constructor uses it to load the initial entries.
   */
  abstract class Triebase<Meta = any> {
    protected readonly $root: TrieNode<Meta> = createNode();
    protected $size = 0;

    /** The root node of the trie. */
    get root() {
      return this.$root;
    }

    /**
     * @param from optional initial entries; each one is passed to `add`
     */
    constructor(from?: string[] | Set<string> | null) {
      // Actually build trie
      if (Array.isArray(from)) {
        for (let i = 0, l = from.length; i < l; i++) {
          this.add(from[i]);
        }
      } else if (from) {
        from.forEach((value) => this.add(value));
      }
    }

    public abstract add(suffix: string, includeAllSubdoain?: boolean, meta?: Meta): void;

    /**
     * Descends from the root following `tokens`, consumed from the LAST element
     * to the first (the tokens are in left-to-right hostname order).
     *
     * @param tokens hostname labels, e.g. the output of `hostnameToTokens`
     * @param onLoop invoked after every successful step with the node just
     * entered, the node it was entered from, and the label used
     * @returns the final node and the node visited before it, or `null` as soon
     * as a label has no matching child
     */
    protected walkIntoLeafWithTokens(
      tokens: string[],
      onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
    ) {
      let node: TrieNode = this.$root;
      let parent: TrieNode = node;

      let token: string;

      for (let i = tokens.length - 1; i >= 0; i--) {
        token = tokens[i];

        parent = node;

        if (node[3].has(token)) {
          node = node[3].get(token)!;
        } else {
          return null;
        }

        onLoop(node, parent, token);
      }

      return { node, parent };
    }

    /**
     * Same descent as `walkIntoLeafWithTokens`, but takes the hostname string
     * and lets `walkHostnameTokens` split it (empty labels are skipped there).
     *
     * @returns the final node and the node visited before it, or `null` when
     * the path does not exist
     */
    protected walkIntoLeafWithSuffix(
      suffix: string,
      onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
    ) {
      let node: TrieNode = this.$root;
      let parent: TrieNode = node;

      const onToken = (token: string) => {
        parent = node;

        if (node[3].has(token)) {
          node = node[3].get(token)!;
        } else {
          // `null` makes walkHostnameTokens abort and report `null`
          return null;
        }

        onLoop(node, parent, token);

        return false;
      };

      if (walkHostnameTokens(suffix, onToken) === null) {
        return null;
      }

      return { node, parent };
    }

    /**
     * Whether a path for `suffix` exists in the trie. Unlike `has`, the end
     * flag of the reached node is NOT checked, so a suffix that is only an
     * intermediate path of a longer entry also matches.
     *
     * @param suffix hostname; a leading `.` is stripped
     * @param includeAllSubdoain when true, the result is the subdomain flag of
     * the reached node; defaults to whether `suffix` starts with `.`
     */
    public contains(suffix: string, includeAllSubdoain = suffix[0] === '.'): boolean {
      if (suffix[0] === '.') {
        suffix = suffix.slice(1);
      }

      const res = this.walkIntoLeafWithSuffix(suffix);
      if (!res) return false;
      if (includeAllSubdoain) return res.node[1];
      return true;
    }

    /**
     * Iterative depth-first traversal. Calls `onMatches` for every node whose
     * end flag is set, passing the labels of its hostname (left-to-right), its
     * subdomain flag and its meta.
     *
     * @param initialNode node to start from (defaults to the root)
     * @param initialSuffix labels that lead to `initialNode`
     */
    private walk(
      onMatches: (suffix: string[], subdomain: boolean, meta: Meta) => void,
      initialNode = this.$root,
      initialSuffix: string[] = []
    ) {
      const nodeStack: Array<TrieNode<Meta>> = [initialNode];
      // Parallel stack: suffixStack[i] holds the labels leading to nodeStack[i]
      const suffixStack: string[][] = [initialSuffix];

      let node: TrieNode<Meta> = initialNode;

      do {
        node = nodeStack.pop()!;
        const suffix = suffixStack.pop()!;

        node[3].forEach((childNode, k) => {
          // Pushing the child node to the stack for next iteration of DFS
          nodeStack.push(childNode);
          // Deeper labels sit further left in the hostname, hence the prepend
          suffixStack.push([k, ...suffix]);
        });

        // If the node is an end node, report its suffix
        if (node[0]) {
          onMatches(suffix, node[1], node[4]);
        }
      } while (nodeStack.length);
    }

    /**
     * Walks to the node for `tokens` and records whether the walked branch can
     * be physically removed from the trie.
     *
     * @returns `null` when the path does not exist; otherwise the reached node,
     * its predecessor, and (`toPrune`, `tokenToPrune`) describing the map entry
     * to delete, or `null`s when the node must only be un-flagged
     */
    protected getSingleChildLeaf(tokens: string[]): FindSingleChildLeafResult<Meta> | null {
      let toPrune: TrieNode | null = null;
      let tokenToPrune: string | null = null;

      const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
        // Keeping track of a potential branch to prune.
        //
        // A node is prunable only when nothing hangs below it AND it is not a
        // subdomain (".") entry, which is special and must be retained.
        // The subdomain flag is node[1]. node[2] is the parent pointer, which
        // is non-null for every node reachable by this walk, so testing it
        // made this condition permanently false and disabled pruning.
        const onlyChild = node[3].size === 0 && !node[1];

        if (toPrune != null) { // the top-most branch that could potentially be pruned
          if (!onlyChild) {
            // The branch carries more than this path, retain it.
            // Abort pruning the parent as well, so reset to null
            toPrune = null;
            tokenToPrune = null;
          }
        } else if (onlyChild) {
          // Nothing below this node, it can be pruned safely.
          // It is now the top-most branch that could potentially be pruned
          toPrune = parent;
          tokenToPrune = token;
        }
      };

      const res = this.walkIntoLeafWithTokens(tokens, onLoop);

      if (res === null) return null;

      return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
    }

    /**
     * Retrieves every entry stored at or below the given suffix.
     * Subdomain entries are returned with a leading `.`.
     *
     * @param inputSuffix hostname suffix; a leading `.` is stripped
     * @param subdomainOnly when true, an entry equal to `inputSuffix` that is
     * not a subdomain entry is left out; defaults to whether `inputSuffix`
     * starts with `.`
     */
    public find(
      inputSuffix: string,
      subdomainOnly = inputSuffix[0] === '.'
      // /** @default true */ includeEqualWithSuffix = true
    ): string[] {
      if (inputSuffix[0] === '.') {
        inputSuffix = inputSuffix.slice(1);
      }

      const inputTokens = hostnameToTokens(inputSuffix);
      const res = this.walkIntoLeafWithTokens(inputTokens);
      if (res === null) return [];

      const results: string[] = [];

      const onMatches = subdomainOnly
        ? (suffix: string[], subdomain: boolean) => {
          const d = fastStringArrayJoin(suffix, '.');
          // skip the plain entry that is exactly the queried suffix
          if (!subdomain && d === inputSuffix) return;

          results.push(subdomain ? '.' + d : d);
        }
        : (suffix: string[], subdomain: boolean) => {
          const d = fastStringArrayJoin(suffix, '.');
          results.push(subdomain ? '.' + d : d);
        };

      this.walk(
        onMatches,
        res.node, // Performing DFS from the node of the suffix
        inputTokens
      );

      return results;
    }

    /**
     * Deletes an entry from the trie.
     *
     * @param suffix hostname to delete (empty labels, including the one a
     * leading `.` produces, are dropped by `hostnameToTokens`)
     * @returns `true` when an entry ended at that node and was removed
     */
    public remove(suffix: string): boolean {
      const res = this.getSingleChildLeaf(hostnameToTokens(suffix));
      if (res === null) return false;

      if (!res.node[0]) return false;

      this.$size--;
      const { node, toPrune, tokenToPrune } = res;

      if (tokenToPrune && toPrune) {
        // drop the whole branch from its holder's children map
        toPrune[3].delete(tokenToPrune);
      } else {
        // the node must stay in place, only clear its end flag
        node[0] = false;
      }

      return true;
    }

    /** Alias of `remove`. */
    // eslint-disable-next-line @typescript-eslint/unbound-method -- safe
    public delete = this.remove;

    /**
     * Asserts whether the given suffix is stored as an entry in the trie
     * (the reached node must have its end flag set).
     *
     * @param suffix hostname; a leading `.` is stripped
     * @param includeAllSubdoain when true, the entry must additionally be a
     * subdomain entry; defaults to whether `suffix` starts with `.`
     */
    public has(suffix: string, includeAllSubdoain = suffix[0] === '.'): boolean {
      if (suffix[0] === '.') {
        suffix = suffix.slice(1);
      }

      const res = this.walkIntoLeafWithSuffix(suffix);

      if (res === null) return false;
      if (!res.node[0]) return false;
      if (includeAllSubdoain) return res.node[1];
      return true;
    }

    /**
     * Enumerates every entry. Subdomain entries are prefixed with `.`.
     * With a callback, each entry is passed to it and the returned array stays
     * empty; without one, the entries are collected and returned.
     */
    public dump(onSuffix: (suffix: string) => void): void;
    public dump(): string[];
    public dump(onSuffix?: (suffix: string) => void): string[] | void {
      const results: string[] = [];

      const handleSuffix = onSuffix
        ? (suffix: string[], subdomain: boolean) => {
          const d = fastStringArrayJoin(suffix, '.');
          onSuffix(subdomain ? '.' + d : d);
        }
        : (suffix: string[], subdomain: boolean) => {
          const d = fastStringArrayJoin(suffix, '.');
          results.push(subdomain ? '.' + d : d);
        };

      this.walk(handleSuffix);

      return results;
    }

    /**
     * Enumerates the meta of every entry, either through the callback or as
     * the returned array.
     */
    public dumpMeta(onMeta: (meta: Meta) => void): void;
    public dumpMeta(): Meta[];
    public dumpMeta(onMeta?: (meta: Meta) => void): Meta[] | void {
      const results: Meta[] = [];

      const handleMeta = onMeta
        ? (_suffix: string[], _subdomain: boolean, meta: Meta) => onMeta(meta)
        : (_suffix: string[], _subdomain: boolean, meta: Meta) => results.push(meta);

      this.walk(handleMeta);

      return results;
    }

    /**
     * Enumerates every entry together with its meta, either through the
     * callback or as the returned array of `[entry, meta]` pairs.
     */
    public dumpWithMeta(onSuffix: (suffix: string, meta: Meta | undefined) => void): void;
    public dumpWithMeta(): Array<[string, Meta | undefined]>;
    public dumpWithMeta(onSuffix?: (suffix: string, meta: Meta | undefined) => void): Array<[string, Meta | undefined]> | void {
      const results: Array<[string, Meta | undefined]> = [];

      const handleSuffix = onSuffix
        ? (suffix: string[], subdomain: boolean, meta: Meta | undefined) => {
          const d = fastStringArrayJoin(suffix, '.');
          return onSuffix(subdomain ? '.' + d : d, meta);
        }
        : (suffix: string[], subdomain: boolean, meta: Meta | undefined) => {
          const d = fastStringArrayJoin(suffix, '.');
          results.push([subdomain ? '.' + d : d, meta]);
        };

      this.walk(handleSuffix);

      return results;
    }

    /**
     * Renders the trie as pretty-printed JSON, each line indented by `depth`
     * spaces.
     *
     * @param depth indentation width; values that are not a positive finite
     * number produce no indentation
     * @param unpackMeta optional transform applied to each meta before output
     */
    public inspect(depth: number, unpackMeta?: (meta?: Meta) => any) {
      // `util.inspect` passes a negative depth once nesting exceeds its limit
      // and `Infinity` for unlimited depth; `String.prototype.repeat` throws a
      // RangeError for both, so clamp before using it as a width.
      const indentWidth = Number.isFinite(depth) && depth > 0 ? Math.floor(depth) : 0;
      const indent = ' '.repeat(indentWidth);

      return fastStringArrayJoin(
        JSON.stringify(deepTrieNodeToJSON(this.$root, unpackMeta), null, 2).split('\n').map((line) => indent + line),
        '\n'
      );
    }

    /** Hook for `util.inspect` / `console.log`; delegates to `inspect`. */
    public [util.inspect.custom](depth: number) {
      return this.inspect(depth);
    }
  }
  /**
   * Trie variant that keeps itself minimal: once `.example.com` (a subdomain
   * entry) is stored, entries underneath it are dropped or skipped.
   */
  export class HostnameSmolTrie<Meta = any> extends Triebase<Meta> {
    public smolTree = true;

    /**
     * Stores a hostname.
     *
     * @param suffix hostname; a leading `.` is stripped
     * @param includeAllSubdoain store as a subdomain entry; defaults to whether
     * `suffix` starts with `.`
     * @param meta payload attached to the entry
     */
    add(suffix: string, includeAllSubdoain = suffix[0] === '.', meta?: Meta): void {
      const hostname = suffix[0] === '.' ? suffix.slice(1) : suffix;

      let cursor: TrieNode<Meta> = this.$root;

      // Steps into the child for `label`, creating it when missing.
      // Reports `true` when an existing child is already a subdomain entry:
      // e.g. adding `blog.skk.moe` while `.skk.moe` is stored needs no work.
      const descend = (label: string): boolean => {
        const existing = cursor[3].get(label);
        if (existing === undefined) {
          const created = createNode(false, cursor);
          cursor[3].set(label, created);
          cursor = created;
          return false;
        }

        cursor = existing;
        return existing[1];
      };

      // A truthy result means an ancestor already covers this hostname
      if (walkHostnameTokens(hostname, descend)) {
        return;
      }

      // Adding `example.com` while `.example.com` is already stored:
      // nothing new to record
      if (!includeAllSubdoain && cursor[1]) {
        return;
      }

      if (includeAllSubdoain) {
        // Adding `.sub.example.com` while e.g. `blog.sub.example.com` exists:
        // everything below is now redundant, drop it
        cursor[3].clear();
      }

      cursor[0] = true;
      cursor[1] = includeAllSubdoain;
      cursor[4] = meta!;
    }

    /**
     * Removes a hostname from the trie. Does nothing when no path for the
     * hostname exists.
     *
     * @param suffix hostname; a leading `.` is stripped
     * @param includeAllSubdoain also drop everything stored below the hostname;
     * defaults to whether `suffix` starts with `.`
     */
    public whitelist(suffix: string, includeAllSubdoain = suffix[0] === '.') {
      const hostname = suffix[0] === '.' ? suffix.slice(1) : suffix;

      const found = this.getSingleChildLeaf(hostnameToTokens(hostname));
      if (found === null) return;

      const target = found.node;

      if (includeAllSubdoain) {
        // Whitelisting `.sub.example.com`: clear the entry itself, its
        // subdomain flag, and every entry stored below it
        target[0] = false;
        target[1] = false;
        target[3].clear();
        // the end flag was just cleared, so there is nothing left to remove
        return;
      }

      // Whitelisting `example.com` while `.example.com` is stored
      target[1] = false;

      // no entry ends here
      if (!target[0]) return;

      if (found.tokenToPrune && found.toPrune) {
        found.toPrune[3].delete(found.tokenToPrune);
      } else {
        target[0] = false;
      }
    }
  }
  /**
   * Plain trie variant: keeps every added hostname and tracks how many
   * entries it holds.
   */
  export class HostnameTrie<Meta = any> extends Triebase<Meta> {
    /** Number of entries currently counted by the trie. */
    get size() {
      return this.$size;
    }

    /**
     * Stores a hostname. A hostname whose node is already an end node is
     * left untouched.
     *
     * @param suffix hostname; a leading `.` is stripped
     * @param includeAllSubdoain store as a subdomain entry; defaults to whether
     * `suffix` starts with `.`
     * @param meta payload attached to the entry
     */
    add(suffix: string, includeAllSubdoain = suffix[0] === '.', meta?: Meta): void {
      const hostname = suffix[0] === '.' ? suffix.slice(1) : suffix;

      let cursor: TrieNode<Meta> = this.$root;

      // Steps into the child for `label`, creating it when missing
      const descend = (label: string): boolean => {
        let next = cursor[3].get(label);
        if (next === undefined) {
          next = createNode(false, cursor);
          cursor[3].set(label, next);
        }
        cursor = next;
        return false;
      };

      // When walkHostnameTokens returns true, we should skip the rest
      if (walkHostnameTokens(hostname, descend)) {
        return;
      }

      // if same entry has been added before, skip
      if (cursor[0]) {
        return;
      }

      this.$size++;
      cursor[0] = true;
      cursor[1] = includeAllSubdoain;
      cursor[4] = meta!;
    }
  }
  /**
   * Creates a trie pre-loaded with `from`.
   *
   * The implementation defaults `smolTree` to `true`, so omitting the flag
   * yields a `HostnameSmolTrie`; only an explicit `false` yields a
   * `HostnameTrie`. The overloads mirror that. (Previously an omitted flag was
   * typed as `HostnameTrie` although a `HostnameSmolTrie` was constructed.)
   *
   * @param from initial entries, or `null`/`undefined` for an empty trie
   * @param smolTree `true` (default) for `HostnameSmolTrie`, `false` for `HostnameTrie`
   */
  export function createTrie<Meta = any>(from?: string[] | Set<string> | null, smolTree?: true): HostnameSmolTrie<Meta>;
  export function createTrie<Meta = any>(from: string[] | Set<string> | null | undefined, smolTree: false): HostnameTrie<Meta>;
  export function createTrie<Meta = any>(
    from?: string[] | Set<string> | null,
    smolTree = true
  ): HostnameSmolTrie<Meta> | HostnameTrie<Meta> {
    if (smolTree) {
      return new HostnameSmolTrie<Meta>(from);
    }
    return new HostnameTrie<Meta>(from);
  }

  // `ReturnType` resolves against the LAST overload, so the `HostnameTrie`
  // overload must stay last for this alias to keep its meaning.
  export type Trie = ReturnType<typeof createTrie>;
  423. // function deepEqualArray(a: string[], b: string[]) {
  424. // let len = a.length;
  425. // if (len !== b.length) return false;
  426. // while (len--) {
  427. // if (a[len] !== b[len]) return false;
  428. // }
  429. // return true;
  430. // };