trie.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. /**
  2. * Hostbane-Optimized Trie based on Mnemonist Trie
  3. */
  4. import { fastStringArrayJoin } from './misc';
  5. import util from 'node:util';
  6. import { noop } from 'foxact/noop';
  7. type TrieNode<Meta = any> = [
  8. boolean, /** sentinel */
  9. TrieNode | null, /** parent */
  10. Map<string, TrieNode>, /** children */
  11. Meta /** meta */
  12. ];
  13. const deepTrieNodeToJSON = (
  14. node: TrieNode,
  15. unpackMeta: ((meta?: any) => string) | undefined
  16. ) => {
  17. const obj: Record<string, any> = {};
  18. if (node[0]) {
  19. obj['[start]'] = node[0];
  20. }
  21. if (node[3] != null) {
  22. if (unpackMeta) {
  23. obj['[meta]'] = unpackMeta(node[3]);
  24. } else {
  25. obj['[meta]'] = node[3];
  26. }
  27. }
  28. node[2].forEach((value, key) => {
  29. obj[key] = deepTrieNodeToJSON(value, unpackMeta);
  30. });
  31. return obj;
  32. };
  33. const createNode = <Meta = any>(parent: TrieNode | null = null): TrieNode => {
  34. return [false, parent, new Map<string, TrieNode>(), null] as TrieNode<Meta>;
  35. };
  36. export const hostnameToTokens = (hostname: string): string[] => {
  37. const tokens = hostname.split('.');
  38. const results: string[] = [];
  39. let token = '';
  40. for (let i = 0, l = tokens.length; i < l; i++) {
  41. if (i > 0) {
  42. results.push('.');
  43. }
  44. token = tokens[i];
  45. if (token.length > 0) {
  46. results.push(token);
  47. }
  48. }
  49. return results;
  50. };
  51. const walkHostnameTokens = (hostname: string, onToken: (token: string) => boolean | null): boolean | null => {
  52. const tokens = hostname.split('.');
  53. let token = '';
  54. const l = tokens.length - 1;
  55. for (let i = l; i >= 0; i--) {
  56. if (
  57. i < l // when i === l, we are at the first of hostname, no splitor there
  58. // when onToken returns true, we should skip the rest of the loop
  59. && onToken('.')
  60. ) {
  61. return true;
  62. }
  63. token = tokens[i];
  64. if (
  65. token.length > 0
  66. // when onToken returns true, we should skip the rest of the loop
  67. && onToken(token)
  68. ) {
  69. return true;
  70. }
  71. }
  72. return false;
  73. };
  74. export const createTrie = <Meta = any>(from?: string[] | Set<string> | null, smolTree = false) => {
  75. let size = 0;
  76. const root: TrieNode<Meta> = createNode();
  77. /**
  78. * Method used to add the given suffix to the trie.
  79. */
  80. const add = smolTree
  81. ? (suffix: string, meta?: Meta): void => {
  82. let node: TrieNode<Meta> = root;
  83. const onToken = (token: string) => {
  84. if (node[2].has(token)) {
  85. node = node[2].get(token)!;
  86. // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie
  87. // Dedupe the covered subdomain by skipping
  88. if (token === '.' && node[0]) {
  89. return true;
  90. }
  91. } else {
  92. const newNode = createNode(node);
  93. node[2].set(token, newNode);
  94. node = newNode;
  95. }
  96. return false;
  97. };
  98. // When walkHostnameTokens returns true, we should skip the rest
  99. if (walkHostnameTokens(suffix, onToken)) {
  100. return;
  101. }
  102. // If we are in smolTree mode, we need to do something at the end of the loop
  103. if (suffix[0] === '.') {
  104. // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  105. // Make sure parent `[start]sub.example.com` (without dot) is removed (SETINEL to false)
  106. (/** parent */ node[1]!)[0] = false;
  107. // Removing the rest of the parent's child nodes
  108. node[2].clear();
  109. // The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
  110. // we can use else-if here, because the children is now empty, we don't need to check the leading "."
  111. } else if (node[2].get('.')?.[0] === true) {
  112. // Trying to add `example.com` when there is already a `.example.com` in the trie
  113. // No need to increment size and set SENTINEL to true (skip this "new" item)
  114. return;
  115. }
  116. node[0] = true;
  117. node[3] = meta!;
  118. }
  119. : (suffix: string, meta?: Meta): void => {
  120. let node: TrieNode<Meta> = root;
  121. const onToken = (token: string) => {
  122. if (node[2].has(token)) {
  123. node = node[2].get(token)!;
  124. } else {
  125. const newNode = createNode(node);
  126. node[2].set(token, newNode);
  127. node = newNode;
  128. }
  129. return false;
  130. };
  131. // When walkHostnameTokens returns true, we should skip the rest
  132. if (walkHostnameTokens(suffix, onToken)) {
  133. return;
  134. }
  135. if (!node[0]) {
  136. size++;
  137. node[0] = true;
  138. node[3] = meta!;
  139. }
  140. };
  141. const walkIntoLeafWithTokens = (
  142. tokens: string[],
  143. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  144. ) => {
  145. let node: TrieNode = root;
  146. let parent: TrieNode = node;
  147. let token: string;
  148. for (let i = tokens.length - 1; i >= 0; i--) {
  149. token = tokens[i];
  150. if (token === '') {
  151. break;
  152. }
  153. parent = node;
  154. if (node[2].has(token)) {
  155. node = node[2].get(token)!;
  156. } else {
  157. return null;
  158. }
  159. onLoop(node, parent, token);
  160. }
  161. return { node, parent };
  162. };
  163. const walkIntoLeafWithSuffix = (
  164. suffix: string,
  165. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  166. ) => {
  167. let node: TrieNode = root;
  168. let parent: TrieNode = node;
  169. const onToken = (token: string) => {
  170. if (token === '') {
  171. return true;
  172. }
  173. parent = node;
  174. if (node[2].has(token)) {
  175. node = node[2].get(token)!;
  176. } else {
  177. return null;
  178. }
  179. onLoop(node, parent, token);
  180. return false;
  181. };
  182. if (walkHostnameTokens(suffix, onToken) === null) {
  183. return null;
  184. }
  185. return { node, parent };
  186. };
  187. const contains = (suffix: string): boolean => {
  188. return walkIntoLeafWithSuffix(suffix) !== null;
  189. };
  190. const walk = (
  191. onMatches: (suffix: string[], meta: Meta) => void,
  192. initialNode = root,
  193. initialSuffix: string[] = []
  194. ) => {
  195. const nodeStack: Array<TrieNode<Meta>> = [initialNode];
  196. // Resolving initial string (begin the start of the stack)
  197. const suffixStack: string[][] = [initialSuffix];
  198. let node: TrieNode<Meta> = root;
  199. do {
  200. node = nodeStack.pop()!;
  201. const suffix = suffixStack.pop()!;
  202. node[2].forEach((childNode, k) => {
  203. // Pushing the child node to the stack for next iteration of DFS
  204. nodeStack.push(childNode);
  205. suffixStack.push([k, ...suffix]);
  206. });
  207. // If the node is a sentinel, we push the suffix to the results
  208. if (node[0]) {
  209. onMatches(suffix, node[3]);
  210. }
  211. } while (nodeStack.length);
  212. };
  213. interface FindSingleChildLeafResult {
  214. node: TrieNode,
  215. toPrune: TrieNode | null,
  216. tokenToPrune: string | null,
  217. parent: TrieNode
  218. }
  219. const getSingleChildLeaf = (tokens: string[]): FindSingleChildLeafResult | null => {
  220. let toPrune: TrieNode | null = null;
  221. let tokenToPrune: string | null = null;
  222. const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
  223. // Keeping track of a potential branch to prune
  224. // Even if the node size is 1, but the single child is ".", we should retain the branch
  225. // Since the "." could be special if it is the leaf-est node
  226. const onlyChild = node[2].size < 2 && !node[2].has('.');
  227. if (toPrune != null) { // the top-est branch that could potentially being pruned
  228. if (!onlyChild) {
  229. // The branch has moew than single child, retain the branch.
  230. // And we need to abort prune the parent, so we set it to null
  231. toPrune = null;
  232. tokenToPrune = null;
  233. }
  234. } else if (onlyChild) {
  235. // There is only one token child, or no child at all, we can prune it safely
  236. // It is now the top-est branch that could potentially being pruned
  237. toPrune = parent;
  238. tokenToPrune = token;
  239. }
  240. };
  241. const res = walkIntoLeafWithTokens(tokens, onLoop);
  242. if (res === null) return null;
  243. return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  244. };
  245. /**
  246. * Method used to retrieve every item in the trie with the given prefix.
  247. */
  248. const find = (
  249. inputSuffix: string,
  250. /** @default true */ includeEqualWithSuffix = true
  251. ): string[] => {
  252. // if (smolTree) {
  253. // throw new Error('A Trie with smolTree enabled cannot perform find!');
  254. // }
  255. const inputTokens = hostnameToTokens(inputSuffix);
  256. const res = walkIntoLeafWithTokens(inputTokens);
  257. if (res === null) return [];
  258. const matches: string[][] = [];
  259. const onMatches = includeEqualWithSuffix
  260. // fast path (default option)
  261. ? (suffix: string[]) => matches.push(suffix)
  262. // slow path
  263. : (suffix: string[]) => {
  264. if (!deepEqualArray(suffix, inputTokens)) {
  265. matches.push(suffix);
  266. }
  267. };
  268. walk(
  269. onMatches,
  270. res.node, // Performing DFS from prefix
  271. inputTokens
  272. );
  273. return matches.map((m) => fastStringArrayJoin(m, ''));
  274. };
  275. /**
  276. * Method used to delete a prefix from the trie.
  277. */
  278. const remove = (suffix: string): boolean => {
  279. const res = getSingleChildLeaf(hostnameToTokens(suffix));
  280. if (res === null) return false;
  281. if (!res.node[0]) return false;
  282. size--;
  283. const { node, toPrune, tokenToPrune } = res;
  284. if (tokenToPrune && toPrune) {
  285. toPrune[2].delete(tokenToPrune);
  286. } else {
  287. node[0] = false;
  288. }
  289. return true;
  290. };
  291. /**
  292. * Method used to assert whether the given prefix exists in the Trie.
  293. */
  294. const has = (suffix: string): boolean => {
  295. const res = walkIntoLeafWithSuffix(suffix);
  296. return res
  297. ? res.node[0]
  298. : false;
  299. };
  300. function dump(onSuffix: (suffix: string) => void): void;
  301. function dump(): string[];
  302. function dump(onSuffix?: (suffix: string) => void): string[] | void {
  303. const results: string[] = [];
  304. const handleSuffix = onSuffix
  305. ? (suffix: string[]) => onSuffix(fastStringArrayJoin(suffix, ''))
  306. : (suffix: string[]) => results.push(fastStringArrayJoin(suffix, ''));
  307. walk(handleSuffix);
  308. return results;
  309. };
  310. const dumpMeta = () => {
  311. const results: Meta[] = [];
  312. walk((suffix, meta) => {
  313. results.push(meta);
  314. });
  315. return results;
  316. };
  317. const dumpWithMeta = () => {
  318. const results: Array<[string, Meta]> = [];
  319. walk((suffix, meta) => {
  320. results.push([fastStringArrayJoin(suffix, ''), meta]);
  321. });
  322. return results;
  323. };
  324. const whitelist = (suffix: string) => {
  325. if (!smolTree) {
  326. throw new Error('whitelist method is only available in smolTree mode.');
  327. }
  328. const tokens = hostnameToTokens(suffix);
  329. const res = getSingleChildLeaf(tokens);
  330. if (res === null) return;
  331. const { node, toPrune, tokenToPrune, parent } = res;
  332. // Trying to whitelist `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  333. if (tokens[0] === '.') {
  334. // If there is a `[start]sub.example.com` here, remove it
  335. parent[0] = false;
  336. // Removing all the child nodes by disconnecting ".", which removes "blog.sub.example.com"
  337. parent[2].delete('.');
  338. }
  339. // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
  340. const dotNode = node[2].get('.');
  341. if (dotNode) {
  342. dotNode[0] = false;
  343. }
  344. // if (dotNode?.s === true) {
  345. // dotnode[0] = false;
  346. // }
  347. // return early if not found
  348. if (!node[0]) return;
  349. if (tokenToPrune && toPrune) {
  350. toPrune[2].delete(tokenToPrune);
  351. } else {
  352. node[0] = false;
  353. }
  354. };
  355. // Actually build trie
  356. if (Array.isArray(from)) {
  357. for (let i = 0, l = from.length; i < l; i++) {
  358. add(from[i]);
  359. }
  360. } else if (from) {
  361. from.forEach((value) => add(value));
  362. }
  363. const inspect = (depth: number, unpackMeta?: (meta?: Meta) => any) => fastStringArrayJoin(
  364. JSON.stringify(deepTrieNodeToJSON(root, unpackMeta), null, 2).split('\n').map((line) => ' '.repeat(depth) + line),
  365. '\n'
  366. );
  367. return {
  368. add,
  369. contains,
  370. find,
  371. remove,
  372. delete: remove,
  373. has,
  374. dump,
  375. dumpMeta,
  376. dumpWithMeta,
  377. get size() {
  378. if (smolTree) {
  379. throw new Error('A Trie with smolTree enabled cannot have correct size!');
  380. }
  381. return size;
  382. },
  383. get root() {
  384. return root;
  385. },
  386. whitelist,
  387. inspect,
  388. [util.inspect.custom]: inspect,
  389. smolTree
  390. };
  391. };
  392. export type Trie = ReturnType<typeof createTrie>;
  393. function deepEqualArray(a: string[], b: string[]) {
  394. let len = a.length;
  395. if (len !== b.length) return false;
  396. while (len--) {
  397. if (a[len] !== b[len]) return false;
  398. }
  399. return true;
  400. };