trie.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. /**
  2. * Hostbane-Optimized Trie based on Mnemonist Trie
  3. */
  4. import { fastStringArrayJoin } from './misc';
  5. import util from 'node:util';
  6. const noop = () => { /** noop */ };
  7. type TrieNode<Meta = any> = [
  8. boolean, /** sentinel */
  9. TrieNode | null, /** parent */
  10. Map<string, TrieNode>, /** children */
  11. Meta /** meta */
  12. ];
  13. const deepTrieNodeToJSON = (
  14. node: TrieNode,
  15. unpackMeta: ((meta?: any) => string) | undefined
  16. ) => {
  17. const obj: Record<string, any> = {};
  18. if (node[0]) {
  19. obj['[start]'] = node[0];
  20. }
  21. if (node[3] != null) {
  22. if (unpackMeta) {
  23. obj['[meta]'] = unpackMeta(node[3]);
  24. } else {
  25. obj['[meta]'] = node[3];
  26. }
  27. }
  28. node[2].forEach((value, key) => {
  29. obj[key] = deepTrieNodeToJSON(value, unpackMeta);
  30. });
  31. return obj;
  32. };
  33. const createNode = <Meta = any>(parent: TrieNode | null = null): TrieNode => {
  34. return [false, parent, new Map<string, TrieNode>(), null] as TrieNode<Meta>;
  35. };
  36. export const hostnameToTokens = (hostname: string): string[] => {
  37. const tokens = hostname.split('.');
  38. const results: string[] = [];
  39. let token = '';
  40. for (let i = 0, l = tokens.length; i < l; i++) {
  41. if (i > 0) {
  42. results.push('.');
  43. }
  44. token = tokens[i];
  45. if (token.length > 0) {
  46. results.push(token);
  47. }
  48. }
  49. return results;
  50. };
  51. const walkHostnameTokens = (hostname: string, onToken: (token: string) => boolean | null): boolean | null => {
  52. const tokens = hostname.split('.');
  53. let token = '';
  54. const l = tokens.length - 1;
  55. for (let i = l; i >= 0; i--) {
  56. if (
  57. i < l // when i === l, we are at the first of hostname, no splitor there
  58. // when onToken returns true, we should skip the rest of the loop
  59. && onToken('.')
  60. ) {
  61. return true;
  62. }
  63. token = tokens[i];
  64. if (
  65. token.length > 0
  66. // when onToken returns true, we should skip the rest of the loop
  67. && onToken(token)
  68. ) {
  69. return true;
  70. }
  71. }
  72. return false;
  73. };
  74. export const createTrie = <Meta = any>(from?: string[] | Set<string> | null, smolTree = false) => {
  75. let size = 0;
  76. const root: TrieNode<Meta> = createNode();
  77. /**
  78. * Method used to add the given suffix to the trie.
  79. */
  80. const add = smolTree
  81. ? (suffix: string, meta?: Meta): void => {
  82. let node: TrieNode<Meta> = root;
  83. const onToken = (token: string) => {
  84. if (node[2].has(token)) {
  85. node = node[2].get(token)!;
  86. // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie
  87. // Dedupe the covered subdomain by skipping
  88. if (token === '.' && node[0]) {
  89. return true;
  90. }
  91. } else {
  92. const newNode = createNode(node);
  93. node[2].set(token, newNode);
  94. node = newNode;
  95. }
  96. return false;
  97. };
  98. // When walkHostnameTokens returns true, we should skip the rest
  99. if (walkHostnameTokens(suffix, onToken)) {
  100. return;
  101. }
  102. // If we are in smolTree mode, we need to do something at the end of the loop
  103. if (suffix[0] === '.') {
  104. // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  105. const parent = node[1]!;
  106. // Make sure parent `[start]sub.example.com` (without dot) is removed (SETINEL to false)
  107. parent[0] = false;
  108. // Removing the rest of the parent's child nodes
  109. node[2].clear();
  110. // The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
  111. // we can use else-if here, because the children is now empty, we don't need to check the leading "."
  112. } else if (node[2].get('.')?.[0] === true) {
  113. // Trying to add `example.com` when there is already a `.example.com` in the trie
  114. // No need to increment size and set SENTINEL to true (skip this "new" item)
  115. return;
  116. }
  117. node[0] = true;
  118. node[3] = meta!;
  119. }
  120. : (suffix: string, meta?: Meta): void => {
  121. let node: TrieNode<Meta> = root;
  122. const onToken = (token: string) => {
  123. if (node[2].has(token)) {
  124. node = node[2].get(token)!;
  125. } else {
  126. const newNode = createNode(node);
  127. node[2].set(token, newNode);
  128. node = newNode;
  129. }
  130. return false;
  131. };
  132. // When walkHostnameTokens returns true, we should skip the rest
  133. if (walkHostnameTokens(suffix, onToken)) {
  134. return;
  135. }
  136. if (!node[0]) {
  137. size++;
  138. node[0] = true;
  139. node[3] = meta!;
  140. }
  141. };
  142. const walkIntoLeafWithTokens = (
  143. tokens: string[],
  144. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  145. ) => {
  146. let node: TrieNode = root;
  147. let parent: TrieNode = node;
  148. let token: string;
  149. for (let i = tokens.length - 1; i >= 0; i--) {
  150. token = tokens[i];
  151. if (token === '') {
  152. break;
  153. }
  154. parent = node;
  155. if (node[2].has(token)) {
  156. node = node[2].get(token)!;
  157. } else {
  158. return null;
  159. }
  160. onLoop(node, parent, token);
  161. }
  162. return { node, parent };
  163. };
  164. const walkIntoLeafWithSuffix = (
  165. suffix: string,
  166. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  167. ) => {
  168. let node: TrieNode = root;
  169. let parent: TrieNode = node;
  170. const onToken = (token: string) => {
  171. if (token === '') {
  172. return true;
  173. }
  174. parent = node;
  175. if (node[2].has(token)) {
  176. node = node[2].get(token)!;
  177. } else {
  178. return null;
  179. }
  180. onLoop(node, parent, token);
  181. return false;
  182. };
  183. if (walkHostnameTokens(suffix, onToken) === null) {
  184. return null;
  185. }
  186. return { node, parent };
  187. };
  188. const contains = (suffix: string): boolean => {
  189. return walkIntoLeafWithSuffix(suffix) !== null;
  190. };
  191. const walk = (
  192. onMatches: (suffix: string[], meta: Meta) => void,
  193. initialNode = root,
  194. initialSuffix: string[] = []
  195. ) => {
  196. const nodeStack: Array<TrieNode<Meta>> = [initialNode];
  197. // Resolving initial string (begin the start of the stack)
  198. const suffixStack: string[][] = [initialSuffix];
  199. let node: TrieNode<Meta> = root;
  200. do {
  201. node = nodeStack.pop()!;
  202. const suffix = suffixStack.pop()!;
  203. node[2].forEach((childNode, k) => {
  204. // Pushing the child node to the stack for next iteration of DFS
  205. nodeStack.push(childNode);
  206. suffixStack.push([k, ...suffix]);
  207. });
  208. // If the node is a sentinel, we push the suffix to the results
  209. if (node[0]) {
  210. onMatches(suffix, node[3]);
  211. }
  212. } while (nodeStack.length);
  213. };
  214. interface FindSingleChildLeafResult {
  215. node: TrieNode,
  216. toPrune: TrieNode | null,
  217. tokenToPrune: string | null,
  218. parent: TrieNode
  219. }
  220. const getSingleChildLeaf = (tokens: string[]): FindSingleChildLeafResult | null => {
  221. let toPrune: TrieNode | null = null;
  222. let tokenToPrune: string | null = null;
  223. const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
  224. // Keeping track of a potential branch to prune
  225. // Even if the node size is 1, but the single child is ".", we should retain the branch
  226. // Since the "." could be special if it is the leaf-est node
  227. const onlyChild = node[2].size < 2 && !node[2].has('.');
  228. if (toPrune != null) { // the top-est branch that could potentially being pruned
  229. if (!onlyChild) {
  230. // The branch has moew than single child, retain the branch.
  231. // And we need to abort prune the parent, so we set it to null
  232. toPrune = null;
  233. tokenToPrune = null;
  234. }
  235. } else if (onlyChild) {
  236. // There is only one token child, or no child at all, we can prune it safely
  237. // It is now the top-est branch that could potentially being pruned
  238. toPrune = parent;
  239. tokenToPrune = token;
  240. }
  241. };
  242. const res = walkIntoLeafWithTokens(tokens, onLoop);
  243. if (res === null) return null;
  244. return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  245. };
  246. /**
  247. * Method used to retrieve every item in the trie with the given prefix.
  248. */
  249. const find = (
  250. inputSuffix: string,
  251. /** @default true */ includeEqualWithSuffix = true
  252. ): string[] => {
  253. if (smolTree) {
  254. throw new Error('A Trie with smolTree enabled cannot perform find!');
  255. }
  256. const inputTokens = hostnameToTokens(inputSuffix);
  257. const res = walkIntoLeafWithTokens(inputTokens);
  258. if (res === null) return [];
  259. const matches: string[][] = [];
  260. const onMatches = includeEqualWithSuffix
  261. // fast path (default option)
  262. ? (suffix: string[]) => matches.push(suffix)
  263. // slow path
  264. : (suffix: string[]) => {
  265. if (!deepEqualArray(suffix, inputTokens)) {
  266. matches.push(suffix);
  267. }
  268. };
  269. walk(
  270. onMatches,
  271. res.node, // Performing DFS from prefix
  272. inputTokens
  273. );
  274. return matches.map((m) => fastStringArrayJoin(m, ''));
  275. };
  276. /**
  277. * Method used to delete a prefix from the trie.
  278. */
  279. const remove = (suffix: string): boolean => {
  280. const res = getSingleChildLeaf(hostnameToTokens(suffix));
  281. if (res === null) return false;
  282. if (!res.node[0]) return false;
  283. size--;
  284. const { node, toPrune, tokenToPrune } = res;
  285. if (tokenToPrune && toPrune) {
  286. toPrune[2].delete(tokenToPrune);
  287. } else {
  288. node[0] = false;
  289. }
  290. return true;
  291. };
  292. /**
  293. * Method used to assert whether the given prefix exists in the Trie.
  294. */
  295. const has = (suffix: string): boolean => {
  296. const res = walkIntoLeafWithSuffix(suffix);
  297. return res
  298. ? res.node[0]
  299. : false;
  300. };
  301. function dump(onSuffix: (suffix: string) => void): void;
  302. function dump(): string[];
  303. function dump(onSuffix?: (suffix: string) => void): string[] | void {
  304. const results: string[] = [];
  305. const handleSuffix = onSuffix
  306. ? (suffix: string[]) => onSuffix(fastStringArrayJoin(suffix, ''))
  307. : (suffix: string[]) => results.push(fastStringArrayJoin(suffix, ''));
  308. walk(handleSuffix);
  309. return results;
  310. };
  311. const dumpMeta = () => {
  312. const results: Meta[] = [];
  313. walk((suffix, meta) => {
  314. results.push(meta);
  315. });
  316. return results;
  317. };
  318. const dumpWithMeta = () => {
  319. const results: Array<[string, Meta]> = [];
  320. walk((suffix, meta) => {
  321. results.push([fastStringArrayJoin(suffix, ''), meta]);
  322. });
  323. return results;
  324. };
  325. const whitelist = (suffix: string) => {
  326. if (!smolTree) {
  327. throw new Error('whitelist method is only available in smolTree mode.');
  328. }
  329. const tokens = hostnameToTokens(suffix);
  330. const res = getSingleChildLeaf(tokens);
  331. if (res === null) return;
  332. const { node, toPrune, tokenToPrune, parent } = res;
  333. // Trying to whitelist `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  334. if (tokens[0] === '.') {
  335. // If there is a `[start]sub.example.com` here, remove it
  336. parent[0] = false;
  337. // Removing all the child nodes by disconnecting "."
  338. parent[2].delete('.');
  339. }
  340. // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
  341. const dotNode = node[2].get('.');
  342. if (dotNode) {
  343. dotNode[0] = false;
  344. }
  345. // if (dotNode?.s === true) {
  346. // dotnode[0] = false;
  347. // }
  348. // return early if not found
  349. if (!node[0]) return;
  350. if (tokenToPrune && toPrune) {
  351. toPrune[2].delete(tokenToPrune);
  352. } else {
  353. node[0] = false;
  354. }
  355. };
  356. // Actually build trie
  357. if (Array.isArray(from)) {
  358. for (let i = 0, l = from.length; i < l; i++) {
  359. add(from[i]);
  360. }
  361. } else if (from) {
  362. from.forEach((value) => add(value));
  363. }
  364. const inspect = (depth: number, unpackMeta?: (meta?: Meta) => any) => fastStringArrayJoin(
  365. JSON.stringify(deepTrieNodeToJSON(root, unpackMeta), null, 2).split('\n').map((line) => ' '.repeat(depth) + line),
  366. '\n'
  367. );
  368. return {
  369. add,
  370. contains,
  371. find,
  372. remove,
  373. delete: remove,
  374. has,
  375. dump,
  376. dumpMeta,
  377. dumpWithMeta,
  378. get size() {
  379. if (smolTree) {
  380. throw new Error('A Trie with smolTree enabled cannot have correct size!');
  381. }
  382. return size;
  383. },
  384. get root() {
  385. return root;
  386. },
  387. whitelist,
  388. inspect,
  389. [util.inspect.custom]: inspect,
  390. smolTree
  391. };
  392. };
  393. export type Trie = ReturnType<typeof createTrie>;
  394. function deepEqualArray(a: string[], b: string[]) {
  395. let len = a.length;
  396. if (len !== b.length) return false;
  397. while (len--) {
  398. if (a[len] !== b[len]) return false;
  399. }
  400. return true;
  401. };