trie.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486
  1. /**
  2. * Hostbane-Optimized Trie based on Mnemonist Trie
  3. */
  4. import { fastStringArrayJoin } from './misc';
  5. import util from 'node:util';
  6. import { noop } from 'foxact/noop';
  7. type TrieNode<Meta = any> = [
  8. boolean, /** sentinel */
  9. TrieNode | null, /** parent */
  10. Map<string, TrieNode>, /** children */
  11. Meta /** meta */
  12. ];
  13. const deepTrieNodeToJSON = (
  14. node: TrieNode,
  15. unpackMeta: ((meta?: any) => string) | undefined
  16. ) => {
  17. const obj: Record<string, any> = {};
  18. if (node[0]) {
  19. obj['[start]'] = node[0];
  20. }
  21. if (node[3] != null) {
  22. if (unpackMeta) {
  23. obj['[meta]'] = unpackMeta(node[3]);
  24. } else {
  25. obj['[meta]'] = node[3];
  26. }
  27. }
  28. node[2].forEach((value, key) => {
  29. obj[key] = deepTrieNodeToJSON(value, unpackMeta);
  30. });
  31. return obj;
  32. };
  33. const createNode = <Meta = any>(parent: TrieNode | null = null): TrieNode => [false, parent, new Map<string, TrieNode>(), null] as TrieNode<Meta>;
  34. export const hostnameToTokens = (hostname: string): string[] => {
  35. const tokens = hostname.split('.');
  36. const results: string[] = [];
  37. let token = '';
  38. for (let i = 0, l = tokens.length; i < l; i++) {
  39. if (i > 0) {
  40. results.push('.');
  41. }
  42. token = tokens[i];
  43. if (token.length > 0) {
  44. results.push(token);
  45. }
  46. }
  47. return results;
  48. };
  49. const walkHostnameTokens = (hostname: string, onToken: (token: string) => boolean | null): boolean | null => {
  50. const tokens = hostname.split('.');
  51. let token = '';
  52. const l = tokens.length - 1;
  53. for (let i = l; i >= 0; i--) {
  54. if (
  55. i < l // when i === l, we are at the first of hostname, no splitor there
  56. // when onToken returns true, we should skip the rest of the loop
  57. && onToken('.')
  58. ) {
  59. return true;
  60. }
  61. token = tokens[i];
  62. if (
  63. token.length > 0
  64. // when onToken returns true, we should skip the rest of the loop
  65. && onToken(token)
  66. ) {
  67. return true;
  68. }
  69. }
  70. return false;
  71. };
  72. export const createTrie = <Meta = any>(from?: string[] | Set<string> | null, smolTree = false) => {
  73. let size = 0;
  74. const root: TrieNode<Meta> = createNode();
  75. /**
  76. * Method used to add the given suffix to the trie.
  77. */
  78. const add = smolTree
  79. ? (suffix: string, meta?: Meta): void => {
  80. let node: TrieNode<Meta> = root;
  81. let curNodeChildren: Map<string, TrieNode<Meta>> = node[2];
  82. const onToken = (token: string) => {
  83. curNodeChildren = node[2];
  84. if (curNodeChildren.has(token)) {
  85. node = curNodeChildren.get(token)!;
  86. // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie, skip adding the rest of the node
  87. if (node[0] && token === '.') {
  88. return true;
  89. }
  90. } else {
  91. const newNode = createNode(node);
  92. curNodeChildren.set(token, newNode);
  93. node = newNode;
  94. }
  95. return false;
  96. };
  97. // When walkHostnameTokens returns true, we should skip the rest
  98. if (walkHostnameTokens(suffix, onToken)) {
  99. return;
  100. }
  101. // If we are in smolTree mode, we need to do something at the end of the loop
  102. if (suffix[0] === '.') {
  103. // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  104. // Make sure parent `[start]sub.example.com` (without dot) is removed (SETINEL to false)
  105. (/** parent */ node[1]!)[0] = false;
  106. // Removing the rest of the parent's child nodes
  107. node[2].clear();
  108. // The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
  109. // we can use else-if here, because the children is now empty, we don't need to check the leading "."
  110. } else if (node[2].get('.')?.[0] === true) {
  111. // Trying to add `example.com` when there is already a `.example.com` in the trie
  112. // No need to increment size and set SENTINEL to true (skip this "new" item)
  113. return;
  114. }
  115. node[0] = true;
  116. node[3] = meta!;
  117. }
  118. : (suffix: string, meta?: Meta): void => {
  119. let node: TrieNode<Meta> = root;
  120. const onToken = (token: string) => {
  121. if (node[2].has(token)) {
  122. node = node[2].get(token)!;
  123. } else {
  124. const newNode = createNode(node);
  125. node[2].set(token, newNode);
  126. node = newNode;
  127. }
  128. return false;
  129. };
  130. // When walkHostnameTokens returns true, we should skip the rest
  131. if (walkHostnameTokens(suffix, onToken)) {
  132. return;
  133. }
  134. if (!node[0]) {
  135. size++;
  136. node[0] = true;
  137. node[3] = meta!;
  138. }
  139. };
  140. const walkIntoLeafWithTokens = (
  141. tokens: string[],
  142. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  143. ) => {
  144. let node: TrieNode = root;
  145. let parent: TrieNode = node;
  146. let token: string;
  147. for (let i = tokens.length - 1; i >= 0; i--) {
  148. token = tokens[i];
  149. // if (token === '') {
  150. // break;
  151. // }
  152. parent = node;
  153. if (node[2].has(token)) {
  154. node = node[2].get(token)!;
  155. } else {
  156. return null;
  157. }
  158. onLoop(node, parent, token);
  159. }
  160. return { node, parent };
  161. };
  162. const walkIntoLeafWithSuffix = (
  163. suffix: string,
  164. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  165. ) => {
  166. let node: TrieNode = root;
  167. let parent: TrieNode = node;
  168. const onToken = (token: string) => {
  169. if (token === '') {
  170. return true;
  171. }
  172. parent = node;
  173. if (node[2].has(token)) {
  174. node = node[2].get(token)!;
  175. } else {
  176. return null;
  177. }
  178. onLoop(node, parent, token);
  179. return false;
  180. };
  181. if (walkHostnameTokens(suffix, onToken) === null) {
  182. return null;
  183. }
  184. return { node, parent };
  185. };
  186. const contains = (suffix: string): boolean => walkIntoLeafWithSuffix(suffix) !== null;
  187. const walk = (
  188. onMatches: (suffix: string[], meta: Meta) => void,
  189. initialNode = root,
  190. initialSuffix: string[] = []
  191. ) => {
  192. const nodeStack: Array<TrieNode<Meta>> = [initialNode];
  193. // Resolving initial string (begin the start of the stack)
  194. const suffixStack: string[][] = [initialSuffix];
  195. let node: TrieNode<Meta> = root;
  196. do {
  197. node = nodeStack.pop()!;
  198. const suffix = suffixStack.pop()!;
  199. node[2].forEach((childNode, k) => {
  200. // Pushing the child node to the stack for next iteration of DFS
  201. nodeStack.push(childNode);
  202. suffixStack.push([k, ...suffix]);
  203. });
  204. // If the node is a sentinel, we push the suffix to the results
  205. if (node[0]) {
  206. onMatches(suffix, node[3]);
  207. }
  208. } while (nodeStack.length);
  209. };
  210. interface FindSingleChildLeafResult {
  211. node: TrieNode,
  212. toPrune: TrieNode | null,
  213. tokenToPrune: string | null,
  214. parent: TrieNode
  215. }
  216. const getSingleChildLeaf = (tokens: string[]): FindSingleChildLeafResult | null => {
  217. let toPrune: TrieNode | null = null;
  218. let tokenToPrune: string | null = null;
  219. const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
  220. // Keeping track of a potential branch to prune
  221. // Even if the node size is 1, but the single child is ".", we should retain the branch
  222. // Since the "." could be special if it is the leaf-est node
  223. const onlyChild = node[2].size < 2 && !node[2].has('.');
  224. if (toPrune != null) { // the top-est branch that could potentially being pruned
  225. if (!onlyChild) {
  226. // The branch has moew than single child, retain the branch.
  227. // And we need to abort prune the parent, so we set it to null
  228. toPrune = null;
  229. tokenToPrune = null;
  230. }
  231. } else if (onlyChild) {
  232. // There is only one token child, or no child at all, we can prune it safely
  233. // It is now the top-est branch that could potentially being pruned
  234. toPrune = parent;
  235. tokenToPrune = token;
  236. }
  237. };
  238. const res = walkIntoLeafWithTokens(tokens, onLoop);
  239. if (res === null) return null;
  240. return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  241. };
  242. /**
  243. * Method used to retrieve every item in the trie with the given prefix.
  244. */
  245. const find = (
  246. inputSuffix: string,
  247. /** @default true */ includeEqualWithSuffix = true
  248. ): string[] => {
  249. // if (smolTree) {
  250. // throw new Error('A Trie with smolTree enabled cannot perform find!');
  251. // }
  252. const inputTokens = hostnameToTokens(inputSuffix);
  253. const res = walkIntoLeafWithTokens(inputTokens);
  254. if (res === null) return [];
  255. const matches: string[][] = [];
  256. const onMatches = includeEqualWithSuffix
  257. // fast path (default option)
  258. ? (suffix: string[]) => matches.push(suffix)
  259. // slow path
  260. : (suffix: string[]) => {
  261. if (!deepEqualArray(suffix, inputTokens)) {
  262. matches.push(suffix);
  263. }
  264. };
  265. walk(
  266. onMatches,
  267. res.node, // Performing DFS from prefix
  268. inputTokens
  269. );
  270. return matches.map((m) => fastStringArrayJoin(m, ''));
  271. };
  272. /**
  273. * Method used to delete a prefix from the trie.
  274. */
  275. const remove = (suffix: string): boolean => {
  276. const res = getSingleChildLeaf(hostnameToTokens(suffix));
  277. if (res === null) return false;
  278. if (!res.node[0]) return false;
  279. size--;
  280. const { node, toPrune, tokenToPrune } = res;
  281. if (tokenToPrune && toPrune) {
  282. toPrune[2].delete(tokenToPrune);
  283. } else {
  284. node[0] = false;
  285. }
  286. return true;
  287. };
  288. /**
  289. * Method used to assert whether the given prefix exists in the Trie.
  290. */
  291. const has = (suffix: string): boolean => {
  292. const res = walkIntoLeafWithSuffix(suffix);
  293. return res
  294. ? res.node[0]
  295. : false;
  296. };
  297. function dump(onSuffix: (suffix: string) => void): void;
  298. function dump(): string[];
  299. function dump(onSuffix?: (suffix: string) => void): string[] | void {
  300. const results: string[] = [];
  301. const handleSuffix = onSuffix
  302. ? (suffix: string[]) => onSuffix(fastStringArrayJoin(suffix, ''))
  303. : (suffix: string[]) => results.push(fastStringArrayJoin(suffix, ''));
  304. walk(handleSuffix);
  305. return results;
  306. };
  307. const dumpMeta = () => {
  308. const results: Meta[] = [];
  309. walk((suffix, meta) => {
  310. results.push(meta);
  311. });
  312. return results;
  313. };
  314. const dumpWithMeta = () => {
  315. const results: Array<[string, Meta]> = [];
  316. walk((suffix, meta) => {
  317. results.push([fastStringArrayJoin(suffix, ''), meta]);
  318. });
  319. return results;
  320. };
  321. const whitelist = (suffix: string) => {
  322. if (!smolTree) {
  323. throw new Error('whitelist method is only available in smolTree mode.');
  324. }
  325. const tokens = hostnameToTokens(suffix);
  326. const res = getSingleChildLeaf(tokens);
  327. if (res === null) return;
  328. const { node, toPrune, tokenToPrune, parent } = res;
  329. // Trying to whitelist `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  330. if (tokens[0] === '.') {
  331. // If there is a `[start]sub.example.com` here, remove it
  332. parent[0] = false;
  333. // Removing all the child nodes by empty the children
  334. // This removes the only child ".", which removes "blog.sub.example.com"
  335. parent[2].clear();
  336. }
  337. // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
  338. const dotNode = node[2].get('.');
  339. if (dotNode) {
  340. dotNode[0] = false;
  341. }
  342. // return early if not found
  343. if (!node[0]) return;
  344. if (tokenToPrune && toPrune) {
  345. toPrune[2].delete(tokenToPrune);
  346. } else {
  347. node[0] = false;
  348. }
  349. };
  350. // Actually build trie
  351. if (Array.isArray(from)) {
  352. for (let i = 0, l = from.length; i < l; i++) {
  353. add(from[i]);
  354. }
  355. } else if (from) {
  356. from.forEach((value) => add(value));
  357. }
  358. const inspect = (depth: number, unpackMeta?: (meta?: Meta) => any) => fastStringArrayJoin(
  359. JSON.stringify(deepTrieNodeToJSON(root, unpackMeta), null, 2).split('\n').map((line) => ' '.repeat(depth) + line),
  360. '\n'
  361. );
  362. return {
  363. add,
  364. contains,
  365. find,
  366. remove,
  367. delete: remove,
  368. has,
  369. dump,
  370. dumpMeta,
  371. dumpWithMeta,
  372. get size() {
  373. if (smolTree) {
  374. throw new Error('A Trie with smolTree enabled cannot have correct size!');
  375. }
  376. return size;
  377. },
  378. get root() {
  379. return root;
  380. },
  381. whitelist,
  382. inspect,
  383. [util.inspect.custom]: inspect,
  384. smolTree
  385. };
  386. };
  387. export type Trie = ReturnType<typeof createTrie>;
  388. function deepEqualArray(a: string[], b: string[]) {
  389. let len = a.length;
  390. if (len !== b.length) return false;
  391. while (len--) {
  392. if (a[len] !== b[len]) return false;
  393. }
  394. return true;
  395. };