trie.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. /**
  2. * Suffix Trie based on Mnemonist Trie
  3. */
  4. // const { Error, Bun, JSON, Symbol } = globalThis;
  5. const SENTINEL = Symbol('SENTINEL');
  6. const PARENT = Symbol('Parent Node');
  7. const noop: VoidFunction = () => { /** noop */ };
  8. type TrieNode = {
  9. [SENTINEL]: boolean,
  10. [PARENT]: TrieNode | null,
  11. [Bun.inspect.custom]: () => string
  12. } & Map<string, TrieNode>;
  13. const deepTrieNodeToJSON = (node: TrieNode) => {
  14. const obj: Record<string, any> = {};
  15. if (node[SENTINEL]) {
  16. obj['[start]'] = node[SENTINEL];
  17. }
  18. node.forEach((value, key) => {
  19. obj[key] = deepTrieNodeToJSON(value);
  20. });
  21. return obj;
  22. };
  23. function trieNodeInspectCustom(this: TrieNode) {
  24. return JSON.stringify(deepTrieNodeToJSON(this), null, 2);
  25. }
  26. const createNode = (parent: TrieNode | null = null): TrieNode => {
  27. const node = new Map<string, TrieNode>() as TrieNode;
  28. node[SENTINEL] = false;
  29. node[PARENT] = parent;
  30. node[Bun.inspect.custom] = trieNodeInspectCustom;
  31. return node;
  32. };
  33. const hostnameToTokens = (hostname: string): string[] => {
  34. let buf = '';
  35. const tokens: string[] = [];
  36. for (let i = 0, l = hostname.length; i < l; i++) {
  37. const c = hostname[i];
  38. if (c === '.') {
  39. if (buf) {
  40. tokens.push(buf, /* . */ c);
  41. buf = '';
  42. } else {
  43. tokens.push(/* . */ c);
  44. }
  45. } else {
  46. buf += c;
  47. }
  48. }
  49. if (buf) {
  50. tokens.push(buf);
  51. }
  52. return tokens;
  53. };
  54. export const createTrie = (from?: string[] | Set<string> | null, hostnameMode = false, smolTree = false) => {
  55. let size = 0;
  56. const root: TrieNode = createNode();
  57. const isHostnameMode = (_token: string | string[]): _token is string[] => hostnameMode;
  58. const suffixToTokens = hostnameMode
  59. ? hostnameToTokens
  60. : (suffix: string) => suffix;
  61. /**
  62. * Method used to add the given suffix to the trie.
  63. */
  64. const add = (suffix: string): void => {
  65. let node: TrieNode = root;
  66. let token: string;
  67. const tokens = suffixToTokens(suffix);
  68. for (let i = tokens.length - 1; i >= 0; i--) {
  69. token = tokens[i];
  70. if (node.has(token)) {
  71. node = node.get(token)!;
  72. // During the adding of `[start]blog.skk.moe` and find out that there is a `[start].skk.moe` in the trie
  73. // Dedupe the covered subdomain by skipping
  74. if (smolTree && (node.get('.')?.[SENTINEL])) return;
  75. } else {
  76. const newNode = createNode(node);
  77. node.set(token, newNode);
  78. node = newNode;
  79. }
  80. }
  81. // If we are in smolTree mode, we need to do something at the end of the loop
  82. if (smolTree) {
  83. if (tokens[0] === '.') {
  84. // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  85. const parent = node[PARENT]!;
  86. // Make sure parent `[start]sub.example.com` (without dot) is removed (SETINEL to false)
  87. parent[SENTINEL] = false;
  88. // Removing the rest of the parent's child nodes by disconnecting the old one and creating a new node
  89. const newNode = createNode(node);
  90. // The SENTINEL of this newNode will be set to true at the end of the function, so we don't need to set it here
  91. parent.set('.', newNode);
  92. // Now the real leaf-est node is the new node, change the pointer to it
  93. node = newNode;
  94. }
  95. if (node.get('.')?.[SENTINEL] === true) {
  96. // Trying to add `example.com` when there is already a `.example.com` in the trie
  97. // No need to increment size and set SENTINEL to true (skip this "new" item)
  98. return;
  99. }
  100. }
  101. // Do we need to increase size?
  102. if (!node[SENTINEL]) {
  103. size++;
  104. }
  105. node[SENTINEL] = true;
  106. };
  107. const walkIntoLeafWithTokens = (
  108. tokens: string | string[],
  109. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  110. ) => {
  111. let node: TrieNode | undefined = root;
  112. let parent: TrieNode = node;
  113. let token: string;
  114. for (let i = tokens.length - 1; i >= 0; i--) {
  115. token = tokens[i];
  116. if (hostnameMode && token === '') {
  117. break;
  118. }
  119. parent = node;
  120. node = node.get(token);
  121. if (!node) return null;
  122. onLoop(node, parent, token);
  123. }
  124. return { node, parent };
  125. };
  126. const contains = (suffix: string): boolean => {
  127. const tokens = suffixToTokens(suffix);
  128. return walkIntoLeafWithTokens(tokens) !== null;
  129. };
  130. const walk = (
  131. onMatches: (suffix: string | string[]) => void,
  132. initialNode = root,
  133. initialSuffix: string | string[] = hostnameMode ? [] : ''
  134. ) => {
  135. const nodeStack: TrieNode[] = [initialNode];
  136. // Resolving initial string (begin the start of the stack)
  137. const suffixStack: Array<string | string[]> = [initialSuffix];
  138. let node: TrieNode = root;
  139. do {
  140. node = nodeStack.pop()!;
  141. const suffix = suffixStack.pop()!;
  142. node.forEach((childNode, k) => {
  143. // Pushing the child node to the stack for next iteration of DFS
  144. nodeStack.push(childNode);
  145. suffixStack.push(isHostnameMode(suffix) ? [k, ...suffix] : k + suffix);
  146. });
  147. // If the node is a sentinel, we push the suffix to the results
  148. if (node[SENTINEL]) {
  149. onMatches(suffix);
  150. }
  151. } while (nodeStack.length);
  152. };
  153. interface FindSingleChildLeafResult {
  154. node: TrieNode,
  155. toPrune: TrieNode | null,
  156. tokenToPrune: string | null,
  157. parent: TrieNode
  158. }
  159. const getSingleChildLeaf = (tokens: string | string[]): FindSingleChildLeafResult | null => {
  160. let toPrune: TrieNode | null = null;
  161. let tokenToPrune: string | null = null;
  162. const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
  163. // Keeping track of a potential branch to prune
  164. // Even if the node size is 1, but the single child is ".", we should retain the branch
  165. // Since the "." could be special if it is the leaf-est node
  166. const onlyChild = node.size < 2 && (!hostnameMode || !node.has('.'));
  167. if (toPrune != null) { // the top-est branch that could potentially being pruned
  168. if (!onlyChild) {
  169. // The branch has moew than single child, retain the branch.
  170. // And we need to abort prune the parent, so we set it to null
  171. toPrune = null;
  172. tokenToPrune = null;
  173. }
  174. } else if (onlyChild) {
  175. // There is only one token child, or no child at all, we can prune it safely
  176. // It is now the top-est branch that could potentially being pruned
  177. toPrune = parent;
  178. tokenToPrune = token;
  179. }
  180. };
  181. const res = walkIntoLeafWithTokens(tokens, onLoop);
  182. if (res === null) return null;
  183. return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  184. };
  185. /**
  186. * Method used to retrieve every item in the trie with the given prefix.
  187. */
  188. const find = (inputSuffix: string, /** @default true */ includeEqualWithSuffix = true): string[] => {
  189. if (smolTree) {
  190. throw new Error('A Trie with smolTree enabled cannot perform find!');
  191. }
  192. const inputTokens = suffixToTokens(inputSuffix);
  193. const res = walkIntoLeafWithTokens(inputTokens);
  194. if (res === null) return [];
  195. const matches: Array<string | string[]> = [];
  196. const onMatches = includeEqualWithSuffix
  197. ? (suffix: string | string[]) => matches.push(suffix)
  198. : (
  199. hostnameMode
  200. ? (suffix: string[]) => {
  201. if (suffix.some((t, i) => t !== inputTokens[i])) {
  202. matches.push(suffix);
  203. }
  204. }
  205. : (suffix: string) => {
  206. if (suffix !== inputTokens) {
  207. matches.push(suffix);
  208. }
  209. }
  210. );
  211. walk(
  212. onMatches as any,
  213. res.node, // Performing DFS from prefix
  214. inputTokens
  215. );
  216. return hostnameMode ? matches.map((m) => (m as string[]).join('')) : matches as string[];
  217. };
  218. /**
  219. * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place.
  220. */
  221. const substractSetInPlaceFromFound = (inputSuffix: string, set: Set<string>) => {
  222. if (smolTree) {
  223. throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
  224. }
  225. const inputTokens = suffixToTokens(inputSuffix);
  226. const res = walkIntoLeafWithTokens(inputTokens);
  227. if (res === null) return;
  228. const onMatches = hostnameMode
  229. ? (suffix: string[]) => set.delete(suffix.join(''))
  230. : (suffix: string) => set.delete(suffix);
  231. walk(
  232. onMatches as any,
  233. res.node, // Performing DFS from prefix
  234. inputTokens
  235. );
  236. };
  237. /**
  238. * Method used to delete a prefix from the trie.
  239. */
  240. const remove = (suffix: string): boolean => {
  241. const suffixTokens = suffixToTokens(suffix);
  242. const res = getSingleChildLeaf(suffixTokens);
  243. if (res === null) return false;
  244. if (!res.node[SENTINEL]) return false;
  245. size--;
  246. const { node, toPrune, tokenToPrune } = res;
  247. if (tokenToPrune && toPrune) {
  248. toPrune.delete(tokenToPrune);
  249. } else {
  250. node[SENTINEL] = false;
  251. }
  252. return true;
  253. };
  254. /**
  255. * Method used to assert whether the given prefix exists in the Trie.
  256. */
  257. const has = (suffix: string): boolean => {
  258. const tokens = suffixToTokens(suffix);
  259. const res = walkIntoLeafWithTokens(tokens);
  260. return res
  261. ? res.node[SENTINEL]
  262. : false;
  263. };
  264. const dump = () => {
  265. const results: string[] = [];
  266. walk(suffix => {
  267. results.push(
  268. isHostnameMode(suffix) ? suffix.join('') : suffix
  269. );
  270. });
  271. return results;
  272. };
  273. const whitelist = (suffix: string) => {
  274. if (!hostnameMode && !smolTree) {
  275. throw new Error('whitelist method is only available in hostname mode or smolTree mode.');
  276. }
  277. const tokens = suffixToTokens(suffix);
  278. const res = getSingleChildLeaf(tokens);
  279. if (res === null) return;
  280. const { node, toPrune, tokenToPrune, parent } = res;
  281. // Trying to whitelist `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  282. if (tokens[0] === '.') {
  283. // If there is a `[start]sub.example.com` here, remove it
  284. parent[SENTINEL] = false;
  285. // Removing all the child nodes by disconnecting "."
  286. parent.delete('.');
  287. }
  288. // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
  289. const dotNode = node.get('.');
  290. if (dotNode?.[SENTINEL] === true) {
  291. dotNode[SENTINEL] = false;
  292. }
  293. // return early if not found
  294. if (!node[SENTINEL]) return;
  295. if (tokenToPrune && toPrune) {
  296. toPrune.delete(tokenToPrune);
  297. } else {
  298. node[SENTINEL] = false;
  299. }
  300. };
  301. // Actually build trie
  302. if (Array.isArray(from)) {
  303. for (let i = 0, l = from.length; i < l; i++) {
  304. add(from[i]);
  305. }
  306. } else if (from) {
  307. from.forEach(add);
  308. }
  309. return {
  310. add,
  311. contains,
  312. find,
  313. substractSetInPlaceFromFound,
  314. remove,
  315. delete: remove,
  316. has,
  317. dump,
  318. get size() {
  319. if (smolTree) {
  320. throw new Error('A Trie with smolTree enabled cannot have correct size!');
  321. }
  322. return size;
  323. },
  324. get root() {
  325. return root;
  326. },
  327. whitelist,
  328. [Bun.inspect.custom]: (depth: number) => JSON.stringify(deepTrieNodeToJSON(root), null, 2).split('\n').map((line) => ' '.repeat(depth) + line).join('\n'),
  329. hostnameMode,
  330. smolTree
  331. };
  332. };
  333. export type Trie = ReturnType<typeof createTrie>;
  334. export default createTrie;