trie.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. /**
  2. * Suffix Trie based on Mnemonist Trie
  3. */
  4. // const { Error, Bun, JSON, Symbol } = globalThis;
  5. const noop = () => { /** noop */ };
  6. interface TrieNode {
  7. s: boolean,
  8. p: TrieNode | null,
  9. [Bun.inspect.custom](): string,
  10. c: Map<string, TrieNode>
  11. }
  12. const deepTrieNodeToJSON = (node: TrieNode) => {
  13. const obj: Record<string, any> = {};
  14. if (node.s) {
  15. obj['[start]'] = node.s;
  16. }
  17. node.c.forEach((value, key) => {
  18. obj[key] = deepTrieNodeToJSON(value);
  19. });
  20. return obj;
  21. };
  22. function trieNodeInspectCustom(this: TrieNode) {
  23. return JSON.stringify(deepTrieNodeToJSON(this), null, 2);
  24. }
  25. const createNode = (parent: TrieNode | null = null): TrieNode => {
  26. return {
  27. s: false,
  28. p: parent,
  29. c: new Map<string, TrieNode>(),
  30. [Bun.inspect.custom]: trieNodeInspectCustom
  31. };
  32. };
  33. const hostnameToTokens = (hostname: string): string[] => {
  34. let buf = '';
  35. const tokens: string[] = [];
  36. for (let i = 0, l = hostname.length; i < l; i++) {
  37. const c = hostname[i];
  38. if (c === '.') {
  39. if (buf) {
  40. tokens.push(buf, /* . */ c);
  41. buf = '';
  42. } else {
  43. tokens.push(/* . */ c);
  44. }
  45. } else {
  46. buf += c;
  47. }
  48. }
  49. if (buf) {
  50. tokens.push(buf);
  51. }
  52. return tokens;
  53. };
  54. export const createTrie = (from?: string[] | Set<string> | null, hostnameMode = false, smolTree = false) => {
  55. let size = 0;
  56. const root: TrieNode = createNode();
  57. const isHostnameMode = (_token: string | string[]): _token is string[] => hostnameMode;
  58. const suffixToTokens = hostnameMode
  59. ? hostnameToTokens
  60. : (suffix: string) => suffix;
  61. /**
  62. * Method used to add the given suffix to the trie.
  63. */
  64. const add = (suffix: string): void => {
  65. let node: TrieNode = root;
  66. let token: string;
  67. const tokens = suffixToTokens(suffix);
  68. for (let i = tokens.length - 1; i >= 0; i--) {
  69. token = tokens[i];
  70. if (node.c.has(token)) {
  71. node = node.c.get(token)!;
  72. // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie
  73. // Dedupe the covered subdomain by skipping
  74. if (smolTree && token === '.' && node.s) {
  75. return;
  76. }
  77. } else {
  78. const newNode = createNode(node);
  79. node.c.set(token, newNode);
  80. node = newNode;
  81. }
  82. }
  83. // If we are in smolTree mode, we need to do something at the end of the loop
  84. if (smolTree) {
  85. if (tokens[0] === '.') {
  86. // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  87. const parent = node.p!;
  88. // Make sure parent `[start]sub.example.com` (without dot) is removed (SETINEL to false)
  89. parent.s = false;
  90. // Removing the rest of the parent's child nodes
  91. node.c.clear();
  92. // The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
  93. // we can use else-if here, because the children is now empty, we don't need to check the leading "."
  94. } else if (node.c.get('.')?.s === true) {
  95. // Trying to add `example.com` when there is already a `.example.com` in the trie
  96. // No need to increment size and set SENTINEL to true (skip this "new" item)
  97. return;
  98. }
  99. } else if (!node.s) { // smol tree don't have size, so else-if here
  100. size++;
  101. }
  102. node.s = true;
  103. };
  104. const walkIntoLeafWithTokens = (
  105. tokens: string | string[],
  106. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  107. ) => {
  108. let node: TrieNode = root;
  109. let parent: TrieNode = node;
  110. let token: string;
  111. for (let i = tokens.length - 1; i >= 0; i--) {
  112. token = tokens[i];
  113. if (hostnameMode && token === '') {
  114. break;
  115. }
  116. parent = node;
  117. if (node.c.has(token)) {
  118. node = node.c.get(token)!;
  119. } else {
  120. return null;
  121. }
  122. onLoop(node, parent, token);
  123. }
  124. return { node, parent };
  125. };
  126. const contains = (suffix: string): boolean => {
  127. const tokens = suffixToTokens(suffix);
  128. return walkIntoLeafWithTokens(tokens) !== null;
  129. };
  130. const walk = (
  131. onMatches: (suffix: string | string[]) => void,
  132. initialNode = root,
  133. initialSuffix: string | string[] = hostnameMode ? [] : ''
  134. ) => {
  135. const nodeStack: TrieNode[] = [initialNode];
  136. // Resolving initial string (begin the start of the stack)
  137. const suffixStack: Array<string | string[]> = [initialSuffix];
  138. let node: TrieNode = root;
  139. do {
  140. node = nodeStack.pop()!;
  141. const suffix = suffixStack.pop()!;
  142. node.c.forEach((childNode, k) => {
  143. // Pushing the child node to the stack for next iteration of DFS
  144. nodeStack.push(childNode);
  145. suffixStack.push(isHostnameMode(suffix) ? [k, ...suffix] : k + suffix);
  146. });
  147. // If the node is a sentinel, we push the suffix to the results
  148. if (node.s) {
  149. onMatches(suffix);
  150. }
  151. } while (nodeStack.length);
  152. };
  153. interface FindSingleChildLeafResult {
  154. node: TrieNode,
  155. toPrune: TrieNode | null,
  156. tokenToPrune: string | null,
  157. parent: TrieNode
  158. }
  159. const getSingleChildLeaf = (tokens: string | string[]): FindSingleChildLeafResult | null => {
  160. let toPrune: TrieNode | null = null;
  161. let tokenToPrune: string | null = null;
  162. const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
  163. // Keeping track of a potential branch to prune
  164. // Even if the node size is 1, but the single child is ".", we should retain the branch
  165. // Since the "." could be special if it is the leaf-est node
  166. const onlyChild = node.c.size < 2 && (!hostnameMode || !node.c.has('.'));
  167. if (toPrune != null) { // the top-est branch that could potentially being pruned
  168. if (!onlyChild) {
  169. // The branch has moew than single child, retain the branch.
  170. // And we need to abort prune the parent, so we set it to null
  171. toPrune = null;
  172. tokenToPrune = null;
  173. }
  174. } else if (onlyChild) {
  175. // There is only one token child, or no child at all, we can prune it safely
  176. // It is now the top-est branch that could potentially being pruned
  177. toPrune = parent;
  178. tokenToPrune = token;
  179. }
  180. };
  181. const res = walkIntoLeafWithTokens(tokens, onLoop);
  182. if (res === null) return null;
  183. return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  184. };
  185. /**
  186. * Method used to retrieve every item in the trie with the given prefix.
  187. */
  188. const find = (inputSuffix: string, /** @default true */ includeEqualWithSuffix = true): string[] => {
  189. if (smolTree) {
  190. throw new Error('A Trie with smolTree enabled cannot perform find!');
  191. }
  192. const inputTokens = suffixToTokens(inputSuffix);
  193. const res = walkIntoLeafWithTokens(inputTokens);
  194. if (res === null) return [];
  195. const matches: Array<string | string[]> = [];
  196. const onMatches = includeEqualWithSuffix
  197. ? (suffix: string | string[]) => matches.push(suffix)
  198. : (
  199. hostnameMode
  200. ? (suffix: string[]) => {
  201. if (suffix.some((t, i) => t !== inputTokens[i])) {
  202. matches.push(suffix);
  203. }
  204. }
  205. : (suffix: string) => {
  206. if (suffix !== inputTokens) {
  207. matches.push(suffix);
  208. }
  209. }
  210. );
  211. walk(
  212. onMatches as any,
  213. res.node, // Performing DFS from prefix
  214. inputTokens
  215. );
  216. return hostnameMode ? matches.map((m) => (m as string[]).join('')) : matches as string[];
  217. };
  218. /**
  219. * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place.
  220. */
  221. const substractSetInPlaceFromFound = (inputSuffix: string, set: Set<string>) => {
  222. if (smolTree) {
  223. throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
  224. }
  225. const inputTokens = suffixToTokens(inputSuffix);
  226. const res = walkIntoLeafWithTokens(inputTokens);
  227. if (res === null) return;
  228. const onMatches = hostnameMode
  229. ? (suffix: string[]) => set.delete(suffix.join(''))
  230. : (suffix: string) => set.delete(suffix);
  231. walk(
  232. onMatches as any,
  233. res.node, // Performing DFS from prefix
  234. inputTokens
  235. );
  236. };
  237. /**
  238. * Method used to delete a prefix from the trie.
  239. */
  240. const remove = (suffix: string): boolean => {
  241. const res = getSingleChildLeaf(suffixToTokens(suffix));
  242. if (res === null) return false;
  243. if (!res.node.s) return false;
  244. size--;
  245. const { node, toPrune, tokenToPrune } = res;
  246. if (tokenToPrune && toPrune) {
  247. toPrune.c.delete(tokenToPrune);
  248. } else {
  249. node.s = false;
  250. }
  251. return true;
  252. };
  253. /**
  254. * Method used to assert whether the given prefix exists in the Trie.
  255. */
  256. const has = (suffix: string): boolean => {
  257. const tokens = suffixToTokens(suffix);
  258. const res = walkIntoLeafWithTokens(tokens);
  259. return res
  260. ? res.node.s
  261. : false;
  262. };
  263. const dump = () => {
  264. const results: string[] = [];
  265. walk(suffix => {
  266. results.push(
  267. isHostnameMode(suffix) ? suffix.join('') : suffix
  268. );
  269. });
  270. return results;
  271. };
  272. const whitelist = (suffix: string) => {
  273. if (!hostnameMode && !smolTree) {
  274. throw new Error('whitelist method is only available in hostname mode or smolTree mode.');
  275. }
  276. const tokens = suffixToTokens(suffix);
  277. const res = getSingleChildLeaf(tokens);
  278. if (res === null) return;
  279. const { node, toPrune, tokenToPrune, parent } = res;
  280. // Trying to whitelist `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  281. if (tokens[0] === '.') {
  282. // If there is a `[start]sub.example.com` here, remove it
  283. parent.s = false;
  284. // Removing all the child nodes by disconnecting "."
  285. parent.c.delete('.');
  286. }
  287. // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
  288. const dotNode = node.c.get('.');
  289. if (dotNode) {
  290. dotNode.s = false;
  291. }
  292. // if (dotNode?.s === true) {
  293. // dotNode.s = false;
  294. // }
  295. // return early if not found
  296. if (!node.s) return;
  297. if (tokenToPrune && toPrune) {
  298. toPrune.c.delete(tokenToPrune);
  299. } else {
  300. node.s = false;
  301. }
  302. };
  303. // Actually build trie
  304. if (Array.isArray(from)) {
  305. for (let i = 0, l = from.length; i < l; i++) {
  306. add(from[i]);
  307. }
  308. } else if (from) {
  309. from.forEach(add);
  310. }
  311. return {
  312. add,
  313. contains,
  314. find,
  315. substractSetInPlaceFromFound,
  316. remove,
  317. delete: remove,
  318. has,
  319. dump,
  320. get size() {
  321. if (smolTree) {
  322. throw new Error('A Trie with smolTree enabled cannot have correct size!');
  323. }
  324. return size;
  325. },
  326. get root() {
  327. return root;
  328. },
  329. whitelist,
  330. [Bun.inspect.custom]: (depth: number) => JSON.stringify(deepTrieNodeToJSON(root), null, 2).split('\n').map((line) => ' '.repeat(depth) + line).join('\n'),
  331. hostnameMode,
  332. smolTree
  333. };
  334. };
  335. export type Trie = ReturnType<typeof createTrie>;
  336. export default createTrie;