trie.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. /**
  2. * Suffix Trie based on Mnemonist Trie
  3. */
  4. import { fastStringArrayJoin } from './misc';
  5. // const { Error, Bun, JSON, Symbol } = globalThis;
  6. const noop = () => { /** noop */ };
  7. type TrieNode = [
  8. boolean, /** sentinel */
  9. TrieNode | null, /** parent */
  10. Map<string, TrieNode> /** children */
  11. ];
  12. const deepTrieNodeToJSON = (node: TrieNode) => {
  13. const obj: Record<string, any> = {};
  14. if (node[0]) {
  15. obj['[start]'] = node[0];
  16. }
  17. node[2].forEach((value, key) => {
  18. obj[key] = deepTrieNodeToJSON(value);
  19. });
  20. return obj;
  21. };
  22. function trieNodeInspectCustom(this: TrieNode) {
  23. return JSON.stringify(deepTrieNodeToJSON(this), null, 2);
  24. }
  25. const createNode = (parent: TrieNode | null = null): TrieNode => {
  26. const node = [false, parent, new Map<string, TrieNode>()] as TrieNode;
  27. Object.defineProperty(node, Bun.inspect.custom, { value: trieNodeInspectCustom });
  28. return node;
  29. };
  30. const hostnameToTokens = (hostname: string): string[] => {
  31. let buf = '';
  32. const tokens: string[] = [];
  33. for (let i = 0, l = hostname.length; i < l; i++) {
  34. const c = hostname[i];
  35. if (c === '.') {
  36. if (buf) {
  37. tokens.push(buf, /* . */ c);
  38. buf = '';
  39. } else {
  40. tokens.push(/* . */ c);
  41. }
  42. } else {
  43. buf += c;
  44. }
  45. }
  46. if (buf) {
  47. tokens.push(buf);
  48. }
  49. return tokens;
  50. };
  51. export const createTrie = (from?: string[] | Set<string> | null, hostnameMode = false, smolTree = false) => {
  52. let size = 0;
  53. const root: TrieNode = createNode();
  54. const isHostnameMode = (_token: string | string[]): _token is string[] => hostnameMode;
  55. const suffixToTokens = hostnameMode
  56. ? hostnameToTokens
  57. : (suffix: string) => suffix;
  58. /**
  59. * Method used to add the given suffix to the trie.
  60. */
  61. const add = (suffix: string): void => {
  62. let node: TrieNode = root;
  63. let token: string;
  64. const tokens = suffixToTokens(suffix);
  65. for (let i = tokens.length - 1; i >= 0; i--) {
  66. token = tokens[i];
  67. if (node[2].has(token)) {
  68. node = node[2].get(token)!;
  69. // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie
  70. // Dedupe the covered subdomain by skipping
  71. if (smolTree && token === '.' && node[0]) {
  72. return;
  73. }
  74. } else {
  75. const newNode = createNode(node);
  76. node[2].set(token, newNode);
  77. node = newNode;
  78. }
  79. }
  80. // If we are in smolTree mode, we need to do something at the end of the loop
  81. if (smolTree) {
  82. if (tokens[0] === '.') {
  83. // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  84. const parent = node[1]!;
  85. // Make sure parent `[start]sub.example.com` (without dot) is removed (SETINEL to false)
  86. parent[0] = false;
  87. // Removing the rest of the parent's child nodes
  88. node[2].clear();
  89. // The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
  90. // we can use else-if here, because the children is now empty, we don't need to check the leading "."
  91. } else if (node[2].get('.')?.[0] === true) {
  92. // Trying to add `example.com` when there is already a `.example.com` in the trie
  93. // No need to increment size and set SENTINEL to true (skip this "new" item)
  94. return;
  95. }
  96. } else if (!node[0]) { // smol tree don't have size, so else-if here
  97. size++;
  98. }
  99. node[0] = true;
  100. };
  101. const walkIntoLeafWithTokens = (
  102. tokens: string | string[],
  103. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  104. ) => {
  105. let node: TrieNode = root;
  106. let parent: TrieNode = node;
  107. let token: string;
  108. for (let i = tokens.length - 1; i >= 0; i--) {
  109. token = tokens[i];
  110. if (hostnameMode && token === '') {
  111. break;
  112. }
  113. parent = node;
  114. if (node[2].has(token)) {
  115. node = node[2].get(token)!;
  116. } else {
  117. return null;
  118. }
  119. onLoop(node, parent, token);
  120. }
  121. return { node, parent };
  122. };
  123. const contains = (suffix: string): boolean => {
  124. const tokens = suffixToTokens(suffix);
  125. return walkIntoLeafWithTokens(tokens) !== null;
  126. };
  127. const walk = (
  128. onMatches: (suffix: string | string[]) => void,
  129. initialNode = root,
  130. initialSuffix: string | string[] = hostnameMode ? [] : ''
  131. ) => {
  132. const nodeStack: TrieNode[] = [initialNode];
  133. // Resolving initial string (begin the start of the stack)
  134. const suffixStack: Array<string | string[]> = [initialSuffix];
  135. let node: TrieNode = root;
  136. do {
  137. node = nodeStack.pop()!;
  138. const suffix = suffixStack.pop()!;
  139. node[2].forEach((childNode, k) => {
  140. // Pushing the child node to the stack for next iteration of DFS
  141. nodeStack.push(childNode);
  142. suffixStack.push(isHostnameMode(suffix) ? [k, ...suffix] : k + suffix);
  143. });
  144. // If the node is a sentinel, we push the suffix to the results
  145. if (node[0]) {
  146. onMatches(suffix);
  147. }
  148. } while (nodeStack.length);
  149. };
  150. interface FindSingleChildLeafResult {
  151. node: TrieNode,
  152. toPrune: TrieNode | null,
  153. tokenToPrune: string | null,
  154. parent: TrieNode
  155. }
  156. const getSingleChildLeaf = (tokens: string | string[]): FindSingleChildLeafResult | null => {
  157. let toPrune: TrieNode | null = null;
  158. let tokenToPrune: string | null = null;
  159. const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
  160. // Keeping track of a potential branch to prune
  161. // Even if the node size is 1, but the single child is ".", we should retain the branch
  162. // Since the "." could be special if it is the leaf-est node
  163. const onlyChild = node[2].size < 2 && (!hostnameMode || !node[2].has('.'));
  164. if (toPrune != null) { // the top-est branch that could potentially being pruned
  165. if (!onlyChild) {
  166. // The branch has moew than single child, retain the branch.
  167. // And we need to abort prune the parent, so we set it to null
  168. toPrune = null;
  169. tokenToPrune = null;
  170. }
  171. } else if (onlyChild) {
  172. // There is only one token child, or no child at all, we can prune it safely
  173. // It is now the top-est branch that could potentially being pruned
  174. toPrune = parent;
  175. tokenToPrune = token;
  176. }
  177. };
  178. const res = walkIntoLeafWithTokens(tokens, onLoop);
  179. if (res === null) return null;
  180. return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  181. };
  182. /**
  183. * Method used to retrieve every item in the trie with the given prefix.
  184. */
  185. const find = (inputSuffix: string, /** @default true */ includeEqualWithSuffix = true): string[] => {
  186. if (smolTree) {
  187. throw new Error('A Trie with smolTree enabled cannot perform find!');
  188. }
  189. const inputTokens = suffixToTokens(inputSuffix);
  190. const res = walkIntoLeafWithTokens(inputTokens);
  191. if (res === null) return [];
  192. const matches: Array<string | string[]> = [];
  193. const onMatches = includeEqualWithSuffix
  194. ? (suffix: string | string[]) => matches.push(suffix)
  195. : (
  196. hostnameMode
  197. ? (suffix: string[]) => {
  198. if (suffix.some((t, i) => t !== inputTokens[i])) {
  199. matches.push(suffix);
  200. }
  201. }
  202. : (suffix: string) => {
  203. if (suffix !== inputTokens) {
  204. matches.push(suffix);
  205. }
  206. }
  207. );
  208. walk(
  209. onMatches as any,
  210. res.node, // Performing DFS from prefix
  211. inputTokens
  212. );
  213. return hostnameMode ? matches.map((m) => fastStringArrayJoin(m as string[], '')) : matches as string[];
  214. };
  215. /**
  216. * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place.
  217. */
  218. const substractSetInPlaceFromFound = (inputSuffix: string, set: Set<string>) => {
  219. if (smolTree) {
  220. throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
  221. }
  222. const inputTokens = suffixToTokens(inputSuffix);
  223. const res = walkIntoLeafWithTokens(inputTokens);
  224. if (res === null) return;
  225. const onMatches = hostnameMode
  226. ? (suffix: string[]) => set.delete(fastStringArrayJoin(suffix, ''))
  227. : (suffix: string) => set.delete(suffix);
  228. walk(
  229. onMatches as any,
  230. res.node, // Performing DFS from prefix
  231. inputTokens
  232. );
  233. };
  234. /**
  235. * Method used to delete a prefix from the trie.
  236. */
  237. const remove = (suffix: string): boolean => {
  238. const res = getSingleChildLeaf(suffixToTokens(suffix));
  239. if (res === null) return false;
  240. if (!res.node[0]) return false;
  241. size--;
  242. const { node, toPrune, tokenToPrune } = res;
  243. if (tokenToPrune && toPrune) {
  244. toPrune[2].delete(tokenToPrune);
  245. } else {
  246. node[0] = false;
  247. }
  248. return true;
  249. };
  250. /**
  251. * Method used to assert whether the given prefix exists in the Trie.
  252. */
  253. const has = (suffix: string): boolean => {
  254. const tokens = suffixToTokens(suffix);
  255. const res = walkIntoLeafWithTokens(tokens);
  256. return res
  257. ? res.node[0]
  258. : false;
  259. };
  260. const dump = () => {
  261. const results: string[] = [];
  262. walk(suffix => {
  263. results.push(
  264. isHostnameMode(suffix) ? fastStringArrayJoin(suffix, '') : suffix
  265. );
  266. });
  267. return results;
  268. };
  269. const whitelist = (suffix: string) => {
  270. if (!hostnameMode && !smolTree) {
  271. throw new Error('whitelist method is only available in hostname mode or smolTree mode.');
  272. }
  273. const tokens = suffixToTokens(suffix);
  274. const res = getSingleChildLeaf(tokens);
  275. if (res === null) return;
  276. const { node, toPrune, tokenToPrune, parent } = res;
  277. // Trying to whitelist `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  278. if (tokens[0] === '.') {
  279. // If there is a `[start]sub.example.com` here, remove it
  280. parent[0] = false;
  281. // Removing all the child nodes by disconnecting "."
  282. parent[2].delete('.');
  283. }
  284. // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
  285. const dotNode = node[2].get('.');
  286. if (dotNode) {
  287. dotNode[0] = false;
  288. }
  289. // if (dotNode?.s === true) {
  290. // dotnode[0] = false;
  291. // }
  292. // return early if not found
  293. if (!node[0]) return;
  294. if (tokenToPrune && toPrune) {
  295. toPrune[2].delete(tokenToPrune);
  296. } else {
  297. node[0] = false;
  298. }
  299. };
  300. // Actually build trie
  301. if (Array.isArray(from)) {
  302. for (let i = 0, l = from.length; i < l; i++) {
  303. add(from[i]);
  304. }
  305. } else if (from) {
  306. from.forEach(add);
  307. }
  308. return {
  309. add,
  310. contains,
  311. find,
  312. substractSetInPlaceFromFound,
  313. remove,
  314. delete: remove,
  315. has,
  316. dump,
  317. get size() {
  318. if (smolTree) {
  319. throw new Error('A Trie with smolTree enabled cannot have correct size!');
  320. }
  321. return size;
  322. },
  323. get root() {
  324. return root;
  325. },
  326. whitelist,
  327. [Bun.inspect.custom]: (depth: number) => JSON.stringify(deepTrieNodeToJSON(root), null, 2).split('\n').map((line) => ' '.repeat(depth) + line).join('\n'),
  328. hostnameMode,
  329. smolTree
  330. };
  331. };
  332. export type Trie = ReturnType<typeof createTrie>;
  333. export default createTrie;