trie.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. /**
  2. * Suffix Trie based on Mnemonist Trie
  3. */
  4. import { fastStringArrayJoin } from './misc';
  5. // const { Error, Bun, JSON, Symbol } = globalThis;
  6. const noop = () => { /** noop */ };
  7. interface TrieNode {
  8. s: boolean,
  9. p: TrieNode | null,
  10. [Bun.inspect.custom](): string,
  11. c: Map<string, TrieNode>
  12. }
  13. const deepTrieNodeToJSON = (node: TrieNode) => {
  14. const obj: Record<string, any> = {};
  15. if (node.s) {
  16. obj['[start]'] = node.s;
  17. }
  18. node.c.forEach((value, key) => {
  19. obj[key] = deepTrieNodeToJSON(value);
  20. });
  21. return obj;
  22. };
  23. function trieNodeInspectCustom(this: TrieNode) {
  24. return JSON.stringify(deepTrieNodeToJSON(this), null, 2);
  25. }
  26. const createNode = (parent: TrieNode | null = null): TrieNode => {
  27. return {
  28. s: false,
  29. p: parent,
  30. c: new Map<string, TrieNode>(),
  31. [Bun.inspect.custom]: trieNodeInspectCustom
  32. };
  33. };
  34. const hostnameToTokens = (hostname: string): string[] => {
  35. let buf = '';
  36. const tokens: string[] = [];
  37. for (let i = 0, l = hostname.length; i < l; i++) {
  38. const c = hostname[i];
  39. if (c === '.') {
  40. if (buf) {
  41. tokens.push(buf, /* . */ c);
  42. buf = '';
  43. } else {
  44. tokens.push(/* . */ c);
  45. }
  46. } else {
  47. buf += c;
  48. }
  49. }
  50. if (buf) {
  51. tokens.push(buf);
  52. }
  53. return tokens;
  54. };
  55. export const createTrie = (from?: string[] | Set<string> | null, hostnameMode = false, smolTree = false) => {
  56. let size = 0;
  57. const root: TrieNode = createNode();
  58. const isHostnameMode = (_token: string | string[]): _token is string[] => hostnameMode;
  59. const suffixToTokens = hostnameMode
  60. ? hostnameToTokens
  61. : (suffix: string) => suffix;
  62. /**
  63. * Method used to add the given suffix to the trie.
  64. */
  65. const add = (suffix: string): void => {
  66. let node: TrieNode = root;
  67. let token: string;
  68. const tokens = suffixToTokens(suffix);
  69. for (let i = tokens.length - 1; i >= 0; i--) {
  70. token = tokens[i];
  71. if (node.c.has(token)) {
  72. node = node.c.get(token)!;
  73. // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie
  74. // Dedupe the covered subdomain by skipping
  75. if (smolTree && token === '.' && node.s) {
  76. return;
  77. }
  78. } else {
  79. const newNode = createNode(node);
  80. node.c.set(token, newNode);
  81. node = newNode;
  82. }
  83. }
  84. // If we are in smolTree mode, we need to do something at the end of the loop
  85. if (smolTree) {
  86. if (tokens[0] === '.') {
  87. // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  88. const parent = node.p!;
  89. // Make sure parent `[start]sub.example.com` (without dot) is removed (SETINEL to false)
  90. parent.s = false;
  91. // Removing the rest of the parent's child nodes
  92. node.c.clear();
  93. // The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
  94. // we can use else-if here, because the children is now empty, we don't need to check the leading "."
  95. } else if (node.c.get('.')?.s === true) {
  96. // Trying to add `example.com` when there is already a `.example.com` in the trie
  97. // No need to increment size and set SENTINEL to true (skip this "new" item)
  98. return;
  99. }
  100. } else if (!node.s) { // smol tree don't have size, so else-if here
  101. size++;
  102. }
  103. node.s = true;
  104. };
  105. const walkIntoLeafWithTokens = (
  106. tokens: string | string[],
  107. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  108. ) => {
  109. let node: TrieNode = root;
  110. let parent: TrieNode = node;
  111. let token: string;
  112. for (let i = tokens.length - 1; i >= 0; i--) {
  113. token = tokens[i];
  114. if (hostnameMode && token === '') {
  115. break;
  116. }
  117. parent = node;
  118. if (node.c.has(token)) {
  119. node = node.c.get(token)!;
  120. } else {
  121. return null;
  122. }
  123. onLoop(node, parent, token);
  124. }
  125. return { node, parent };
  126. };
  127. const contains = (suffix: string): boolean => {
  128. const tokens = suffixToTokens(suffix);
  129. return walkIntoLeafWithTokens(tokens) !== null;
  130. };
  131. const walk = (
  132. onMatches: (suffix: string | string[]) => void,
  133. initialNode = root,
  134. initialSuffix: string | string[] = hostnameMode ? [] : ''
  135. ) => {
  136. const nodeStack: TrieNode[] = [initialNode];
  137. // Resolving initial string (begin the start of the stack)
  138. const suffixStack: Array<string | string[]> = [initialSuffix];
  139. let node: TrieNode = root;
  140. do {
  141. node = nodeStack.pop()!;
  142. const suffix = suffixStack.pop()!;
  143. node.c.forEach((childNode, k) => {
  144. // Pushing the child node to the stack for next iteration of DFS
  145. nodeStack.push(childNode);
  146. suffixStack.push(isHostnameMode(suffix) ? [k, ...suffix] : k + suffix);
  147. });
  148. // If the node is a sentinel, we push the suffix to the results
  149. if (node.s) {
  150. onMatches(suffix);
  151. }
  152. } while (nodeStack.length);
  153. };
  154. interface FindSingleChildLeafResult {
  155. node: TrieNode,
  156. toPrune: TrieNode | null,
  157. tokenToPrune: string | null,
  158. parent: TrieNode
  159. }
  160. const getSingleChildLeaf = (tokens: string | string[]): FindSingleChildLeafResult | null => {
  161. let toPrune: TrieNode | null = null;
  162. let tokenToPrune: string | null = null;
  163. const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
  164. // Keeping track of a potential branch to prune
  165. // Even if the node size is 1, but the single child is ".", we should retain the branch
  166. // Since the "." could be special if it is the leaf-est node
  167. const onlyChild = node.c.size < 2 && (!hostnameMode || !node.c.has('.'));
  168. if (toPrune != null) { // the top-est branch that could potentially being pruned
  169. if (!onlyChild) {
  170. // The branch has moew than single child, retain the branch.
  171. // And we need to abort prune the parent, so we set it to null
  172. toPrune = null;
  173. tokenToPrune = null;
  174. }
  175. } else if (onlyChild) {
  176. // There is only one token child, or no child at all, we can prune it safely
  177. // It is now the top-est branch that could potentially being pruned
  178. toPrune = parent;
  179. tokenToPrune = token;
  180. }
  181. };
  182. const res = walkIntoLeafWithTokens(tokens, onLoop);
  183. if (res === null) return null;
  184. return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  185. };
  186. /**
  187. * Method used to retrieve every item in the trie with the given prefix.
  188. */
  189. const find = (inputSuffix: string, /** @default true */ includeEqualWithSuffix = true): string[] => {
  190. if (smolTree) {
  191. throw new Error('A Trie with smolTree enabled cannot perform find!');
  192. }
  193. const inputTokens = suffixToTokens(inputSuffix);
  194. const res = walkIntoLeafWithTokens(inputTokens);
  195. if (res === null) return [];
  196. const matches: Array<string | string[]> = [];
  197. const onMatches = includeEqualWithSuffix
  198. ? (suffix: string | string[]) => matches.push(suffix)
  199. : (
  200. hostnameMode
  201. ? (suffix: string[]) => {
  202. if (suffix.some((t, i) => t !== inputTokens[i])) {
  203. matches.push(suffix);
  204. }
  205. }
  206. : (suffix: string) => {
  207. if (suffix !== inputTokens) {
  208. matches.push(suffix);
  209. }
  210. }
  211. );
  212. walk(
  213. onMatches as any,
  214. res.node, // Performing DFS from prefix
  215. inputTokens
  216. );
  217. return hostnameMode ? matches.map((m) => fastStringArrayJoin(m as string[], '')) : matches as string[];
  218. };
  219. /**
  220. * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place.
  221. */
  222. const substractSetInPlaceFromFound = (inputSuffix: string, set: Set<string>) => {
  223. if (smolTree) {
  224. throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
  225. }
  226. const inputTokens = suffixToTokens(inputSuffix);
  227. const res = walkIntoLeafWithTokens(inputTokens);
  228. if (res === null) return;
  229. const onMatches = hostnameMode
  230. ? (suffix: string[]) => set.delete(fastStringArrayJoin(suffix, ''))
  231. : (suffix: string) => set.delete(suffix);
  232. walk(
  233. onMatches as any,
  234. res.node, // Performing DFS from prefix
  235. inputTokens
  236. );
  237. };
  238. /**
  239. * Method used to delete a prefix from the trie.
  240. */
  241. const remove = (suffix: string): boolean => {
  242. const res = getSingleChildLeaf(suffixToTokens(suffix));
  243. if (res === null) return false;
  244. if (!res.node.s) return false;
  245. size--;
  246. const { node, toPrune, tokenToPrune } = res;
  247. if (tokenToPrune && toPrune) {
  248. toPrune.c.delete(tokenToPrune);
  249. } else {
  250. node.s = false;
  251. }
  252. return true;
  253. };
  254. /**
  255. * Method used to assert whether the given prefix exists in the Trie.
  256. */
  257. const has = (suffix: string): boolean => {
  258. const tokens = suffixToTokens(suffix);
  259. const res = walkIntoLeafWithTokens(tokens);
  260. return res
  261. ? res.node.s
  262. : false;
  263. };
  264. const dump = () => {
  265. const results: string[] = [];
  266. walk(suffix => {
  267. results.push(
  268. isHostnameMode(suffix) ? fastStringArrayJoin(suffix, '') : suffix
  269. );
  270. });
  271. return results;
  272. };
  273. const whitelist = (suffix: string) => {
  274. if (!hostnameMode && !smolTree) {
  275. throw new Error('whitelist method is only available in hostname mode or smolTree mode.');
  276. }
  277. const tokens = suffixToTokens(suffix);
  278. const res = getSingleChildLeaf(tokens);
  279. if (res === null) return;
  280. const { node, toPrune, tokenToPrune, parent } = res;
  281. // Trying to whitelist `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  282. if (tokens[0] === '.') {
  283. // If there is a `[start]sub.example.com` here, remove it
  284. parent.s = false;
  285. // Removing all the child nodes by disconnecting "."
  286. parent.c.delete('.');
  287. }
  288. // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
  289. const dotNode = node.c.get('.');
  290. if (dotNode) {
  291. dotNode.s = false;
  292. }
  293. // if (dotNode?.s === true) {
  294. // dotNode.s = false;
  295. // }
  296. // return early if not found
  297. if (!node.s) return;
  298. if (tokenToPrune && toPrune) {
  299. toPrune.c.delete(tokenToPrune);
  300. } else {
  301. node.s = false;
  302. }
  303. };
  304. // Actually build trie
  305. if (Array.isArray(from)) {
  306. for (let i = 0, l = from.length; i < l; i++) {
  307. add(from[i]);
  308. }
  309. } else if (from) {
  310. from.forEach(add);
  311. }
  312. return {
  313. add,
  314. contains,
  315. find,
  316. substractSetInPlaceFromFound,
  317. remove,
  318. delete: remove,
  319. has,
  320. dump,
  321. get size() {
  322. if (smolTree) {
  323. throw new Error('A Trie with smolTree enabled cannot have correct size!');
  324. }
  325. return size;
  326. },
  327. get root() {
  328. return root;
  329. },
  330. whitelist,
  331. [Bun.inspect.custom]: (depth: number) => JSON.stringify(deepTrieNodeToJSON(root), null, 2).split('\n').map((line) => ' '.repeat(depth) + line).join('\n'),
  332. hostnameMode,
  333. smolTree
  334. };
  335. };
  336. export type Trie = ReturnType<typeof createTrie>;
  337. export default createTrie;