trie.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. /**
  2. * Suffix Trie based on Mnemonist Trie
  3. */
  4. import { fastStringArrayJoin } from './misc';
  5. import { inspect } from 'util';
  6. // const { Error, Bun, JSON, Symbol } = globalThis;
  7. const noop = () => { /** noop */ };
  8. type TrieNode = [
  9. boolean, /** sentinel */
  10. TrieNode | null, /** parent */
  11. Map<string, TrieNode> /** children */
  12. ];
  13. const deepTrieNodeToJSON = (node: TrieNode) => {
  14. const obj: Record<string, any> = {};
  15. if (node[0]) {
  16. obj['[start]'] = node[0];
  17. }
  18. node[2].forEach((value, key) => {
  19. obj[key] = deepTrieNodeToJSON(value);
  20. });
  21. return obj;
  22. };
  23. const createNode = (parent: TrieNode | null = null): TrieNode => {
  24. return [false, parent, new Map<string, TrieNode>()] as TrieNode;
  25. };
  26. const hostnameToTokens = (hostname: string): string[] => {
  27. return hostname.split('.').reduce<string[]>((acc, token, index) => {
  28. if (index > 0) {
  29. acc.push('.', token);
  30. } else if (token.length > 0) {
  31. acc.push(token);
  32. }
  33. return acc;
  34. }, []);
  35. };
  36. export const createTrie = (from?: string[] | Set<string> | null, hostnameMode = false, smolTree = false) => {
  37. let size = 0;
  38. const root: TrieNode = createNode();
  39. const isHostnameMode = (_token: string | string[]): _token is string[] => hostnameMode;
  40. const suffixToTokens = hostnameMode
  41. ? hostnameToTokens
  42. : (suffix: string) => suffix;
  43. /**
  44. * Method used to add the given suffix to the trie.
  45. */
  46. const add = smolTree
  47. ? (suffix: string): void => {
  48. let node: TrieNode = root;
  49. let token: string;
  50. const tokens = suffixToTokens(suffix);
  51. for (let i = tokens.length - 1; i >= 0; i--) {
  52. token = tokens[i];
  53. if (node[2].has(token)) {
  54. node = node[2].get(token)!;
  55. // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie
  56. // Dedupe the covered subdomain by skipping
  57. if (token === '.' && node[0]) {
  58. return;
  59. }
  60. } else {
  61. const newNode = createNode(node);
  62. node[2].set(token, newNode);
  63. node = newNode;
  64. }
  65. }
  66. // If we are in smolTree mode, we need to do something at the end of the loop
  67. if (tokens[0] === '.') {
  68. // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  69. const parent = node[1]!;
  70. // Make sure parent `[start]sub.example.com` (without dot) is removed (SETINEL to false)
  71. parent[0] = false;
  72. // Removing the rest of the parent's child nodes
  73. node[2].clear();
  74. // The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
  75. // we can use else-if here, because the children is now empty, we don't need to check the leading "."
  76. } else if (node[2].get('.')?.[0] === true) {
  77. // Trying to add `example.com` when there is already a `.example.com` in the trie
  78. // No need to increment size and set SENTINEL to true (skip this "new" item)
  79. return;
  80. }
  81. node[0] = true;
  82. }
  83. : (suffix: string): void => {
  84. let node: TrieNode = root;
  85. let token: string;
  86. const tokens = suffixToTokens(suffix);
  87. for (let i = tokens.length - 1; i >= 0; i--) {
  88. token = tokens[i];
  89. if (node[2].has(token)) {
  90. node = node[2].get(token)!;
  91. } else {
  92. const newNode = createNode(node);
  93. node[2].set(token, newNode);
  94. node = newNode;
  95. }
  96. }
  97. if (!node[0]) { // smol tree don't have size, so else-if here
  98. size++;
  99. node[0] = true;
  100. }
  101. };
  102. const walkIntoLeafWithTokens = (
  103. tokens: string | string[],
  104. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  105. ) => {
  106. let node: TrieNode = root;
  107. let parent: TrieNode = node;
  108. let token: string;
  109. for (let i = tokens.length - 1; i >= 0; i--) {
  110. token = tokens[i];
  111. if (hostnameMode && token === '') {
  112. break;
  113. }
  114. parent = node;
  115. if (node[2].has(token)) {
  116. node = node[2].get(token)!;
  117. } else {
  118. return null;
  119. }
  120. onLoop(node, parent, token);
  121. }
  122. return { node, parent };
  123. };
  124. const contains = (suffix: string): boolean => {
  125. const tokens = suffixToTokens(suffix);
  126. return walkIntoLeafWithTokens(tokens) !== null;
  127. };
  128. const walk = (
  129. onMatches: (suffix: string | string[]) => void,
  130. initialNode = root,
  131. initialSuffix: string | string[] = hostnameMode ? [] : ''
  132. ) => {
  133. const nodeStack: TrieNode[] = [initialNode];
  134. // Resolving initial string (begin the start of the stack)
  135. const suffixStack: Array<string | string[]> = [initialSuffix];
  136. let node: TrieNode = root;
  137. do {
  138. node = nodeStack.pop()!;
  139. const suffix = suffixStack.pop()!;
  140. node[2].forEach((childNode, k) => {
  141. // Pushing the child node to the stack for next iteration of DFS
  142. nodeStack.push(childNode);
  143. suffixStack.push(isHostnameMode(suffix) ? [k, ...suffix] : k + suffix);
  144. });
  145. // If the node is a sentinel, we push the suffix to the results
  146. if (node[0]) {
  147. onMatches(suffix);
  148. }
  149. } while (nodeStack.length);
  150. };
  151. interface FindSingleChildLeafResult {
  152. node: TrieNode,
  153. toPrune: TrieNode | null,
  154. tokenToPrune: string | null,
  155. parent: TrieNode
  156. }
  157. const getSingleChildLeaf = (tokens: string | string[]): FindSingleChildLeafResult | null => {
  158. let toPrune: TrieNode | null = null;
  159. let tokenToPrune: string | null = null;
  160. const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
  161. // Keeping track of a potential branch to prune
  162. // Even if the node size is 1, but the single child is ".", we should retain the branch
  163. // Since the "." could be special if it is the leaf-est node
  164. const onlyChild = node[2].size < 2 && (!hostnameMode || !node[2].has('.'));
  165. if (toPrune != null) { // the top-est branch that could potentially being pruned
  166. if (!onlyChild) {
  167. // The branch has moew than single child, retain the branch.
  168. // And we need to abort prune the parent, so we set it to null
  169. toPrune = null;
  170. tokenToPrune = null;
  171. }
  172. } else if (onlyChild) {
  173. // There is only one token child, or no child at all, we can prune it safely
  174. // It is now the top-est branch that could potentially being pruned
  175. toPrune = parent;
  176. tokenToPrune = token;
  177. }
  178. };
  179. const res = walkIntoLeafWithTokens(tokens, onLoop);
  180. if (res === null) return null;
  181. return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  182. };
  183. /**
  184. * Method used to retrieve every item in the trie with the given prefix.
  185. */
  186. const find = (inputSuffix: string, /** @default true */ includeEqualWithSuffix = true): string[] => {
  187. if (smolTree) {
  188. throw new Error('A Trie with smolTree enabled cannot perform find!');
  189. }
  190. const inputTokens = suffixToTokens(inputSuffix);
  191. const res = walkIntoLeafWithTokens(inputTokens);
  192. if (res === null) return [];
  193. const matches: Array<string | string[]> = [];
  194. const onMatches = includeEqualWithSuffix
  195. ? (suffix: string | string[]) => matches.push(suffix)
  196. : (
  197. hostnameMode
  198. ? (suffix: string[]) => {
  199. if (suffix.some((t, i) => t !== inputTokens[i])) {
  200. matches.push(suffix);
  201. }
  202. }
  203. : (suffix: string) => {
  204. if (suffix !== inputTokens) {
  205. matches.push(suffix);
  206. }
  207. }
  208. );
  209. walk(
  210. onMatches as any,
  211. res.node, // Performing DFS from prefix
  212. inputTokens
  213. );
  214. return hostnameMode ? matches.map((m) => fastStringArrayJoin(m as string[], '')) : matches as string[];
  215. };
  216. /**
  217. * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place.
  218. */
  219. const substractSetInPlaceFromFound = (inputSuffix: string, set: Set<string>) => {
  220. if (smolTree) {
  221. throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
  222. }
  223. const inputTokens = suffixToTokens(inputSuffix);
  224. const res = walkIntoLeafWithTokens(inputTokens);
  225. if (res === null) return;
  226. const onMatches = hostnameMode
  227. ? (suffix: string[]) => set.delete(fastStringArrayJoin(suffix, ''))
  228. : (suffix: string) => set.delete(suffix);
  229. walk(
  230. onMatches as any,
  231. res.node, // Performing DFS from prefix
  232. inputTokens
  233. );
  234. };
  235. /**
  236. * Method used to delete a prefix from the trie.
  237. */
  238. const remove = (suffix: string): boolean => {
  239. const res = getSingleChildLeaf(suffixToTokens(suffix));
  240. if (res === null) return false;
  241. if (!res.node[0]) return false;
  242. size--;
  243. const { node, toPrune, tokenToPrune } = res;
  244. if (tokenToPrune && toPrune) {
  245. toPrune[2].delete(tokenToPrune);
  246. } else {
  247. node[0] = false;
  248. }
  249. return true;
  250. };
  251. /**
  252. * Method used to assert whether the given prefix exists in the Trie.
  253. */
  254. const has = (suffix: string): boolean => {
  255. const tokens = suffixToTokens(suffix);
  256. const res = walkIntoLeafWithTokens(tokens);
  257. return res
  258. ? res.node[0]
  259. : false;
  260. };
  261. const dump = () => {
  262. const results: string[] = [];
  263. walk(suffix => {
  264. results.push(
  265. isHostnameMode(suffix) ? fastStringArrayJoin(suffix, '') : suffix
  266. );
  267. });
  268. return results;
  269. };
  270. const whitelist = (suffix: string) => {
  271. if (!hostnameMode && !smolTree) {
  272. throw new Error('whitelist method is only available in hostname mode or smolTree mode.');
  273. }
  274. const tokens = suffixToTokens(suffix);
  275. const res = getSingleChildLeaf(tokens);
  276. if (res === null) return;
  277. const { node, toPrune, tokenToPrune, parent } = res;
  278. // Trying to whitelist `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  279. if (tokens[0] === '.') {
  280. // If there is a `[start]sub.example.com` here, remove it
  281. parent[0] = false;
  282. // Removing all the child nodes by disconnecting "."
  283. parent[2].delete('.');
  284. }
  285. // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
  286. const dotNode = node[2].get('.');
  287. if (dotNode) {
  288. dotNode[0] = false;
  289. }
  290. // if (dotNode?.s === true) {
  291. // dotnode[0] = false;
  292. // }
  293. // return early if not found
  294. if (!node[0]) return;
  295. if (tokenToPrune && toPrune) {
  296. toPrune[2].delete(tokenToPrune);
  297. } else {
  298. node[0] = false;
  299. }
  300. };
  301. // Actually build trie
  302. if (Array.isArray(from)) {
  303. for (let i = 0, l = from.length; i < l; i++) {
  304. add(from[i]);
  305. }
  306. } else if (from) {
  307. from.forEach(add);
  308. }
  309. return {
  310. add,
  311. contains,
  312. find,
  313. substractSetInPlaceFromFound,
  314. remove,
  315. delete: remove,
  316. has,
  317. dump,
  318. get size() {
  319. if (smolTree) {
  320. throw new Error('A Trie with smolTree enabled cannot have correct size!');
  321. }
  322. return size;
  323. },
  324. get root() {
  325. return root;
  326. },
  327. whitelist,
  328. [inspect.custom]: (depth: number) => JSON.stringify(deepTrieNodeToJSON(root), null, 2).split('\n').map((line) => ' '.repeat(depth) + line).join('\n'),
  329. hostnameMode,
  330. smolTree
  331. };
  332. };
  333. export type Trie = ReturnType<typeof createTrie>;
  334. export default createTrie;