trie.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. /**
  2. * Suffix Trie based on Mnemonist Trie
  3. */
  4. import { fastStringArrayJoin } from './misc';
  5. // const { Error, Bun, JSON, Symbol } = globalThis;
  6. const noop = () => { /** noop */ };
  7. type TrieNode = [
  8. boolean, /** sentinel */
  9. TrieNode | null, /** parent */
  10. Map<string, TrieNode> /** children */
  11. ];
  12. const deepTrieNodeToJSON = (node: TrieNode) => {
  13. const obj: Record<string, any> = {};
  14. if (node[0]) {
  15. obj['[start]'] = node[0];
  16. }
  17. node[2].forEach((value, key) => {
  18. obj[key] = deepTrieNodeToJSON(value);
  19. });
  20. return obj;
  21. };
  22. const createNode = (parent: TrieNode | null = null): TrieNode => {
  23. return [false, parent, new Map<string, TrieNode>()] as TrieNode;
  24. };
  25. const hostnameToTokens = (hostname: string): string[] => {
  26. let buf = '';
  27. const tokens: string[] = [];
  28. for (let i = 0, l = hostname.length; i < l; i++) {
  29. const c = hostname[i];
  30. if (c === '.') {
  31. if (buf) {
  32. tokens.push(buf, /* . */ c);
  33. buf = '';
  34. } else {
  35. tokens.push(/* . */ c);
  36. }
  37. } else {
  38. buf += c;
  39. }
  40. }
  41. if (buf) {
  42. tokens.push(buf);
  43. }
  44. return tokens;
  45. };
  46. export const createTrie = (from?: string[] | Set<string> | null, hostnameMode = false, smolTree = false) => {
  47. let size = 0;
  48. const root: TrieNode = createNode();
  49. const isHostnameMode = (_token: string | string[]): _token is string[] => hostnameMode;
  50. const suffixToTokens = hostnameMode
  51. ? hostnameToTokens
  52. : (suffix: string) => suffix;
  53. /**
  54. * Method used to add the given suffix to the trie.
  55. */
  56. const add = smolTree
  57. ? (suffix: string): void => {
  58. let node: TrieNode = root;
  59. let token: string;
  60. const tokens = suffixToTokens(suffix);
  61. for (let i = tokens.length - 1; i >= 0; i--) {
  62. token = tokens[i];
  63. if (node[2].has(token)) {
  64. node = node[2].get(token)!;
  65. // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie
  66. // Dedupe the covered subdomain by skipping
  67. if (token === '.' && node[0]) {
  68. return;
  69. }
  70. } else {
  71. const newNode = createNode(node);
  72. node[2].set(token, newNode);
  73. node = newNode;
  74. }
  75. }
  76. // If we are in smolTree mode, we need to do something at the end of the loop
  77. if (tokens[0] === '.') {
  78. // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  79. const parent = node[1]!;
  80. // Make sure parent `[start]sub.example.com` (without dot) is removed (SETINEL to false)
  81. parent[0] = false;
  82. // Removing the rest of the parent's child nodes
  83. node[2].clear();
  84. // The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
  85. // we can use else-if here, because the children is now empty, we don't need to check the leading "."
  86. } else if (node[2].get('.')?.[0] === true) {
  87. // Trying to add `example.com` when there is already a `.example.com` in the trie
  88. // No need to increment size and set SENTINEL to true (skip this "new" item)
  89. return;
  90. }
  91. node[0] = true;
  92. }
  93. : (suffix: string): void => {
  94. let node: TrieNode = root;
  95. let token: string;
  96. const tokens = suffixToTokens(suffix);
  97. for (let i = tokens.length - 1; i >= 0; i--) {
  98. token = tokens[i];
  99. if (node[2].has(token)) {
  100. node = node[2].get(token)!;
  101. } else {
  102. const newNode = createNode(node);
  103. node[2].set(token, newNode);
  104. node = newNode;
  105. }
  106. }
  107. if (!node[0]) { // smol tree don't have size, so else-if here
  108. size++;
  109. node[0] = true;
  110. }
  111. };
  112. const walkIntoLeafWithTokens = (
  113. tokens: string | string[],
  114. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  115. ) => {
  116. let node: TrieNode = root;
  117. let parent: TrieNode = node;
  118. let token: string;
  119. for (let i = tokens.length - 1; i >= 0; i--) {
  120. token = tokens[i];
  121. if (hostnameMode && token === '') {
  122. break;
  123. }
  124. parent = node;
  125. if (node[2].has(token)) {
  126. node = node[2].get(token)!;
  127. } else {
  128. return null;
  129. }
  130. onLoop(node, parent, token);
  131. }
  132. return { node, parent };
  133. };
  134. const contains = (suffix: string): boolean => {
  135. const tokens = suffixToTokens(suffix);
  136. return walkIntoLeafWithTokens(tokens) !== null;
  137. };
  138. const walk = (
  139. onMatches: (suffix: string | string[]) => void,
  140. initialNode = root,
  141. initialSuffix: string | string[] = hostnameMode ? [] : ''
  142. ) => {
  143. const nodeStack: TrieNode[] = [initialNode];
  144. // Resolving initial string (begin the start of the stack)
  145. const suffixStack: Array<string | string[]> = [initialSuffix];
  146. let node: TrieNode = root;
  147. do {
  148. node = nodeStack.pop()!;
  149. const suffix = suffixStack.pop()!;
  150. node[2].forEach((childNode, k) => {
  151. // Pushing the child node to the stack for next iteration of DFS
  152. nodeStack.push(childNode);
  153. suffixStack.push(isHostnameMode(suffix) ? [k, ...suffix] : k + suffix);
  154. });
  155. // If the node is a sentinel, we push the suffix to the results
  156. if (node[0]) {
  157. onMatches(suffix);
  158. }
  159. } while (nodeStack.length);
  160. };
  161. interface FindSingleChildLeafResult {
  162. node: TrieNode,
  163. toPrune: TrieNode | null,
  164. tokenToPrune: string | null,
  165. parent: TrieNode
  166. }
  167. const getSingleChildLeaf = (tokens: string | string[]): FindSingleChildLeafResult | null => {
  168. let toPrune: TrieNode | null = null;
  169. let tokenToPrune: string | null = null;
  170. const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
  171. // Keeping track of a potential branch to prune
  172. // Even if the node size is 1, but the single child is ".", we should retain the branch
  173. // Since the "." could be special if it is the leaf-est node
  174. const onlyChild = node[2].size < 2 && (!hostnameMode || !node[2].has('.'));
  175. if (toPrune != null) { // the top-est branch that could potentially being pruned
  176. if (!onlyChild) {
  177. // The branch has moew than single child, retain the branch.
  178. // And we need to abort prune the parent, so we set it to null
  179. toPrune = null;
  180. tokenToPrune = null;
  181. }
  182. } else if (onlyChild) {
  183. // There is only one token child, or no child at all, we can prune it safely
  184. // It is now the top-est branch that could potentially being pruned
  185. toPrune = parent;
  186. tokenToPrune = token;
  187. }
  188. };
  189. const res = walkIntoLeafWithTokens(tokens, onLoop);
  190. if (res === null) return null;
  191. return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  192. };
  193. /**
  194. * Method used to retrieve every item in the trie with the given prefix.
  195. */
  196. const find = (inputSuffix: string, /** @default true */ includeEqualWithSuffix = true): string[] => {
  197. if (smolTree) {
  198. throw new Error('A Trie with smolTree enabled cannot perform find!');
  199. }
  200. const inputTokens = suffixToTokens(inputSuffix);
  201. const res = walkIntoLeafWithTokens(inputTokens);
  202. if (res === null) return [];
  203. const matches: Array<string | string[]> = [];
  204. const onMatches = includeEqualWithSuffix
  205. ? (suffix: string | string[]) => matches.push(suffix)
  206. : (
  207. hostnameMode
  208. ? (suffix: string[]) => {
  209. if (suffix.some((t, i) => t !== inputTokens[i])) {
  210. matches.push(suffix);
  211. }
  212. }
  213. : (suffix: string) => {
  214. if (suffix !== inputTokens) {
  215. matches.push(suffix);
  216. }
  217. }
  218. );
  219. walk(
  220. onMatches as any,
  221. res.node, // Performing DFS from prefix
  222. inputTokens
  223. );
  224. return hostnameMode ? matches.map((m) => fastStringArrayJoin(m as string[], '')) : matches as string[];
  225. };
  226. /**
  227. * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place.
  228. */
  229. const substractSetInPlaceFromFound = (inputSuffix: string, set: Set<string>) => {
  230. if (smolTree) {
  231. throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
  232. }
  233. const inputTokens = suffixToTokens(inputSuffix);
  234. const res = walkIntoLeafWithTokens(inputTokens);
  235. if (res === null) return;
  236. const onMatches = hostnameMode
  237. ? (suffix: string[]) => set.delete(fastStringArrayJoin(suffix, ''))
  238. : (suffix: string) => set.delete(suffix);
  239. walk(
  240. onMatches as any,
  241. res.node, // Performing DFS from prefix
  242. inputTokens
  243. );
  244. };
  245. /**
  246. * Method used to delete a prefix from the trie.
  247. */
  248. const remove = (suffix: string): boolean => {
  249. const res = getSingleChildLeaf(suffixToTokens(suffix));
  250. if (res === null) return false;
  251. if (!res.node[0]) return false;
  252. size--;
  253. const { node, toPrune, tokenToPrune } = res;
  254. if (tokenToPrune && toPrune) {
  255. toPrune[2].delete(tokenToPrune);
  256. } else {
  257. node[0] = false;
  258. }
  259. return true;
  260. };
  261. /**
  262. * Method used to assert whether the given prefix exists in the Trie.
  263. */
  264. const has = (suffix: string): boolean => {
  265. const tokens = suffixToTokens(suffix);
  266. const res = walkIntoLeafWithTokens(tokens);
  267. return res
  268. ? res.node[0]
  269. : false;
  270. };
  271. const dump = () => {
  272. const results: string[] = [];
  273. walk(suffix => {
  274. results.push(
  275. isHostnameMode(suffix) ? fastStringArrayJoin(suffix, '') : suffix
  276. );
  277. });
  278. return results;
  279. };
  280. const whitelist = (suffix: string) => {
  281. if (!hostnameMode && !smolTree) {
  282. throw new Error('whitelist method is only available in hostname mode or smolTree mode.');
  283. }
  284. const tokens = suffixToTokens(suffix);
  285. const res = getSingleChildLeaf(tokens);
  286. if (res === null) return;
  287. const { node, toPrune, tokenToPrune, parent } = res;
  288. // Trying to whitelist `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  289. if (tokens[0] === '.') {
  290. // If there is a `[start]sub.example.com` here, remove it
  291. parent[0] = false;
  292. // Removing all the child nodes by disconnecting "."
  293. parent[2].delete('.');
  294. }
  295. // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
  296. const dotNode = node[2].get('.');
  297. if (dotNode) {
  298. dotNode[0] = false;
  299. }
  300. // if (dotNode?.s === true) {
  301. // dotnode[0] = false;
  302. // }
  303. // return early if not found
  304. if (!node[0]) return;
  305. if (tokenToPrune && toPrune) {
  306. toPrune[2].delete(tokenToPrune);
  307. } else {
  308. node[0] = false;
  309. }
  310. };
  311. // Actually build trie
  312. if (Array.isArray(from)) {
  313. for (let i = 0, l = from.length; i < l; i++) {
  314. add(from[i]);
  315. }
  316. } else if (from) {
  317. from.forEach(add);
  318. }
  319. return {
  320. add,
  321. contains,
  322. find,
  323. substractSetInPlaceFromFound,
  324. remove,
  325. delete: remove,
  326. has,
  327. dump,
  328. get size() {
  329. if (smolTree) {
  330. throw new Error('A Trie with smolTree enabled cannot have correct size!');
  331. }
  332. return size;
  333. },
  334. get root() {
  335. return root;
  336. },
  337. whitelist,
  338. [Bun.inspect.custom]: (depth: number) => JSON.stringify(deepTrieNodeToJSON(root), null, 2).split('\n').map((line) => ' '.repeat(depth) + line).join('\n'),
  339. hostnameMode,
  340. smolTree
  341. };
  342. };
  343. export type Trie = ReturnType<typeof createTrie>;
  344. export default createTrie;