trie.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
  1. /**
  2. * Hostbane-Optimized Trie based on Mnemonist Trie
  3. */
  4. import { fastStringArrayJoin } from './misc';
  5. import { inspect } from 'node:util';
  6. const noop = () => { /** noop */ };
  7. type TrieNode<Meta = any> = [
  8. boolean, /** sentinel */
  9. TrieNode | null, /** parent */
  10. Map<string, TrieNode>, /** children */
  11. Meta /** meta */
  12. ];
  13. const deepTrieNodeToJSON = (node: TrieNode) => {
  14. const obj: Record<string, any> = {};
  15. if (node[0]) {
  16. obj['[start]'] = node[0];
  17. }
  18. if (node[3] !== undefined) {
  19. obj['[meta]'] = node[3];
  20. }
  21. node[2].forEach((value, key) => {
  22. obj[key] = deepTrieNodeToJSON(value);
  23. });
  24. return obj;
  25. };
  26. const createNode = <Meta = any>(parent: TrieNode | null = null, meta: Meta | null = null): TrieNode => {
  27. return [false, parent, new Map<string, TrieNode>(), meta] as TrieNode<Meta>;
  28. };
  29. export const hostnameToTokens = (hostname: string): string[] => {
  30. const tokens = hostname.split('.');
  31. const results: string[] = [];
  32. let token = '';
  33. for (let i = 0, l = tokens.length; i < l; i++) {
  34. if (i > 0) {
  35. results.push('.');
  36. }
  37. token = tokens[i];
  38. if (token.length > 0) {
  39. results.push(token);
  40. }
  41. }
  42. return results;
  43. };
  44. const walkHostnameTokens = (hostname: string, onToken: (token: string) => boolean | null): boolean | null => {
  45. const tokens = hostname.split('.');
  46. let token = '';
  47. const l = tokens.length - 1;
  48. for (let i = l; i >= 0; i--) {
  49. if (
  50. i < l
  51. // when onToken returns true, we should skip the rest of the loop
  52. && onToken('.')
  53. ) {
  54. return true;
  55. }
  56. token = tokens[i];
  57. if (
  58. token.length > 0
  59. // when onToken returns true, we should skip the rest of the loop
  60. && onToken(token)
  61. ) {
  62. return true;
  63. }
  64. }
  65. return false;
  66. };
  67. export const createTrie = <Meta = any>(from?: string[] | Set<string> | null, smolTree = false) => {
  68. let size = 0;
  69. const root: TrieNode<Meta> = createNode();
  70. /**
  71. * Method used to add the given suffix to the trie.
  72. */
  73. const add = smolTree
  74. ? (suffix: string, meta?: Meta): void => {
  75. let node: TrieNode<Meta> = root;
  76. const onToken = (token: string) => {
  77. if (node[2].has(token)) {
  78. node = node[2].get(token)!;
  79. // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie
  80. // Dedupe the covered subdomain by skipping
  81. if (token === '.' && node[0]) {
  82. return true;
  83. }
  84. } else {
  85. const newNode = createNode(node);
  86. node[2].set(token, newNode);
  87. node = newNode;
  88. }
  89. node[3] = meta!;
  90. return false;
  91. };
  92. // When walkHostnameTokens returns true, we should skip the rest
  93. if (walkHostnameTokens(suffix, onToken)) {
  94. return;
  95. }
  96. // If we are in smolTree mode, we need to do something at the end of the loop
  97. if (suffix[0] === '.') {
  98. // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  99. const parent = node[1]!;
  100. // Make sure parent `[start]sub.example.com` (without dot) is removed (SETINEL to false)
  101. parent[0] = false;
  102. // Removing the rest of the parent's child nodes
  103. node[2].clear();
  104. // The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
  105. // we can use else-if here, because the children is now empty, we don't need to check the leading "."
  106. } else if (node[2].get('.')?.[0] === true) {
  107. // Trying to add `example.com` when there is already a `.example.com` in the trie
  108. // No need to increment size and set SENTINEL to true (skip this "new" item)
  109. return;
  110. }
  111. node[0] = true;
  112. }
  113. : (suffix: string, meta?: Meta): void => {
  114. let node: TrieNode<Meta> = root;
  115. const onToken = (token: string) => {
  116. if (node[2].has(token)) {
  117. node = node[2].get(token)!;
  118. } else {
  119. const newNode = createNode(node);
  120. node[2].set(token, newNode);
  121. node = newNode;
  122. }
  123. node[3] = meta!;
  124. return false;
  125. };
  126. // When walkHostnameTokens returns true, we should skip the rest
  127. if (walkHostnameTokens(suffix, onToken)) {
  128. return;
  129. }
  130. if (!node[0]) { // smol tree don't have size, so else-if here
  131. size++;
  132. node[0] = true;
  133. }
  134. };
  135. const walkIntoLeafWithTokens = (
  136. tokens: string[],
  137. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  138. ) => {
  139. let node: TrieNode = root;
  140. let parent: TrieNode = node;
  141. let token: string;
  142. for (let i = tokens.length - 1; i >= 0; i--) {
  143. token = tokens[i];
  144. if (token === '') {
  145. break;
  146. }
  147. parent = node;
  148. if (node[2].has(token)) {
  149. node = node[2].get(token)!;
  150. } else {
  151. return null;
  152. }
  153. onLoop(node, parent, token);
  154. }
  155. return { node, parent };
  156. };
  157. const walkIntoLeafWithSuffix = (
  158. suffix: string,
  159. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  160. ) => {
  161. let node: TrieNode = root;
  162. let parent: TrieNode = node;
  163. const onToken = (token: string) => {
  164. if (token === '') {
  165. return true;
  166. }
  167. parent = node;
  168. if (node[2].has(token)) {
  169. node = node[2].get(token)!;
  170. } else {
  171. return null;
  172. }
  173. onLoop(node, parent, token);
  174. return false;
  175. };
  176. if (walkHostnameTokens(suffix, onToken) === null) {
  177. return null;
  178. }
  179. return { node, parent };
  180. };
  181. const contains = (suffix: string): boolean => {
  182. return walkIntoLeafWithSuffix(suffix) !== null;
  183. };
  184. const walk = (
  185. onMatches: (suffix: string[], meta: Meta) => void,
  186. initialNode = root,
  187. initialSuffix: string[] = []
  188. ) => {
  189. const nodeStack: Array<TrieNode<Meta>> = [initialNode];
  190. // Resolving initial string (begin the start of the stack)
  191. const suffixStack: string[][] = [initialSuffix];
  192. let node: TrieNode<Meta> = root;
  193. do {
  194. node = nodeStack.pop()!;
  195. const suffix = suffixStack.pop()!;
  196. node[2].forEach((childNode, k) => {
  197. // Pushing the child node to the stack for next iteration of DFS
  198. nodeStack.push(childNode);
  199. suffixStack.push([k, ...suffix]);
  200. });
  201. // If the node is a sentinel, we push the suffix to the results
  202. if (node[0]) {
  203. onMatches(suffix, node[3]);
  204. }
  205. } while (nodeStack.length);
  206. };
  207. interface FindSingleChildLeafResult {
  208. node: TrieNode,
  209. toPrune: TrieNode | null,
  210. tokenToPrune: string | null,
  211. parent: TrieNode
  212. }
  213. const getSingleChildLeaf = (tokens: string[]): FindSingleChildLeafResult | null => {
  214. let toPrune: TrieNode | null = null;
  215. let tokenToPrune: string | null = null;
  216. const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
  217. // Keeping track of a potential branch to prune
  218. // Even if the node size is 1, but the single child is ".", we should retain the branch
  219. // Since the "." could be special if it is the leaf-est node
  220. const onlyChild = node[2].size < 2 && !node[2].has('.');
  221. if (toPrune != null) { // the top-est branch that could potentially being pruned
  222. if (!onlyChild) {
  223. // The branch has moew than single child, retain the branch.
  224. // And we need to abort prune the parent, so we set it to null
  225. toPrune = null;
  226. tokenToPrune = null;
  227. }
  228. } else if (onlyChild) {
  229. // There is only one token child, or no child at all, we can prune it safely
  230. // It is now the top-est branch that could potentially being pruned
  231. toPrune = parent;
  232. tokenToPrune = token;
  233. }
  234. };
  235. const res = walkIntoLeafWithTokens(tokens, onLoop);
  236. if (res === null) return null;
  237. return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  238. };
  239. /**
  240. * Method used to retrieve every item in the trie with the given prefix.
  241. */
  242. const find = (inputSuffix: string, /** @default true */ includeEqualWithSuffix = true): string[] => {
  243. if (smolTree) {
  244. throw new Error('A Trie with smolTree enabled cannot perform find!');
  245. }
  246. const inputTokens = hostnameToTokens(inputSuffix);
  247. const res = walkIntoLeafWithTokens(inputTokens);
  248. if (res === null) return [];
  249. const matches: string[][] = [];
  250. const onMatches = includeEqualWithSuffix
  251. ? (suffix: string[]) => matches.push(suffix)
  252. : (suffix: string[]) => {
  253. if (suffix.some((t, i) => t !== inputTokens[i])) {
  254. matches.push(suffix);
  255. }
  256. };
  257. walk(
  258. onMatches,
  259. res.node, // Performing DFS from prefix
  260. inputTokens
  261. );
  262. return matches.map((m) => fastStringArrayJoin(m, ''));
  263. };
  264. /**
  265. * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place.
  266. */
  267. const substractSetInPlaceFromFound = (inputSuffix: string, set: Set<string>) => {
  268. if (smolTree) {
  269. throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
  270. }
  271. const inputTokens = hostnameToTokens(inputSuffix);
  272. const res = walkIntoLeafWithTokens(inputTokens);
  273. if (res === null) return;
  274. const onMatches = (suffix: string[]) => set.delete(fastStringArrayJoin(suffix, ''));
  275. walk(
  276. onMatches,
  277. res.node, // Performing DFS from prefix
  278. inputTokens
  279. );
  280. };
  281. /**
  282. * Method used to delete a prefix from the trie.
  283. */
  284. const remove = (suffix: string): boolean => {
  285. const res = getSingleChildLeaf(hostnameToTokens(suffix));
  286. if (res === null) return false;
  287. if (!res.node[0]) return false;
  288. size--;
  289. const { node, toPrune, tokenToPrune } = res;
  290. if (tokenToPrune && toPrune) {
  291. toPrune[2].delete(tokenToPrune);
  292. } else {
  293. node[0] = false;
  294. }
  295. return true;
  296. };
  297. /**
  298. * Method used to assert whether the given prefix exists in the Trie.
  299. */
  300. const has = (suffix: string): boolean => {
  301. const res = walkIntoLeafWithSuffix(suffix);
  302. return res
  303. ? res.node[0]
  304. : false;
  305. };
  306. const dump = () => {
  307. const results: string[] = [];
  308. walk(suffix => {
  309. results.push(fastStringArrayJoin(suffix, ''));
  310. });
  311. return results;
  312. };
  313. const dumpWithMeta = () => {
  314. const results: Array<[string, Meta]> = [];
  315. walk((suffix, meta) => {
  316. results.push([fastStringArrayJoin(suffix, ''), meta]);
  317. });
  318. return results;
  319. };
  320. const whitelist = (suffix: string) => {
  321. if (!smolTree) {
  322. throw new Error('whitelist method is only available in smolTree mode.');
  323. }
  324. const tokens = hostnameToTokens(suffix);
  325. const res = getSingleChildLeaf(tokens);
  326. if (res === null) return;
  327. const { node, toPrune, tokenToPrune, parent } = res;
  328. // Trying to whitelist `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  329. if (tokens[0] === '.') {
  330. // If there is a `[start]sub.example.com` here, remove it
  331. parent[0] = false;
  332. // Removing all the child nodes by disconnecting "."
  333. parent[2].delete('.');
  334. }
  335. // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
  336. const dotNode = node[2].get('.');
  337. if (dotNode) {
  338. dotNode[0] = false;
  339. }
  340. // if (dotNode?.s === true) {
  341. // dotnode[0] = false;
  342. // }
  343. // return early if not found
  344. if (!node[0]) return;
  345. if (tokenToPrune && toPrune) {
  346. toPrune[2].delete(tokenToPrune);
  347. } else {
  348. node[0] = false;
  349. }
  350. };
  351. // Actually build trie
  352. if (Array.isArray(from)) {
  353. for (let i = 0, l = from.length; i < l; i++) {
  354. add(from[i]);
  355. }
  356. } else if (from) {
  357. from.forEach((value) => add(value));
  358. }
  359. return {
  360. add,
  361. contains,
  362. find,
  363. substractSetInPlaceFromFound,
  364. remove,
  365. delete: remove,
  366. has,
  367. dump,
  368. dumpWithMeta,
  369. get size() {
  370. if (smolTree) {
  371. throw new Error('A Trie with smolTree enabled cannot have correct size!');
  372. }
  373. return size;
  374. },
  375. get root() {
  376. return root;
  377. },
  378. whitelist,
  379. [inspect.custom]: (depth: number) => fastStringArrayJoin(
  380. JSON.stringify(deepTrieNodeToJSON(root), null, 2).split('\n').map((line) => ' '.repeat(depth) + line),
  381. '\n'
  382. ),
  383. smolTree
  384. };
  385. };
  386. export type Trie = ReturnType<typeof createTrie>;