trie.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. /**
  2. * Hostbane-Optimized Trie based on Mnemonist Trie
  3. */
  4. import { fastStringArrayJoin } from './misc';
  5. import { inspect } from 'node:util';
  6. const noop = () => { /** noop */ };
  7. type TrieNode = [
  8. boolean, /** sentinel */
  9. TrieNode | null, /** parent */
  10. Map<string, TrieNode> /** children */
  11. ];
  12. const deepTrieNodeToJSON = (node: TrieNode) => {
  13. const obj: Record<string, any> = {};
  14. if (node[0]) {
  15. obj['[start]'] = node[0];
  16. }
  17. node[2].forEach((value, key) => {
  18. obj[key] = deepTrieNodeToJSON(value);
  19. });
  20. return obj;
  21. };
  22. const createNode = (parent: TrieNode | null = null): TrieNode => {
  23. return [false, parent, new Map<string, TrieNode>()] as TrieNode;
  24. };
  25. export const hostnameToTokens = (hostname: string): string[] => {
  26. const tokens = hostname.split('.');
  27. const results: string[] = [];
  28. let token = '';
  29. for (let i = 0, l = tokens.length; i < l; i++) {
  30. if (i > 0) {
  31. results.push('.');
  32. }
  33. token = tokens[i];
  34. if (token.length > 0) {
  35. results.push(token);
  36. }
  37. }
  38. return results;
  39. };
  40. const walkHostnameTokens = (hostname: string, onToken: (token: string) => boolean | null): boolean | null => {
  41. const tokens = hostname.split('.');
  42. let token = '';
  43. const l = tokens.length - 1;
  44. for (let i = l; i >= 0; i--) {
  45. if (
  46. i < l
  47. // when onToken returns true, we should skip the rest of the loop
  48. && onToken('.')
  49. ) {
  50. return true;
  51. }
  52. token = tokens[i];
  53. if (
  54. token.length > 0
  55. // when onToken returns true, we should skip the rest of the loop
  56. && onToken(token)
  57. ) {
  58. return true;
  59. }
  60. }
  61. return false;
  62. };
  63. export const createTrie = (from?: string[] | Set<string> | null, smolTree = false) => {
  64. let size = 0;
  65. const root: TrieNode = createNode();
  66. /**
  67. * Method used to add the given suffix to the trie.
  68. */
  69. const add = smolTree
  70. ? (suffix: string): void => {
  71. let node: TrieNode = root;
  72. const onToken = (token: string) => {
  73. if (node[2].has(token)) {
  74. node = node[2].get(token)!;
  75. // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie
  76. // Dedupe the covered subdomain by skipping
  77. if (token === '.' && node[0]) {
  78. return true;
  79. }
  80. } else {
  81. const newNode = createNode(node);
  82. node[2].set(token, newNode);
  83. node = newNode;
  84. }
  85. return false;
  86. };
  87. // When walkHostnameTokens returns true, we should skip the rest
  88. if (walkHostnameTokens(suffix, onToken)) {
  89. return;
  90. }
  91. // If we are in smolTree mode, we need to do something at the end of the loop
  92. if (suffix[0] === '.') {
  93. // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  94. const parent = node[1]!;
  95. // Make sure parent `[start]sub.example.com` (without dot) is removed (SETINEL to false)
  96. parent[0] = false;
  97. // Removing the rest of the parent's child nodes
  98. node[2].clear();
  99. // The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
  100. // we can use else-if here, because the children is now empty, we don't need to check the leading "."
  101. } else if (node[2].get('.')?.[0] === true) {
  102. // Trying to add `example.com` when there is already a `.example.com` in the trie
  103. // No need to increment size and set SENTINEL to true (skip this "new" item)
  104. return;
  105. }
  106. node[0] = true;
  107. }
  108. : (suffix: string): void => {
  109. let node: TrieNode = root;
  110. const onToken = (token: string) => {
  111. if (node[2].has(token)) {
  112. node = node[2].get(token)!;
  113. } else {
  114. const newNode = createNode(node);
  115. node[2].set(token, newNode);
  116. node = newNode;
  117. }
  118. return false;
  119. };
  120. // When walkHostnameTokens returns true, we should skip the rest
  121. if (walkHostnameTokens(suffix, onToken)) {
  122. return;
  123. }
  124. if (!node[0]) { // smol tree don't have size, so else-if here
  125. size++;
  126. node[0] = true;
  127. }
  128. };
  129. const walkIntoLeafWithTokens = (
  130. tokens: string[],
  131. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  132. ) => {
  133. let node: TrieNode = root;
  134. let parent: TrieNode = node;
  135. let token: string;
  136. for (let i = tokens.length - 1; i >= 0; i--) {
  137. token = tokens[i];
  138. if (token === '') {
  139. break;
  140. }
  141. parent = node;
  142. if (node[2].has(token)) {
  143. node = node[2].get(token)!;
  144. } else {
  145. return null;
  146. }
  147. onLoop(node, parent, token);
  148. }
  149. return { node, parent };
  150. };
  151. const walkIntoLeafWithSuffix = (
  152. suffix: string,
  153. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  154. ) => {
  155. let node: TrieNode = root;
  156. let parent: TrieNode = node;
  157. const onToken = (token: string) => {
  158. if (token === '') {
  159. return true;
  160. }
  161. parent = node;
  162. if (node[2].has(token)) {
  163. node = node[2].get(token)!;
  164. } else {
  165. return null;
  166. }
  167. onLoop(node, parent, token);
  168. return false;
  169. };
  170. if (walkHostnameTokens(suffix, onToken) === null) {
  171. return null;
  172. }
  173. return { node, parent };
  174. };
  175. const contains = (suffix: string): boolean => {
  176. return walkIntoLeafWithSuffix(suffix) !== null;
  177. };
  178. const walk = (
  179. onMatches: (suffix: string[]) => void,
  180. initialNode = root,
  181. initialSuffix: string[] = []
  182. ) => {
  183. const nodeStack: TrieNode[] = [initialNode];
  184. // Resolving initial string (begin the start of the stack)
  185. const suffixStack: string[][] = [initialSuffix];
  186. let node: TrieNode = root;
  187. do {
  188. node = nodeStack.pop()!;
  189. const suffix = suffixStack.pop()!;
  190. node[2].forEach((childNode, k) => {
  191. // Pushing the child node to the stack for next iteration of DFS
  192. nodeStack.push(childNode);
  193. suffixStack.push([k, ...suffix]);
  194. });
  195. // If the node is a sentinel, we push the suffix to the results
  196. if (node[0]) {
  197. onMatches(suffix);
  198. }
  199. } while (nodeStack.length);
  200. };
  201. interface FindSingleChildLeafResult {
  202. node: TrieNode,
  203. toPrune: TrieNode | null,
  204. tokenToPrune: string | null,
  205. parent: TrieNode
  206. }
  207. const getSingleChildLeaf = (tokens: string[]): FindSingleChildLeafResult | null => {
  208. let toPrune: TrieNode | null = null;
  209. let tokenToPrune: string | null = null;
  210. const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
  211. // Keeping track of a potential branch to prune
  212. // Even if the node size is 1, but the single child is ".", we should retain the branch
  213. // Since the "." could be special if it is the leaf-est node
  214. const onlyChild = node[2].size < 2 && !node[2].has('.');
  215. if (toPrune != null) { // the top-est branch that could potentially being pruned
  216. if (!onlyChild) {
  217. // The branch has moew than single child, retain the branch.
  218. // And we need to abort prune the parent, so we set it to null
  219. toPrune = null;
  220. tokenToPrune = null;
  221. }
  222. } else if (onlyChild) {
  223. // There is only one token child, or no child at all, we can prune it safely
  224. // It is now the top-est branch that could potentially being pruned
  225. toPrune = parent;
  226. tokenToPrune = token;
  227. }
  228. };
  229. const res = walkIntoLeafWithTokens(tokens, onLoop);
  230. if (res === null) return null;
  231. return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  232. };
  233. /**
  234. * Method used to retrieve every item in the trie with the given prefix.
  235. */
  236. const find = (inputSuffix: string, /** @default true */ includeEqualWithSuffix = true): string[] => {
  237. if (smolTree) {
  238. throw new Error('A Trie with smolTree enabled cannot perform find!');
  239. }
  240. const inputTokens = hostnameToTokens(inputSuffix);
  241. const res = walkIntoLeafWithTokens(inputTokens);
  242. if (res === null) return [];
  243. const matches: string[][] = [];
  244. const onMatches = includeEqualWithSuffix
  245. ? (suffix: string[]) => matches.push(suffix)
  246. : (suffix: string[]) => {
  247. if (suffix.some((t, i) => t !== inputTokens[i])) {
  248. matches.push(suffix);
  249. }
  250. };
  251. walk(
  252. onMatches,
  253. res.node, // Performing DFS from prefix
  254. inputTokens
  255. );
  256. return matches.map((m) => fastStringArrayJoin(m, ''));
  257. };
  258. /**
  259. * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place.
  260. */
  261. const substractSetInPlaceFromFound = (inputSuffix: string, set: Set<string>) => {
  262. if (smolTree) {
  263. throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
  264. }
  265. const inputTokens = hostnameToTokens(inputSuffix);
  266. const res = walkIntoLeafWithTokens(inputTokens);
  267. if (res === null) return;
  268. const onMatches = (suffix: string[]) => set.delete(fastStringArrayJoin(suffix, ''));
  269. walk(
  270. onMatches,
  271. res.node, // Performing DFS from prefix
  272. inputTokens
  273. );
  274. };
  275. /**
  276. * Method used to delete a prefix from the trie.
  277. */
  278. const remove = (suffix: string): boolean => {
  279. const res = getSingleChildLeaf(hostnameToTokens(suffix));
  280. if (res === null) return false;
  281. if (!res.node[0]) return false;
  282. size--;
  283. const { node, toPrune, tokenToPrune } = res;
  284. if (tokenToPrune && toPrune) {
  285. toPrune[2].delete(tokenToPrune);
  286. } else {
  287. node[0] = false;
  288. }
  289. return true;
  290. };
  291. /**
  292. * Method used to assert whether the given prefix exists in the Trie.
  293. */
  294. const has = (suffix: string): boolean => {
  295. const res = walkIntoLeafWithSuffix(suffix);
  296. return res
  297. ? res.node[0]
  298. : false;
  299. };
  300. const dump = () => {
  301. const results: string[] = [];
  302. walk(suffix => {
  303. results.push(fastStringArrayJoin(suffix, ''));
  304. });
  305. return results;
  306. };
  307. const whitelist = (suffix: string) => {
  308. if (!smolTree) {
  309. throw new Error('whitelist method is only available in smolTree mode.');
  310. }
  311. const tokens = hostnameToTokens(suffix);
  312. const res = getSingleChildLeaf(tokens);
  313. if (res === null) return;
  314. const { node, toPrune, tokenToPrune, parent } = res;
  315. // Trying to whitelist `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  316. if (tokens[0] === '.') {
  317. // If there is a `[start]sub.example.com` here, remove it
  318. parent[0] = false;
  319. // Removing all the child nodes by disconnecting "."
  320. parent[2].delete('.');
  321. }
  322. // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
  323. const dotNode = node[2].get('.');
  324. if (dotNode) {
  325. dotNode[0] = false;
  326. }
  327. // if (dotNode?.s === true) {
  328. // dotnode[0] = false;
  329. // }
  330. // return early if not found
  331. if (!node[0]) return;
  332. if (tokenToPrune && toPrune) {
  333. toPrune[2].delete(tokenToPrune);
  334. } else {
  335. node[0] = false;
  336. }
  337. };
  338. // Actually build trie
  339. if (Array.isArray(from)) {
  340. for (let i = 0, l = from.length; i < l; i++) {
  341. add(from[i]);
  342. }
  343. } else if (from) {
  344. from.forEach(add);
  345. }
  346. return {
  347. add,
  348. contains,
  349. find,
  350. substractSetInPlaceFromFound,
  351. remove,
  352. delete: remove,
  353. has,
  354. dump,
  355. get size() {
  356. if (smolTree) {
  357. throw new Error('A Trie with smolTree enabled cannot have correct size!');
  358. }
  359. return size;
  360. },
  361. get root() {
  362. return root;
  363. },
  364. whitelist,
  365. [inspect.custom]: (depth: number) => fastStringArrayJoin(
  366. JSON.stringify(deepTrieNodeToJSON(root), null, 2).split('\n').map((line) => ' '.repeat(depth) + line),
  367. '\n'
  368. ),
  369. smolTree
  370. };
  371. };
  372. export type Trie = ReturnType<typeof createTrie>;
  373. export default createTrie;