trie.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496
  /**
   * Hostbane-Optimized Trie based on Mnemonist Trie
   */
  4. import { fastStringArrayJoin } from './misc';
  5. import util from 'node:util';
  6. import { noop } from 'foxact/noop';
  /**
   * A trie node, stored as a fixed-size tuple:
   *  - `[0]` sentinel: `true` when a stored suffix ends on this node
   *  - `[1]` parent: the node this one was created under, or `null`
   *  - `[2]` children: child nodes keyed by token
   *  - `[3]` meta: the metadata attached to this node
   */
  type TrieNode<Meta = any> = [
    sentinel: boolean,
    parent: TrieNode | null,
    children: Map<string, TrieNode>,
    meta: Meta
  ];
  /**
   * Recursively converts a trie node and all of its descendants into a plain object.
   *
   * The resulting object has:
   *  - a `[start]` key when the node's sentinel flag is set,
   *  - a `[meta]` key when the node's meta is neither `null` nor `undefined`
   *    (passed through `unpackMeta` when one is given),
   *  - one key per child token, holding the converted child.
   *
   * @param node the node to convert
   * @param unpackMeta optional converter applied to the meta before it is stored
   * @returns the plain-object view of `node`
   */
  function deepTrieNodeToJSON(
    node: TrieNode,
    unpackMeta: ((meta?: any) => string) | undefined
  ) {
    const [sentinel, , children, meta] = node;
    const json: Record<string, any> = {};

    if (sentinel) {
      json['[start]'] = sentinel;
    }

    if (meta != null) {
      json['[meta]'] = unpackMeta ? unpackMeta(meta) : meta;
    }

    for (const [token, child] of children) {
      json[token] = deepTrieNodeToJSON(child, unpackMeta);
    }

    return json;
  }
  /**
   * Builds an empty node: sentinel off, no children, `null` meta.
   *
   * @param parent the node the new node hangs under (`null` when omitted)
   */
  const createNode = <Meta = any>(parent: TrieNode | null = null): TrieNode => {
    const children = new Map<string, TrieNode>();
    const fresh = [false, parent, children, null] as TrieNode<Meta>;
    return fresh;
  };
  /**
   * Splits a hostname into tokens, keeping every "." separator as a token of its own.
   * Empty labels (for example the one in front of a leading dot) are dropped, while
   * their separators are kept.
   *
   * Example: `.skk.moe` becomes `['.', 'skk', '.', 'moe']`.
   *
   * @param hostname the hostname to split
   * @returns the tokens in left-to-right order
   */
  export function hostnameToTokens(hostname: string): string[] {
    const out: string[] = [];

    hostname.split('.').forEach((label, index) => {
      // A separator sits in front of every label except the first one
      if (index !== 0) {
        out.push('.');
      }
      if (label !== '') {
        out.push(label);
      }
    });

    return out;
  }
  /**
   * Feeds the tokens of a hostname to `onToken` from right to left, without building
   * an intermediate token array. Every "." separator is reported as a token of its
   * own; empty labels are skipped.
   *
   * Example: `blog.skk.moe` is reported as `moe`, `.`, `skk`, `.`, `blog`.
   *
   * The callback steers the walk with its return value:
   *  - `false`: keep walking
   *  - `true`: stop right away, the walk returns `true`
   *  - `null`: abort right away, the walk returns `null`
   *
   * @param hostname the hostname to walk
   * @param onToken invoked once per token
   * @returns `true` or `null` when the callback stopped the walk with that value,
   *          `false` when every token was visited
   */
  function walkHostnameTokens(hostname: string, onToken: (token: string) => boolean | null): boolean | null {
    const labels = hostname.split('.');
    const last = labels.length - 1;

    let verdict: boolean | null;

    for (let i = last; i >= 0; i--) {
      // There is no separator to the right of the rightmost label
      if (i < last) {
        verdict = onToken('.');
        // `null` has to be handed back as-is: callers compare the result against `null`
        if (verdict === null) {
          return null;
        }
        if (verdict) {
          return true;
        }
      }

      const label = labels[i];
      if (label.length > 0) {
        verdict = onToken(label);
        if (verdict === null) {
          return null;
        }
        if (verdict) {
          return true;
        }
      }
    }

    return false;
  }
  /**
   * Outcome of looking up a token path while tracking which branch could be pruned.
   */
  interface FindSingleChildLeafResult<Meta> {
    /** The node the token path ends on */
    node: TrieNode<Meta>;
    /** The node whose child `tokenToPrune` can be deleted to drop the branch, if any */
    toPrune: TrieNode<Meta> | null;
    /** The child key to delete from `toPrune`, if any */
    tokenToPrune: string | null;
    /** The node visited right before `node` */
    parent: TrieNode<Meta>;
  }
  /**
   * Shared implementation of the hostname tries.
   *
   * A hostname is stored token by token starting from its rightmost label, and every
   * "." separator is a token of its own, so `.example.com` and `example.com` end on
   * different nodes. Subclasses supply `add`.
   */
  abstract class Triebase<Meta = any> {
    protected readonly $root: TrieNode<Meta> = createNode();
    protected $size = 0;

    /** The root node of the trie */
    get root() {
      return this.$root;
    }

    /**
     * @param from optional initial entries; each one is passed to `add`
     */
    constructor(from?: string[] | Set<string> | null) {
      // Actually build trie
      if (Array.isArray(from)) {
        for (let i = 0, l = from.length; i < l; i++) {
          this.add(from[i]);
        }
      } else if (from) {
        from.forEach((value) => this.add(value));
      }
    }

    /** Inserts a suffix (optionally with metadata) into the trie */
    public abstract add(suffix: string, meta?: Meta): void;

    /**
     * Follows an already tokenized suffix (see `hostnameToTokens`) from the root,
     * consuming the tokens from last to first.
     *
     * @param tokens the tokens of the suffix, in left-to-right order
     * @param onLoop invoked after every successful step with the node that was entered,
     *               the node it was entered from, and the token that was followed
     * @returns the final node and the node visited right before it, or `null` when
     *          the path does not exist
     */
    protected walkIntoLeafWithTokens(
      tokens: string[],
      onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
    ) {
      let node: TrieNode = this.$root;
      let parent: TrieNode = node;

      for (let i = tokens.length - 1; i >= 0; i--) {
        const token = tokens[i];
        const child = node[2].get(token);
        if (child === undefined) {
          return null;
        }

        parent = node;
        node = child;

        onLoop(node, parent, token);
      }

      return { node, parent };
    }

    /**
     * Same as `walkIntoLeafWithTokens`, but takes the suffix as a string and walks it
     * with `walkHostnameTokens`.
     *
     * @returns the final node and the node visited right before it, or `null` when
     *          the path does not exist
     */
    protected walkIntoLeafWithSuffix(
      suffix: string,
      onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
    ) {
      let node: TrieNode = this.$root;
      let parent: TrieNode = node;
      // Remember a miss locally, so that the lookup fails even when the token walker
      // does not hand the `null` verdict back to us
      let missing = false;

      const onToken = (token: string) => {
        if (missing) {
          // A previous token was not found, the remaining tokens can not match
          return null;
        }
        if (token === '') {
          return true;
        }

        parent = node;

        const child = node[2].get(token);
        if (child === undefined) {
          missing = true;
          return null;
        }
        node = child;

        onLoop(node, parent, token);

        return false;
      };

      if (walkHostnameTokens(suffix, onToken) === null || missing) {
        return null;
      }

      return { node, parent };
    }

    /**
     * Whether the path of the given suffix exists in the trie, regardless of whether
     * the suffix itself was stored.
     */
    public contains(suffix: string): boolean {
      return this.walkIntoLeafWithSuffix(suffix) !== null;
    }

    /**
     * Depth-first traversal that reports every stored suffix at or below a node.
     *
     * @param onMatches invoked for every sentinel node with its tokens (left-to-right)
     *                  and its meta
     * @param initialNode the node to start from
     * @param initialSuffix the tokens that lead to `initialNode`
     */
    private walk(
      onMatches: (suffix: string[], meta: Meta) => void,
      initialNode = this.$root,
      initialSuffix: string[] = []
    ) {
      const nodeStack: Array<TrieNode<Meta>> = [initialNode];
      // Resolving initial string (begin the start of the stack)
      const suffixStack: string[][] = [initialSuffix];

      let node: TrieNode<Meta> = initialNode;

      do {
        node = nodeStack.pop()!;
        const suffix = suffixStack.pop()!;

        node[2].forEach((childNode, k) => {
          // Pushing the child node to the stack for next iteration of DFS
          nodeStack.push(childNode);
          // Deeper tokens sit further to the left of the hostname
          suffixStack.push([k, ...suffix]);
        });

        // If the node is a sentinel, we push the suffix to the results
        if (node[0]) {
          onMatches(suffix, node[3]);
        }
      } while (nodeStack.length);
    }

    /**
     * Looks up a tokenized suffix and, along the way, works out the top-most branch
     * that exists solely for that suffix and could therefore be deleted together
     * with it.
     *
     * @param tokens the tokens of the suffix, in left-to-right order
     * @returns the lookup result plus the prune candidate (`toPrune` / `tokenToPrune`
     *          are `null` when nothing can be pruned), or `null` when the path does
     *          not exist
     */
    protected getSingleChildLeaf(tokens: string[]): FindSingleChildLeafResult<Meta> | null {
      let toPrune: TrieNode | null = null;
      let tokenToPrune: string | null = null;

      const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
        // Keeping track of a potential branch to prune

        // A stored suffix ending on this node counts as a branch of its own, just like
        // a child does. Otherwise pruning the branch would silently drop other entries:
        // either a suffix stored on an intermediate node, or the descendants of the
        // final node.
        const branches = node[2].size + (node[0] ? 1 : 0);

        // Even if the node size is 1, but the single child is ".", we should retain the branch
        // Since the "." could be special if it is the leaf-est node
        const onlyChild = branches < 2 && !node[2].has('.');

        if (toPrune != null) { // the top-est branch that could potentially being pruned
          if (!onlyChild) {
            // The branch has more than a single child, retain the branch.
            // And we need to abort prune the parent, so we set it to null
            toPrune = null;
            tokenToPrune = null;
          }
        } else if (onlyChild) {
          // There is only one token child, or no child at all, we can prune it safely
          // It is now the top-est branch that could potentially being pruned
          toPrune = parent;
          tokenToPrune = token;
        }
      };

      const res = this.walkIntoLeafWithTokens(tokens, onLoop);

      if (res === null) return null;
      return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
    }

    /**
     * Method used to retrieve every item in the trie that ends with the given suffix.
     *
     * @param inputSuffix the suffix to look for
     * @param includeEqualWithSuffix whether an item equal to `inputSuffix` itself is
     *                               part of the result
     * @returns the matching items, or an empty array when the suffix path does not exist
     */
    public find(
      inputSuffix: string,
      /** @default true */ includeEqualWithSuffix = true
    ): string[] {
      const inputTokens = hostnameToTokens(inputSuffix);
      const res = this.walkIntoLeafWithTokens(inputTokens);
      if (res === null) return [];

      const matches: string[][] = [];

      const onMatches = includeEqualWithSuffix
        // fast path (default option)
        ? (suffix: string[]) => matches.push(suffix)
        // slow path
        : (suffix: string[]) => {
          if (!deepEqualArray(suffix, inputTokens)) {
            matches.push(suffix);
          }
        };

      this.walk(
        onMatches,
        res.node, // Performing DFS from the node the suffix ends on
        inputTokens
      );

      return matches.map((m) => fastStringArrayJoin(m, ''));
    }

    /**
     * Method used to delete a suffix from the trie.
     *
     * @returns `true` when the suffix was stored and has been removed, `false` otherwise
     */
    public remove(suffix: string): boolean {
      const res = this.getSingleChildLeaf(hostnameToTokens(suffix));
      if (res === null) return false;

      if (!res.node[0]) return false;

      this.$size--;
      const { node, toPrune, tokenToPrune } = res;

      if (tokenToPrune && toPrune) {
        // Nothing else lives on the branch, drop it as a whole
        toPrune[2].delete(tokenToPrune);
      } else {
        // The node is still needed by other entries, only unmark it
        node[0] = false;
      }

      return true;
    }

    // eslint-disable-next-line @typescript-eslint/unbound-method -- alias class methods
    public delete = this.remove;

    /**
     * Method used to assert whether the given suffix is stored in the Trie.
     */
    public has(suffix: string): boolean {
      const res = this.walkIntoLeafWithSuffix(suffix);

      return res
        ? res.node[0]
        : false;
    }

    /**
     * Reports every stored suffix. With a callback, each suffix is handed to it;
     * without one, the suffixes are collected into the returned array.
     */
    public dump(onSuffix: (suffix: string) => void): void;
    public dump(): string[];
    public dump(onSuffix?: (suffix: string) => void): string[] | void {
      const results: string[] = [];

      const handleSuffix = onSuffix
        ? (suffix: string[]) => onSuffix(fastStringArrayJoin(suffix, ''))
        : (suffix: string[]) => results.push(fastStringArrayJoin(suffix, ''));

      this.walk(handleSuffix);

      return results;
    }

    /**
     * Reports the meta of every stored suffix. With a callback, each meta is handed
     * to it; without one, the metas are collected into the returned array.
     */
    public dumpMeta(onMeta: (meta: Meta) => void): void;
    public dumpMeta(): Meta[];
    public dumpMeta(onMeta?: (meta: Meta) => void): Meta[] | void {
      const results: Meta[] = [];

      const handleMeta = onMeta
        ? (_suffix: string[], meta: Meta) => onMeta(meta)
        : (_suffix: string[], meta: Meta) => results.push(meta);

      this.walk(handleMeta);

      return results;
    }

    /**
     * Reports every stored suffix together with its meta. With a callback, each pair
     * is handed to it; without one, only the suffixes are collected into the returned
     * array.
     */
    public dumpWithMeta(onSuffix: (suffix: string, meta: Meta | undefined) => void): void;
    public dumpWithMeta(): string[];
    public dumpWithMeta(onSuffix?: (suffix: string, meta: Meta | undefined) => void): string[] | void {
      const results: string[] = [];

      const handleSuffix = onSuffix
        ? (suffix: string[], meta: Meta | undefined) => onSuffix(fastStringArrayJoin(suffix, ''), meta)
        : (suffix: string[]) => results.push(fastStringArrayJoin(suffix, ''));

      this.walk(handleSuffix);

      return results;
    }

    /**
     * Renders the trie as pretty-printed JSON, every line indented by `depth` spaces.
     *
     * @param depth the indentation width; values that are not a positive finite number
     *              result in no indentation
     * @param unpackMeta optional converter applied to every meta before it is printed
     */
    public inspect(depth: number, unpackMeta?: (meta?: Meta) => any) {
      // `String.prototype.repeat` throws a RangeError for negative or infinite counts
      const indent = ' '.repeat(Number.isFinite(depth) && depth > 0 ? depth : 0);

      return fastStringArrayJoin(
        JSON.stringify(deepTrieNodeToJSON(this.$root, unpackMeta), null, 2).split('\n').map((line) => indent + line),
        '\n'
      );
    }

    /** Hook picked up by `util.inspect` (and therefore `console.log`) */
    public [util.inspect.custom](depth: number) {
      return this.inspect(depth);
    }
  }
  /**
   * Trie variant that drops redundant entries while they are being added:
   * once `.example.com` is stored, neither `example.com` nor anything below
   * `.example.com` is stored next to it.
   */
  export class HostnameSmolTrie<Meta = any> extends Triebase<Meta> {
    public smolTree = true;

    /**
     * Stores `suffix` with its metadata, unless an already stored entry with a leading
     * dot covers it.
     */
    add(suffix: string, meta?: Meta): void {
      let cursor: TrieNode<Meta> = this.$root;

      const descend = (token: string) => {
        const existing = cursor[2].get(token);

        if (existing === undefined) {
          const fresh = createNode(cursor);
          cursor[2].set(token, fresh);
          cursor = fresh;
          return false;
        }

        cursor = existing;
        // While adding `[start]blog|.skk.moe` we ran into a stored `[start].skk.moe`:
        // the rest of the hostname is already covered, so the walk can stop here
        return token === '.' && existing[0];
      };

      // A truthy result means the suffix is already covered, there is nothing to add
      if (walkHostnameTokens(suffix, descend)) {
        return;
      }

      if (suffix.charAt(0) === '.') {
        // Adding `[start].sub.example.com` while `[start]blog.sub.example.com` may be stored:
        // unmark the parent `[start]sub.example.com` (without dot) ...
        const above = cursor[1]!;
        above[0] = false;
        // ... and drop everything below the dot node
        cursor[2].clear();
        // The sentinel of this very node is set at the end of the method
      } else {
        // Adding `example.com` while `.example.com` is already stored: skip this "new" item
        const dotChild = cursor[2].get('.');
        if (dotChild?.[0] === true) {
          return;
        }
      }

      cursor[0] = true;
      cursor[3] = meta!;
    }

    /**
     * Takes `suffix` out of the trie. A suffix with a leading dot also takes out the
     * same hostname without the dot plus everything stored below it; a suffix without
     * a leading dot also unmarks its dotted counterpart.
     * Does nothing when the path of `suffix` does not exist.
     */
    public whitelist(suffix: string) {
      const tokens = hostnameToTokens(suffix);
      const found = this.getSingleChildLeaf(tokens);

      if (found === null) return;

      const { node: leaf, parent: above, toPrune: pruneFrom, tokenToPrune: pruneKey } = found;

      if (tokens[0] === '.') {
        // Whitelisting `[start].sub.example.com` while `[start]blog.sub.example.com` may be stored
        // If there is a `[start]sub.example.com` here, remove it
        above[0] = false;
        // Emptying the children removes the "." child, and with it "blog.sub.example.com"
        above[2].clear();
      } else {
        // Whitelisting `example.com` while `.example.com` may be stored
        const dotChild = leaf[2].get('.');
        if (dotChild) {
          dotChild[0] = false;
        }
      }

      // return early if the suffix itself is not stored
      if (!leaf[0]) return;

      if (pruneKey && pruneFrom) {
        pruneFrom[2].delete(pruneKey);
      } else {
        leaf[0] = false;
      }
    }
  }
  /**
   * Trie variant that stores every added suffix as-is and keeps count of them.
   */
  export class HostnameTrie<Meta = any> extends Triebase<Meta> {
    /** The current value of the entry counter */
    get size() {
      return this.$size;
    }

    /**
     * Stores `suffix`. When the suffix is already stored, neither the counter nor the
     * existing metadata is touched.
     */
    add(suffix: string, meta?: Meta): void {
      let cursor: TrieNode<Meta> = this.$root;

      const descend = (token: string) => {
        let next = cursor[2].get(token);
        if (next === undefined) {
          // First time this token shows up below the current node
          next = createNode(cursor);
          cursor[2].set(token, next);
        }
        cursor = next;
        return false;
      };

      // When walkHostnameTokens returns true, we should skip the rest
      if (walkHostnameTokens(suffix, descend)) {
        return;
      }

      if (cursor[0]) {
        return;
      }

      this.$size++;
      cursor[0] = true;
      cursor[3] = meta!;
    }
  }
  /**
   * Builds a trie from the given entries.
   *
   * @param from optional initial entries
   * @param smolTree `true` builds a `HostnameSmolTrie`, `false` builds a `HostnameTrie`
   *
   * NOTE(review): the overload that lets `smolTree` be omitted is typed as returning a
   * `HostnameTrie`, while the implementation defaults `smolTree` to `true` and therefore
   * builds a `HostnameSmolTrie` in that case. Confirm against the callers which one is
   * intended.
   */
  export function createTrie<Meta = any>(from: string[] | Set<string> | null, smolTree: true): HostnameSmolTrie<Meta>;
  export function createTrie<Meta = any>(from?: string[] | Set<string> | null, smolTree?: false): HostnameTrie<Meta>;
  export function createTrie<_Meta = any>(from?: string[] | Set<string> | null, smolTree = true) {
    return smolTree
      ? new HostnameSmolTrie(from)
      : new HostnameTrie(from);
  }

  /** The type `createTrie` is declared to return */
  export type Trie = ReturnType<typeof createTrie>;
  /**
   * Whether two string arrays have the same length and strictly equal elements at
   * every index.
   */
  function deepEqualArray(a: string[], b: string[]) {
    const size = a.length;
    if (size !== b.length) {
      return false;
    }

    for (let i = size - 1; i >= 0; i--) {
      if (a[i] !== b[i]) {
        return false;
      }
    }

    return true;
  }