trie.ts 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648
  1. /**
  2. * Hostbane-Optimized Trie based on Mnemonist Trie
  3. */
  4. import { fastStringCompare } from './misc';
  5. import util from 'node:util';
  6. import { noop } from 'foxts/noop';
  7. import { fastStringArrayJoin } from 'foxts/fast-string-array-join';
  8. import FIFO from 'fast-fifo';
  9. type TrieNode<Meta = any> = [
  10. boolean, /** end */
  11. boolean, /** includeAllSubdomain (.example.org, ||example.com) */
  12. TrieNode | null, /** parent */
  13. Map<string, TrieNode>, /** children */
  14. Meta /** meta */
  15. ];
  16. function deepTrieNodeToJSON(node: TrieNode,
  17. unpackMeta: ((meta?: any) => string) | undefined) {
  18. const obj: Record<string, any> = {};
  19. if (node[0]) {
  20. obj['[start]'] = node[0];
  21. }
  22. obj['[subdomain]'] = node[1];
  23. if (node[4] != null) {
  24. if (unpackMeta) {
  25. obj['[meta]'] = unpackMeta(node[3]);
  26. } else {
  27. obj['[meta]'] = node[3];
  28. }
  29. }
  30. node[3].forEach((value, key) => {
  31. obj[key] = deepTrieNodeToJSON(value, unpackMeta);
  32. });
  33. return obj;
  34. }
  35. const createNode = <Meta = any>(allSubdomain = false, parent: TrieNode | null = null): TrieNode => [false, allSubdomain, parent, new Map<string, TrieNode>(), null] as TrieNode<Meta>;
  36. export function hostnameToTokens(hostname: string, hostnameFromIndex: number): string[] {
  37. const tokens = hostname.split('.');
  38. const results: string[] = [];
  39. let token = '';
  40. for (let i = hostnameFromIndex, l = tokens.length; i < l; i++) {
  41. token = tokens[i];
  42. if (token.length > 0) {
  43. results.push(token);
  44. }
  45. }
  46. return results;
  47. }
  48. function walkHostnameTokens(hostname: string, onToken: (token: string) => boolean | null, hostnameFromIndex: number): boolean | null {
  49. const tokens = hostname.split('.');
  50. const l = tokens.length - 1;
  51. // we are at the first of hostname, no splitor there
  52. let token = '';
  53. for (let i = l; i >= hostnameFromIndex; i--) {
  54. token = tokens[i];
  55. if (token.length > 0) {
  56. const t = onToken(token);
  57. if (t === null) {
  58. return null;
  59. }
  60. // if the callback returns true, we should skip the rest
  61. if (t) {
  62. return true;
  63. }
  64. }
  65. }
  66. return false;
  67. }
  68. interface FindSingleChildLeafResult<Meta> {
  69. node: TrieNode<Meta>,
  70. toPrune: TrieNode<Meta> | null,
  71. tokenToPrune: string | null,
  72. parent: TrieNode<Meta>
  73. }
  74. abstract class Triebase<Meta = any> {
  75. protected readonly $root: TrieNode<Meta> = createNode();
  76. protected $size = 0;
  77. get root() {
  78. return this.$root;
  79. }
  80. constructor(from?: string[] | Set<string> | null) {
  81. // Actually build trie
  82. if (Array.isArray(from)) {
  83. for (let i = 0, l = from.length; i < l; i++) {
  84. this.add(from[i]);
  85. }
  86. } else if (from) {
  87. from.forEach((value) => this.add(value));
  88. }
  89. }
  90. public abstract add(suffix: string, includeAllSubdomain?: boolean, meta?: Meta, hostnameFromIndex?: number): void;
  91. protected walkIntoLeafWithTokens(
  92. tokens: string[],
  93. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  94. ) {
  95. let node: TrieNode = this.$root;
  96. let parent: TrieNode = node;
  97. let token: string;
  98. for (let i = tokens.length - 1; i >= 0; i--) {
  99. token = tokens[i];
  100. // if (token === '') {
  101. // break;
  102. // }
  103. parent = node;
  104. if (node[3].has(token)) {
  105. node = node[3].get(token)!;
  106. } else {
  107. return null;
  108. }
  109. onLoop(node, parent, token);
  110. }
  111. return { node, parent };
  112. };
  113. protected walkIntoLeafWithSuffix(
  114. suffix: string,
  115. hostnameFromIndex: number,
  116. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  117. ) {
  118. let node: TrieNode = this.$root;
  119. let parent: TrieNode = node;
  120. const onToken = (token: string) => {
  121. // if (token === '') {
  122. // return true;
  123. // }
  124. parent = node;
  125. if (node[3].has(token)) {
  126. node = node[3].get(token)!;
  127. } else {
  128. return null;
  129. }
  130. onLoop(node, parent, token);
  131. return false;
  132. };
  133. if (walkHostnameTokens(suffix, onToken, hostnameFromIndex) === null) {
  134. return null;
  135. }
  136. return { node, parent };
  137. };
  138. public contains(suffix: string, includeAllSubdomain = suffix[0] === '.'): boolean {
  139. let hostnameFromIndex = 0;
  140. if (suffix[0] === '.') {
  141. hostnameFromIndex = 1;
  142. }
  143. const res = this.walkIntoLeafWithSuffix(suffix, hostnameFromIndex);
  144. if (!res) return false;
  145. if (includeAllSubdomain) return res.node[1];
  146. return true;
  147. };
  148. private static bfsResults: [node: TrieNode | null, suffix: string[]] = [null, []];
  149. private static bfs<Meta>(this: void, nodeStack: FIFO<TrieNode<Meta>>, suffixStack: FIFO<string[]>) {
  150. const node = nodeStack.shift()!;
  151. const suffix = suffixStack.shift()!;
  152. node[3].forEach((childNode, k) => {
  153. // Pushing the child node to the stack for next iteration of DFS
  154. nodeStack.push(childNode);
  155. suffixStack.push([k, ...suffix]);
  156. });
  157. Triebase.bfsResults[0] = node;
  158. Triebase.bfsResults[1] = suffix;
  159. return Triebase.bfsResults;
  160. }
  161. private static bfsWithSort<Meta>(this: void, nodeStack: FIFO<TrieNode<Meta>>, suffixStack: FIFO<string[]>) {
  162. const node = nodeStack.shift()!;
  163. const suffix = suffixStack.shift()!;
  164. if (node[3].size) {
  165. const keys = Array.from(node[3].keys()).sort(Triebase.compare);
  166. for (let i = 0, l = keys.length; i < l; i++) {
  167. const key = keys[i];
  168. const childNode = node[3].get(key)!;
  169. // Pushing the child node to the stack for next iteration of DFS
  170. nodeStack.push(childNode);
  171. suffixStack.push([key, ...suffix]);
  172. }
  173. }
  174. Triebase.bfsResults[0] = node;
  175. Triebase.bfsResults[1] = suffix;
  176. return Triebase.bfsResults;
  177. }
  178. private walk(
  179. onMatches: (suffix: string[], subdomain: boolean, meta: Meta) => void,
  180. initialNode = this.$root,
  181. initialSuffix: string[] = [],
  182. withSort = false
  183. ) {
  184. const bfsImpl = withSort ? Triebase.bfsWithSort : Triebase.bfs;
  185. const nodeStack = new FIFO<TrieNode<Meta>>();
  186. nodeStack.push(initialNode);
  187. // Resolving initial string (begin the start of the stack)
  188. const suffixStack = new FIFO<string[]>();
  189. suffixStack.push(initialSuffix);
  190. let node: TrieNode<Meta> = initialNode;
  191. let r;
  192. do {
  193. r = bfsImpl(nodeStack, suffixStack);
  194. node = r[0]!;
  195. const suffix = r[1];
  196. // If the node is a sentinel, we push the suffix to the results
  197. if (node[0]) {
  198. onMatches(suffix, node[1], node[4]);
  199. }
  200. } while (nodeStack.length);
  201. };
  202. static compare(this: void, a: string, b: string) {
  203. if (a === b) return 0;
  204. return (a.length - b.length) || fastStringCompare(a, b);
  205. }
  206. private walkWithSort(
  207. onMatches: (suffix: string[], subdomain: boolean, meta: Meta) => void,
  208. initialNode = this.$root,
  209. initialSuffix: string[] = []
  210. ) {
  211. const nodeStack = new FIFO<TrieNode<Meta>>();
  212. nodeStack.push(initialNode);
  213. // Resolving initial string (begin the start of the stack)
  214. const suffixStack = new FIFO<string[]>();
  215. suffixStack.push(initialSuffix);
  216. let node: TrieNode<Meta> = initialNode;
  217. do {
  218. node = nodeStack.shift()!;
  219. const suffix = suffixStack.shift()!;
  220. if (node[3].size) {
  221. const keys = Array.from(node[3].keys()).sort(Triebase.compare);
  222. for (let i = 0, l = keys.length; i < l; i++) {
  223. const key = keys[i];
  224. const childNode = node[3].get(key)!;
  225. // Pushing the child node to the stack for next iteration of DFS
  226. nodeStack.push(childNode);
  227. suffixStack.push([key, ...suffix]);
  228. }
  229. }
  230. // If the node is a sentinel, we push the suffix to the results
  231. if (node[0]) {
  232. onMatches(suffix, node[1], node[4]);
  233. }
  234. } while (nodeStack.length);
  235. };
  236. protected getSingleChildLeaf(tokens: string[]): FindSingleChildLeafResult<Meta> | null {
  237. let toPrune: TrieNode | null = null;
  238. let tokenToPrune: string | null = null;
  239. const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
  240. // Keeping track of a potential branch to prune
  241. // Even if the node size is 1, but the single child is ".", we should retain the branch
  242. // Since the "." could be special if it is the leaf-est node
  243. const onlyChild = node[3].size === 0 && !node[2];
  244. if (toPrune != null) { // the top-est branch that could potentially being pruned
  245. if (!onlyChild) {
  246. // The branch has moew than single child, retain the branch.
  247. // And we need to abort prune the parent, so we set it to null
  248. toPrune = null;
  249. tokenToPrune = null;
  250. }
  251. } else if (onlyChild) {
  252. // There is only one token child, or no child at all, we can prune it safely
  253. // It is now the top-est branch that could potentially being pruned
  254. toPrune = parent;
  255. tokenToPrune = token;
  256. }
  257. };
  258. const res = this.walkIntoLeafWithTokens(tokens, onLoop);
  259. if (res === null) return null;
  260. return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  261. };
  262. /**
  263. * Method used to retrieve every item in the trie with the given prefix.
  264. */
  265. public find(
  266. inputSuffix: string,
  267. subdomainOnly = inputSuffix[0] === '.',
  268. hostnameFromIndex = 0
  269. // /** @default true */ includeEqualWithSuffix = true
  270. ): string[] {
  271. if (inputSuffix[0] === '.') {
  272. hostnameFromIndex = 1;
  273. }
  274. const inputTokens = hostnameToTokens(inputSuffix, hostnameFromIndex);
  275. const res = this.walkIntoLeafWithTokens(inputTokens);
  276. if (res === null) return [];
  277. const results: string[] = [];
  278. const onMatches = subdomainOnly
  279. ? (suffix: string[], subdomain: boolean) => { // fast path (default option)
  280. const d = fastStringArrayJoin(suffix, '.');
  281. if (!subdomain && subStringEqual(inputSuffix, d, 1)) return;
  282. results.push(subdomain ? '.' + d : d);
  283. }
  284. : (suffix: string[], subdomain: boolean) => { // fast path (default option)
  285. const d = fastStringArrayJoin(suffix, '.');
  286. results.push(subdomain ? '.' + d : d);
  287. };
  288. this.walk(
  289. onMatches,
  290. res.node, // Performing DFS from prefix
  291. inputTokens
  292. );
  293. return results;
  294. };
  295. /**
  296. * Method used to delete a prefix from the trie.
  297. */
  298. public remove(suffix: string): boolean {
  299. const res = this.getSingleChildLeaf(hostnameToTokens(suffix, 0));
  300. if (res === null) return false;
  301. if (!res.node[0]) return false;
  302. this.$size--;
  303. const { node, toPrune, tokenToPrune } = res;
  304. if (tokenToPrune && toPrune) {
  305. toPrune[3].delete(tokenToPrune);
  306. } else {
  307. node[0] = false;
  308. }
  309. return true;
  310. };
  311. // eslint-disable-next-line @typescript-eslint/unbound-method -- safe
  312. public delete = this.remove;
  313. /**
  314. * Method used to assert whether the given prefix exists in the Trie.
  315. */
  316. public has(suffix: string, includeAllSubdomain = suffix[0] === '.'): boolean {
  317. let hostnameFromIndex = 0;
  318. if (suffix[0] === '.') {
  319. hostnameFromIndex = 1;
  320. }
  321. const res = this.walkIntoLeafWithSuffix(suffix, hostnameFromIndex);
  322. if (res === null) return false;
  323. if (!res.node[0]) return false;
  324. if (includeAllSubdomain) return res.node[1];
  325. return true;
  326. };
  327. public dump(onSuffix: (suffix: string) => void, withSort?: boolean): void;
  328. public dump(onSuffix?: null, withSort?: boolean): string[];
  329. public dump(onSuffix?: ((suffix: string) => void) | null, withSort = false): string[] | void {
  330. const results: string[] = [];
  331. const handleSuffix = onSuffix
  332. ? (suffix: string[], subdomain: boolean) => {
  333. const d = fastStringArrayJoin(suffix, '.');
  334. onSuffix(subdomain ? '.' + d : d);
  335. }
  336. : (suffix: string[], subdomain: boolean) => {
  337. const d = fastStringArrayJoin(suffix, '.');
  338. results.push(subdomain ? '.' + d : d);
  339. };
  340. if (withSort) {
  341. this.walkWithSort(handleSuffix);
  342. } else {
  343. this.walk(handleSuffix);
  344. }
  345. return results;
  346. };
  347. public dumpMeta(onMeta: (meta: Meta) => void, withSort?: boolean): void;
  348. public dumpMeta(onMeta?: null, withSort?: boolean): Meta[];
  349. public dumpMeta(onMeta?: ((meta: Meta) => void) | null, withSort = false): Meta[] | void {
  350. const results: Meta[] = [];
  351. const handleMeta = onMeta
  352. ? (_suffix: string[], _subdomain: boolean, meta: Meta) => onMeta(meta)
  353. : (_suffix: string[], _subdomain: boolean, meta: Meta) => results.push(meta);
  354. if (withSort) {
  355. this.walkWithSort(handleMeta);
  356. } else {
  357. this.walk(handleMeta);
  358. }
  359. return results;
  360. };
  361. public dumpWithMeta(onSuffix: (suffix: string, meta: Meta | undefined) => void, withSort?: boolean): void;
  362. public dumpWithMeta(onMeta?: null, withSort?: boolean): Array<[string, Meta | undefined]>;
  363. public dumpWithMeta(onSuffix?: ((suffix: string, meta: Meta | undefined) => void) | null, withSort = false): Array<[string, Meta | undefined]> | void {
  364. const results: Array<[string, Meta | undefined]> = [];
  365. const handleSuffix = onSuffix
  366. ? (suffix: string[], subdomain: boolean, meta: Meta | undefined) => {
  367. const d = fastStringArrayJoin(suffix, '.');
  368. return onSuffix(subdomain ? '.' + d : d, meta);
  369. }
  370. : (suffix: string[], subdomain: boolean, meta: Meta | undefined) => {
  371. const d = fastStringArrayJoin(suffix, '.');
  372. results.push([subdomain ? '.' + d : d, meta]);
  373. };
  374. if (withSort) {
  375. this.walkWithSort(handleSuffix);
  376. } else {
  377. this.walk(handleSuffix);
  378. }
  379. return results;
  380. };
  381. public inspect(depth: number, unpackMeta?: (meta?: Meta) => any) {
  382. return fastStringArrayJoin(
  383. JSON.stringify(deepTrieNodeToJSON(this.$root, unpackMeta), null, 2).split('\n').map((line) => ' '.repeat(depth) + line),
  384. '\n'
  385. );
  386. }
  387. public [util.inspect.custom](depth: number) {
  388. return this.inspect(depth);
  389. };
  390. }
  391. export class HostnameSmolTrie<Meta = any> extends Triebase<Meta> {
  392. public smolTree = true;
  393. add(suffix: string, includeAllSubdomain = suffix[0] === '.', meta?: Meta, hostnameFromIndex = 0): void {
  394. let node: TrieNode<Meta> = this.$root;
  395. let curNodeChildren: Map<string, TrieNode<Meta>> = node[3];
  396. if (hostnameFromIndex === 0 && suffix[0] === '.') {
  397. hostnameFromIndex = 1;
  398. }
  399. const onToken = (token: string) => {
  400. curNodeChildren = node[3];
  401. if (curNodeChildren.has(token)) {
  402. node = curNodeChildren.get(token)!;
  403. // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie, skip adding the rest of the node
  404. if (node[1]) {
  405. return true;
  406. }
  407. } else {
  408. const newNode = createNode(false, node);
  409. curNodeChildren.set(token, newNode);
  410. node = newNode;
  411. }
  412. return false;
  413. };
  414. // When walkHostnameTokens returns true, we should skip the rest
  415. if (walkHostnameTokens(suffix, onToken, hostnameFromIndex)) {
  416. return;
  417. }
  418. // If we are in smolTree mode, we need to do something at the end of the loop
  419. if (includeAllSubdomain) {
  420. // Trying to add `[.]sub.example.com` where there is already a `blog.sub.example.com` in the trie
  421. // Make sure parent `[start]sub.example.com` (without dot) is removed (SETINEL to false)
  422. // (/** parent */ node[2]!)[0] = false;
  423. // Removing the rest of the parent's child nodes
  424. node[3].clear();
  425. // The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
  426. // we can use else-if here, because the children is now empty, we don't need to check the leading "."
  427. } else if (node[1]) {
  428. // Trying to add `example.com` when there is already a `.example.com` in the trie
  429. // No need to increment size and set SENTINEL to true (skip this "new" item)
  430. return;
  431. }
  432. node[0] = true;
  433. node[1] = includeAllSubdomain;
  434. node[4] = meta!;
  435. }
  436. public whitelist(suffix: string, includeAllSubdomain = suffix[0] === '.', hostnameFromIndex = 0) {
  437. if (suffix[0] === '.') {
  438. hostnameFromIndex = 1;
  439. }
  440. const tokens = hostnameToTokens(suffix, hostnameFromIndex);
  441. const res = this.getSingleChildLeaf(tokens);
  442. if (res === null) return;
  443. const { node, toPrune, tokenToPrune } = res;
  444. // Trying to whitelist `[start].sub.example.com` where there might already be a `[start]blog.sub.example.com` in the trie
  445. if (includeAllSubdomain) {
  446. // If there is a `[start]sub.example.com` here, remove it
  447. node[0] = false;
  448. node[1] = false;
  449. // Removing all the child nodes by empty the children
  450. node[3].clear();
  451. } else {
  452. // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
  453. node[1] = false;
  454. }
  455. // return early if not found
  456. if (!node[0]) return;
  457. if (tokenToPrune && toPrune) {
  458. toPrune[3].delete(tokenToPrune);
  459. } else {
  460. node[0] = false;
  461. }
  462. };
  463. }
  464. export class HostnameTrie<Meta = any> extends Triebase<Meta> {
  465. get size() {
  466. return this.$size;
  467. }
  468. add(suffix: string, includeAllSubdomain = suffix[0] === '.', meta?: Meta, hostnameFromIndex = 0): void {
  469. let node: TrieNode<Meta> = this.$root;
  470. const onToken = (token: string) => {
  471. if (node[3].has(token)) {
  472. node = node[3].get(token)!;
  473. } else {
  474. const newNode = createNode(false, node);
  475. node[3].set(token, newNode);
  476. node = newNode;
  477. }
  478. return false;
  479. };
  480. if (hostnameFromIndex === 0 && suffix[0] === '.') {
  481. hostnameFromIndex = 1;
  482. }
  483. // When walkHostnameTokens returns true, we should skip the rest
  484. if (walkHostnameTokens(suffix, onToken, hostnameFromIndex)) {
  485. return;
  486. }
  487. // if same entry has been added before, skip
  488. if (node[0]) {
  489. return;
  490. }
  491. this.$size++;
  492. node[0] = true;
  493. node[1] = includeAllSubdomain;
  494. node[4] = meta!;
  495. }
  496. }
  497. export function createTrie<Meta = any>(from: string[] | Set<string> | null, smolTree: true): HostnameSmolTrie<Meta>;
  498. export function createTrie<Meta = any>(from?: string[] | Set<string> | null, smolTree?: false): HostnameTrie<Meta>;
  499. export function createTrie<_Meta = any>(from?: string[] | Set<string> | null, smolTree = true) {
  500. if (smolTree) {
  501. return new HostnameSmolTrie(from);
  502. }
  503. return new HostnameTrie(from);
  504. };
  505. export type Trie = ReturnType<typeof createTrie>;
  506. // function deepEqualArray(a: string[], b: string[]) {
  507. // let len = a.length;
  508. // if (len !== b.length) return false;
  509. // while (len--) {
  510. // if (a[len] !== b[len]) return false;
  511. // }
  512. // return true;
  513. // };
  514. function subStringEqual(needle: string, haystack: string, needleIndex = 0) {
  515. for (let i = 0, l = haystack.length; i < l; i++) {
  516. if (needle[i + needleIndex] !== haystack[i]) return false;
  517. }
  518. return true;
  519. }