trie.ts 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580
  1. /**
  2. * Hostbane-Optimized Trie based on Mnemonist Trie
  3. */
  4. import { fastStringArrayJoin } from './misc';
  5. import util from 'node:util';
  6. import { noop } from 'foxact/noop';
  7. import FIFO from './fifo';
  8. type TrieNode<Meta = any> = [
  9. boolean, /** end */
  10. boolean, /** includeAllSubdoain (.example.org, ||example.com) */
  11. TrieNode | null, /** parent */
  12. Map<string, TrieNode>, /** children */
  13. Meta /** meta */
  14. ];
  15. function deepTrieNodeToJSON(node: TrieNode,
  16. unpackMeta: ((meta?: any) => string) | undefined) {
  17. const obj: Record<string, any> = {};
  18. if (node[0]) {
  19. obj['[start]'] = node[0];
  20. }
  21. obj['[subdomain]'] = node[1];
  22. if (node[4] != null) {
  23. if (unpackMeta) {
  24. obj['[meta]'] = unpackMeta(node[3]);
  25. } else {
  26. obj['[meta]'] = node[3];
  27. }
  28. }
  29. node[3].forEach((value, key) => {
  30. obj[key] = deepTrieNodeToJSON(value, unpackMeta);
  31. });
  32. return obj;
  33. }
  34. const createNode = <Meta = any>(allSubdomain = false, parent: TrieNode | null = null): TrieNode => [false, allSubdomain, parent, new Map<string, TrieNode>(), null] as TrieNode<Meta>;
  35. export function hostnameToTokens(hostname: string): string[] {
  36. const tokens = hostname.split('.');
  37. const results: string[] = [];
  38. let token = '';
  39. for (let i = 0, l = tokens.length; i < l; i++) {
  40. token = tokens[i];
  41. if (token.length > 0) {
  42. results.push(token);
  43. }
  44. }
  45. return results;
  46. }
  47. function walkHostnameTokens(hostname: string, onToken: (token: string) => boolean | null): boolean | null {
  48. const tokens = hostname.split('.');
  49. const l = tokens.length - 1;
  50. // we are at the first of hostname, no splitor there
  51. let token = '';
  52. for (let i = l; i >= 0; i--) {
  53. token = tokens[i];
  54. if (token.length > 0) {
  55. const t = onToken(token);
  56. if (t === null) {
  57. return null;
  58. }
  59. // if the callback returns true, we should skip the rest
  60. if (t) {
  61. return true;
  62. }
  63. }
  64. }
  65. return false;
  66. }
  67. interface FindSingleChildLeafResult<Meta> {
  68. node: TrieNode<Meta>,
  69. toPrune: TrieNode<Meta> | null,
  70. tokenToPrune: string | null,
  71. parent: TrieNode<Meta>
  72. }
  73. abstract class Triebase<Meta = any> {
  74. protected readonly $root: TrieNode<Meta> = createNode();
  75. protected $size = 0;
  76. get root() {
  77. return this.$root;
  78. }
  79. constructor(from?: string[] | Set<string> | null) {
  80. // Actually build trie
  81. if (Array.isArray(from)) {
  82. for (let i = 0, l = from.length; i < l; i++) {
  83. this.add(from[i]);
  84. }
  85. } else if (from) {
  86. from.forEach((value) => this.add(value));
  87. }
  88. }
  89. public abstract add(suffix: string, includeAllSubdoain?: boolean, meta?: Meta): void;
  90. protected walkIntoLeafWithTokens(
  91. tokens: string[],
  92. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  93. ) {
  94. let node: TrieNode = this.$root;
  95. let parent: TrieNode = node;
  96. let token: string;
  97. for (let i = tokens.length - 1; i >= 0; i--) {
  98. token = tokens[i];
  99. // if (token === '') {
  100. // break;
  101. // }
  102. parent = node;
  103. if (node[3].has(token)) {
  104. node = node[3].get(token)!;
  105. } else {
  106. return null;
  107. }
  108. onLoop(node, parent, token);
  109. }
  110. return { node, parent };
  111. };
  112. protected walkIntoLeafWithSuffix(
  113. suffix: string,
  114. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  115. ) {
  116. let node: TrieNode = this.$root;
  117. let parent: TrieNode = node;
  118. const onToken = (token: string) => {
  119. // if (token === '') {
  120. // return true;
  121. // }
  122. parent = node;
  123. if (node[3].has(token)) {
  124. node = node[3].get(token)!;
  125. } else {
  126. return null;
  127. }
  128. onLoop(node, parent, token);
  129. return false;
  130. };
  131. if (walkHostnameTokens(suffix, onToken) === null) {
  132. return null;
  133. }
  134. return { node, parent };
  135. };
  136. public contains(suffix: string, includeAllSubdoain = suffix[0] === '.'): boolean {
  137. if (suffix[0] === '.') {
  138. suffix = suffix.slice(1);
  139. }
  140. const res = this.walkIntoLeafWithSuffix(suffix);
  141. if (!res) return false;
  142. if (includeAllSubdoain) return res.node[1];
  143. return true;
  144. };
  145. private walk(
  146. onMatches: (suffix: string[], subdomain: boolean, meta: Meta) => void,
  147. initialNode = this.$root,
  148. initialSuffix: string[] = []
  149. ) {
  150. const nodeStack: Array<TrieNode<Meta>> = [initialNode];
  151. // Resolving initial string (begin the start of the stack)
  152. const suffixStack: string[][] = [initialSuffix];
  153. let node: TrieNode<Meta> = initialNode;
  154. do {
  155. node = nodeStack.pop()!;
  156. const suffix = suffixStack.pop()!;
  157. node[3].forEach((childNode, k) => {
  158. // Pushing the child node to the stack for next iteration of DFS
  159. nodeStack.push(childNode);
  160. suffixStack.push([k, ...suffix]);
  161. });
  162. // If the node is a sentinel, we push the suffix to the results
  163. if (node[0]) {
  164. onMatches(suffix, node[1], node[4]);
  165. }
  166. } while (nodeStack.length);
  167. };
  168. static compare(this: void, a: string, b: string) {
  169. if (a === b) return 0;
  170. return (a.length - b.length) || a.localeCompare(b);
  171. }
  172. private walkWithSort(
  173. onMatches: (suffix: string[], subdomain: boolean, meta: Meta) => void,
  174. initialNode = this.$root,
  175. initialSuffix: string[] = []
  176. ) {
  177. const nodeStack = new FIFO<TrieNode<Meta>>();
  178. nodeStack.enqueue(initialNode);
  179. // Resolving initial string (begin the start of the stack)
  180. const suffixStack = new FIFO<string[]>();
  181. suffixStack.enqueue(initialSuffix);
  182. let node: TrieNode<Meta> = initialNode;
  183. do {
  184. node = nodeStack.dequeue()!;
  185. const suffix = suffixStack.dequeue()!;
  186. if (node[3].size) {
  187. const keys = Array.from(node[3].keys()).sort(Triebase.compare);
  188. for (let i = 0, l = keys.length; i < l; i++) {
  189. const key = keys[i];
  190. const childNode = node[3].get(key)!;
  191. // Pushing the child node to the stack for next iteration of DFS
  192. nodeStack.enqueue(childNode);
  193. suffixStack.enqueue([key, ...suffix]);
  194. }
  195. }
  196. // If the node is a sentinel, we push the suffix to the results
  197. if (node[0]) {
  198. onMatches(suffix, node[1], node[4]);
  199. }
  200. } while (nodeStack.size);
  201. };
  202. protected getSingleChildLeaf(tokens: string[]): FindSingleChildLeafResult<Meta> | null {
  203. let toPrune: TrieNode | null = null;
  204. let tokenToPrune: string | null = null;
  205. const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
  206. // Keeping track of a potential branch to prune
  207. // Even if the node size is 1, but the single child is ".", we should retain the branch
  208. // Since the "." could be special if it is the leaf-est node
  209. const onlyChild = node[3].size === 0 && !node[2];
  210. if (toPrune != null) { // the top-est branch that could potentially being pruned
  211. if (!onlyChild) {
  212. // The branch has moew than single child, retain the branch.
  213. // And we need to abort prune the parent, so we set it to null
  214. toPrune = null;
  215. tokenToPrune = null;
  216. }
  217. } else if (onlyChild) {
  218. // There is only one token child, or no child at all, we can prune it safely
  219. // It is now the top-est branch that could potentially being pruned
  220. toPrune = parent;
  221. tokenToPrune = token;
  222. }
  223. };
  224. const res = this.walkIntoLeafWithTokens(tokens, onLoop);
  225. if (res === null) return null;
  226. return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  227. };
  228. /**
  229. * Method used to retrieve every item in the trie with the given prefix.
  230. */
  231. public find(
  232. inputSuffix: string,
  233. subdomainOnly = inputSuffix[0] === '.'
  234. // /** @default true */ includeEqualWithSuffix = true
  235. ): string[] {
  236. if (inputSuffix[0] === '.') {
  237. inputSuffix = inputSuffix.slice(1);
  238. }
  239. const inputTokens = hostnameToTokens(inputSuffix);
  240. const res = this.walkIntoLeafWithTokens(inputTokens);
  241. if (res === null) return [];
  242. const results: string[] = [];
  243. const onMatches = subdomainOnly
  244. ? (suffix: string[], subdomain: boolean) => { // fast path (default option)
  245. const d = fastStringArrayJoin(suffix, '.');
  246. if (!subdomain && d === inputSuffix) return;
  247. results.push(subdomain ? '.' + d : d);
  248. }
  249. : (suffix: string[], subdomain: boolean) => { // fast path (default option)
  250. const d = fastStringArrayJoin(suffix, '.');
  251. results.push(subdomain ? '.' + d : d);
  252. };
  253. this.walk(
  254. onMatches,
  255. res.node, // Performing DFS from prefix
  256. inputTokens
  257. );
  258. return results;
  259. };
  260. /**
  261. * Method used to delete a prefix from the trie.
  262. */
  263. public remove(suffix: string): boolean {
  264. const res = this.getSingleChildLeaf(hostnameToTokens(suffix));
  265. if (res === null) return false;
  266. if (!res.node[0]) return false;
  267. this.$size--;
  268. const { node, toPrune, tokenToPrune } = res;
  269. if (tokenToPrune && toPrune) {
  270. toPrune[3].delete(tokenToPrune);
  271. } else {
  272. node[0] = false;
  273. }
  274. return true;
  275. };
  276. // eslint-disable-next-line @typescript-eslint/unbound-method -- safe
  277. public delete = this.remove;
  278. /**
  279. * Method used to assert whether the given prefix exists in the Trie.
  280. */
  281. public has(suffix: string, includeAllSubdoain = suffix[0] === '.'): boolean {
  282. if (suffix[0] === '.') {
  283. suffix = suffix.slice(1);
  284. }
  285. const res = this.walkIntoLeafWithSuffix(suffix);
  286. if (res === null) return false;
  287. if (!res.node[0]) return false;
  288. if (includeAllSubdoain) return res.node[1];
  289. return true;
  290. };
  291. public dump(onSuffix: (suffix: string) => void): void;
  292. public dump(): string[];
  293. public dump(onSuffix?: (suffix: string) => void): string[] | void {
  294. const results: string[] = [];
  295. const handleSuffix = onSuffix
  296. ? (suffix: string[], subdomain: boolean) => {
  297. const d = fastStringArrayJoin(suffix, '.');
  298. onSuffix(subdomain ? '.' + d : d);
  299. }
  300. : (suffix: string[], subdomain: boolean) => {
  301. const d = fastStringArrayJoin(suffix, '.');
  302. results.push(subdomain ? '.' + d : d);
  303. };
  304. this.walkWithSort(handleSuffix);
  305. return results;
  306. };
  307. public dumpMeta(onMeta: (meta: Meta) => void): void;
  308. public dumpMeta(): Meta[];
  309. public dumpMeta(onMeta?: (meta: Meta) => void): Meta[] | void {
  310. const results: Meta[] = [];
  311. const handleMeta = onMeta
  312. ? (_suffix: string[], _subdomain: boolean, meta: Meta) => onMeta(meta)
  313. : (_suffix: string[], _subdomain: boolean, meta: Meta) => results.push(meta);
  314. this.walk(handleMeta);
  315. return results;
  316. };
  317. public dumpWithMeta(onSuffix: (suffix: string, meta: Meta | undefined) => void): void;
  318. public dumpWithMeta(): Array<[string, Meta | undefined]>;
  319. public dumpWithMeta(onSuffix?: (suffix: string, meta: Meta | undefined) => void): Array<[string, Meta | undefined]> | void {
  320. const results: Array<[string, Meta | undefined]> = [];
  321. const handleSuffix = onSuffix
  322. ? (suffix: string[], subdomain: boolean, meta: Meta | undefined) => {
  323. const d = fastStringArrayJoin(suffix, '.');
  324. return onSuffix(subdomain ? '.' + d : d, meta);
  325. }
  326. : (suffix: string[], subdomain: boolean, meta: Meta | undefined) => {
  327. const d = fastStringArrayJoin(suffix, '.');
  328. results.push([subdomain ? '.' + d : d, meta]);
  329. };
  330. this.walk(handleSuffix);
  331. return results;
  332. };
  333. public inspect(depth: number, unpackMeta?: (meta?: Meta) => any) {
  334. return fastStringArrayJoin(
  335. JSON.stringify(deepTrieNodeToJSON(this.$root, unpackMeta), null, 2).split('\n').map((line) => ' '.repeat(depth) + line),
  336. '\n'
  337. );
  338. }
  339. public [util.inspect.custom](depth: number) {
  340. return this.inspect(depth);
  341. };
  342. }
  343. export class HostnameSmolTrie<Meta = any> extends Triebase<Meta> {
  344. public smolTree = true;
  345. add(suffix: string, includeAllSubdoain = suffix[0] === '.', meta?: Meta): void {
  346. let node: TrieNode<Meta> = this.$root;
  347. let curNodeChildren: Map<string, TrieNode<Meta>> = node[3];
  348. if (suffix[0] === '.') {
  349. suffix = suffix.slice(1);
  350. }
  351. const onToken = (token: string) => {
  352. curNodeChildren = node[3];
  353. if (curNodeChildren.has(token)) {
  354. node = curNodeChildren.get(token)!;
  355. // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie, skip adding the rest of the node
  356. if (node[1]) {
  357. return true;
  358. }
  359. } else {
  360. const newNode = createNode(false, node);
  361. curNodeChildren.set(token, newNode);
  362. node = newNode;
  363. }
  364. return false;
  365. };
  366. // When walkHostnameTokens returns true, we should skip the rest
  367. if (walkHostnameTokens(suffix, onToken)) {
  368. return;
  369. }
  370. // If we are in smolTree mode, we need to do something at the end of the loop
  371. if (includeAllSubdoain) {
  372. // Trying to add `[.]sub.example.com` where there is already a `blog.sub.example.com` in the trie
  373. // Make sure parent `[start]sub.example.com` (without dot) is removed (SETINEL to false)
  374. // (/** parent */ node[2]!)[0] = false;
  375. // Removing the rest of the parent's child nodes
  376. node[3].clear();
  377. // The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
  378. // we can use else-if here, because the children is now empty, we don't need to check the leading "."
  379. } else if (node[1]) {
  380. // Trying to add `example.com` when there is already a `.example.com` in the trie
  381. // No need to increment size and set SENTINEL to true (skip this "new" item)
  382. return;
  383. }
  384. node[0] = true;
  385. node[1] = includeAllSubdoain;
  386. node[4] = meta!;
  387. }
  388. public whitelist(suffix: string, includeAllSubdoain = suffix[0] === '.') {
  389. if (suffix[0] === '.') {
  390. suffix = suffix.slice(1);
  391. }
  392. const tokens = hostnameToTokens(suffix);
  393. const res = this.getSingleChildLeaf(tokens);
  394. if (res === null) return;
  395. const { node, toPrune, tokenToPrune } = res;
  396. // Trying to whitelist `[start].sub.example.com` where there might already be a `[start]blog.sub.example.com` in the trie
  397. if (includeAllSubdoain) {
  398. // If there is a `[start]sub.example.com` here, remove it
  399. node[0] = false;
  400. node[1] = false;
  401. // Removing all the child nodes by empty the children
  402. node[3].clear();
  403. } else {
  404. // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
  405. node[1] = false;
  406. }
  407. // return early if not found
  408. if (!node[0]) return;
  409. if (tokenToPrune && toPrune) {
  410. toPrune[3].delete(tokenToPrune);
  411. } else {
  412. node[0] = false;
  413. }
  414. };
  415. }
  416. export class HostnameTrie<Meta = any> extends Triebase<Meta> {
  417. get size() {
  418. return this.$size;
  419. }
  420. add(suffix: string, includeAllSubdoain = suffix[0] === '.', meta?: Meta): void {
  421. let node: TrieNode<Meta> = this.$root;
  422. const onToken = (token: string) => {
  423. if (node[3].has(token)) {
  424. node = node[3].get(token)!;
  425. } else {
  426. const newNode = createNode(false, node);
  427. node[3].set(token, newNode);
  428. node = newNode;
  429. }
  430. return false;
  431. };
  432. if (suffix[0] === '.') {
  433. suffix = suffix.slice(1);
  434. }
  435. // When walkHostnameTokens returns true, we should skip the rest
  436. if (walkHostnameTokens(suffix, onToken)) {
  437. return;
  438. }
  439. // if same entry has been added before, skip
  440. if (node[0]) {
  441. return;
  442. }
  443. this.$size++;
  444. node[0] = true;
  445. node[1] = includeAllSubdoain;
  446. node[4] = meta!;
  447. }
  448. }
  449. export function createTrie<Meta = any>(from: string[] | Set<string> | null, smolTree: true): HostnameSmolTrie<Meta>;
  450. export function createTrie<Meta = any>(from?: string[] | Set<string> | null, smolTree?: false): HostnameTrie<Meta>;
  451. export function createTrie<_Meta = any>(from?: string[] | Set<string> | null, smolTree = true) {
  452. if (smolTree) {
  453. return new HostnameSmolTrie(from);
  454. }
  455. return new HostnameTrie(from);
  456. };
  457. export type Trie = ReturnType<typeof createTrie>;
  458. // function deepEqualArray(a: string[], b: string[]) {
  459. // let len = a.length;
  460. // if (len !== b.length) return false;
  461. // while (len--) {
  462. // if (a[len] !== b[len]) return false;
  463. // }
  464. // return true;
  465. // };