trie.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. /**
  2. * Hostbane-Optimized Trie based on Mnemonist Trie
  3. */
  4. import { fastStringArrayJoin } from './misc';
  5. import util from 'node:util';
  6. import { noop } from 'foxact/noop';
  7. type TrieNode<Meta = any> = [
  8. boolean, /** sentinel */
  9. TrieNode | null, /** parent */
  10. Map<string, TrieNode>, /** children */
  11. Meta /** meta */
  12. ];
  13. function deepTrieNodeToJSON(node: TrieNode,
  14. unpackMeta: ((meta?: any) => string) | undefined) {
  15. const obj: Record<string, any> = {};
  16. if (node[0]) {
  17. obj['[start]'] = node[0];
  18. }
  19. if (node[3] != null) {
  20. if (unpackMeta) {
  21. obj['[meta]'] = unpackMeta(node[3]);
  22. } else {
  23. obj['[meta]'] = node[3];
  24. }
  25. }
  26. node[2].forEach((value, key) => {
  27. obj[key] = deepTrieNodeToJSON(value, unpackMeta);
  28. });
  29. return obj;
  30. }
  31. const createNode = <Meta = any>(parent: TrieNode | null = null): TrieNode => [false, parent, new Map<string, TrieNode>(), null] as TrieNode<Meta>;
  32. export function hostnameToTokens(hostname: string): string[] {
  33. const tokens = hostname.split('.');
  34. const results: string[] = [];
  35. let token = '';
  36. for (let i = 0, l = tokens.length; i < l; i++) {
  37. if (i > 0) {
  38. results.push('.');
  39. }
  40. token = tokens[i];
  41. if (token.length > 0) {
  42. results.push(token);
  43. }
  44. }
  45. return results;
  46. }
  47. function walkHostnameTokens(hostname: string, onToken: (token: string) => boolean | null): boolean | null {
  48. const tokens = hostname.split('.');
  49. let token = '';
  50. const l = tokens.length - 1;
  51. for (let i = l; i >= 0; i--) {
  52. if (
  53. i < l // when i === l, we are at the first of hostname, no splitor there
  54. // when onToken returns true, we should skip the rest of the loop
  55. && onToken('.')
  56. ) {
  57. return true;
  58. }
  59. token = tokens[i];
  60. if (
  61. token.length > 0
  62. // when onToken returns true, we should skip the rest of the loop
  63. && onToken(token)
  64. ) {
  65. return true;
  66. }
  67. }
  68. return false;
  69. }
  70. interface FindSingleChildLeafResult<Meta> {
  71. node: TrieNode<Meta>,
  72. toPrune: TrieNode<Meta> | null,
  73. tokenToPrune: string | null,
  74. parent: TrieNode<Meta>
  75. }
  76. abstract class Triebase<Meta = any> {
  77. protected readonly $root: TrieNode<Meta> = createNode();
  78. protected $size = 0;
  79. get root() {
  80. return this.$root;
  81. }
  82. constructor(from?: string[] | Set<string> | null) {
  83. // Actually build trie
  84. if (Array.isArray(from)) {
  85. for (let i = 0, l = from.length; i < l; i++) {
  86. this.add(from[i]);
  87. }
  88. } else if (from) {
  89. from.forEach((value) => this.add(value));
  90. }
  91. }
  92. public abstract add(suffix: string, meta?: Meta): void;
  93. protected walkIntoLeafWithTokens(
  94. tokens: string[],
  95. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  96. ) {
  97. let node: TrieNode = this.$root;
  98. let parent: TrieNode = node;
  99. let token: string;
  100. for (let i = tokens.length - 1; i >= 0; i--) {
  101. token = tokens[i];
  102. // if (token === '') {
  103. // break;
  104. // }
  105. parent = node;
  106. if (node[2].has(token)) {
  107. node = node[2].get(token)!;
  108. } else {
  109. return null;
  110. }
  111. onLoop(node, parent, token);
  112. }
  113. return { node, parent };
  114. };
  115. protected walkIntoLeafWithSuffix(
  116. suffix: string,
  117. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  118. ) {
  119. let node: TrieNode = this.$root;
  120. let parent: TrieNode = node;
  121. const onToken = (token: string) => {
  122. if (token === '') {
  123. return true;
  124. }
  125. parent = node;
  126. if (node[2].has(token)) {
  127. node = node[2].get(token)!;
  128. } else {
  129. return null;
  130. }
  131. onLoop(node, parent, token);
  132. return false;
  133. };
  134. if (walkHostnameTokens(suffix, onToken) === null) {
  135. return null;
  136. }
  137. return { node, parent };
  138. };
  139. public contains(suffix: string): boolean { return this.walkIntoLeafWithSuffix(suffix) !== null; };
  140. private walk(
  141. onMatches: (suffix: string[], meta: Meta) => void,
  142. initialNode = this.$root,
  143. initialSuffix: string[] = []
  144. ) {
  145. const nodeStack: Array<TrieNode<Meta>> = [initialNode];
  146. // Resolving initial string (begin the start of the stack)
  147. const suffixStack: string[][] = [initialSuffix];
  148. let node: TrieNode<Meta> = initialNode;
  149. do {
  150. node = nodeStack.pop()!;
  151. const suffix = suffixStack.pop()!;
  152. node[2].forEach((childNode, k) => {
  153. // Pushing the child node to the stack for next iteration of DFS
  154. nodeStack.push(childNode);
  155. suffixStack.push([k, ...suffix]);
  156. });
  157. // If the node is a sentinel, we push the suffix to the results
  158. if (node[0]) {
  159. onMatches(suffix, node[3]);
  160. }
  161. } while (nodeStack.length);
  162. };
  163. protected getSingleChildLeaf(tokens: string[]): FindSingleChildLeafResult<Meta> | null {
  164. let toPrune: TrieNode | null = null;
  165. let tokenToPrune: string | null = null;
  166. const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
  167. // Keeping track of a potential branch to prune
  168. // Even if the node size is 1, but the single child is ".", we should retain the branch
  169. // Since the "." could be special if it is the leaf-est node
  170. const onlyChild = node[2].size < 2 && !node[2].has('.');
  171. if (toPrune != null) { // the top-est branch that could potentially being pruned
  172. if (!onlyChild) {
  173. // The branch has moew than single child, retain the branch.
  174. // And we need to abort prune the parent, so we set it to null
  175. toPrune = null;
  176. tokenToPrune = null;
  177. }
  178. } else if (onlyChild) {
  179. // There is only one token child, or no child at all, we can prune it safely
  180. // It is now the top-est branch that could potentially being pruned
  181. toPrune = parent;
  182. tokenToPrune = token;
  183. }
  184. };
  185. const res = this.walkIntoLeafWithTokens(tokens, onLoop);
  186. if (res === null) return null;
  187. return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  188. };
  189. /**
  190. * Method used to retrieve every item in the trie with the given prefix.
  191. */
  192. public find(
  193. inputSuffix: string,
  194. /** @default true */ includeEqualWithSuffix = true
  195. ): string[] {
  196. // if (smolTree) {
  197. // throw new Error('A Trie with smolTree enabled cannot perform find!');
  198. // }
  199. const inputTokens = hostnameToTokens(inputSuffix);
  200. const res = this.walkIntoLeafWithTokens(inputTokens);
  201. if (res === null) return [];
  202. const matches: string[][] = [];
  203. const onMatches = includeEqualWithSuffix
  204. // fast path (default option)
  205. ? (suffix: string[]) => matches.push(suffix)
  206. // slow path
  207. : (suffix: string[]) => {
  208. if (!deepEqualArray(suffix, inputTokens)) {
  209. matches.push(suffix);
  210. }
  211. };
  212. this.walk(
  213. onMatches,
  214. res.node, // Performing DFS from prefix
  215. inputTokens
  216. );
  217. return matches.map((m) => fastStringArrayJoin(m, ''));
  218. };
  219. /**
  220. * Method used to delete a prefix from the trie.
  221. */
  222. public remove(suffix: string): boolean {
  223. const res = this.getSingleChildLeaf(hostnameToTokens(suffix));
  224. if (res === null) return false;
  225. if (!res.node[0]) return false;
  226. this.$size--;
  227. const { node, toPrune, tokenToPrune } = res;
  228. if (tokenToPrune && toPrune) {
  229. toPrune[2].delete(tokenToPrune);
  230. } else {
  231. node[0] = false;
  232. }
  233. return true;
  234. };
  235. // eslint-disable-next-line @typescript-eslint/unbound-method -- alias class methods
  236. public delete = this.remove;
  237. /**
  238. * Method used to assert whether the given prefix exists in the Trie.
  239. */
  240. public has(suffix: string): boolean {
  241. const res = this.walkIntoLeafWithSuffix(suffix);
  242. return res
  243. ? res.node[0]
  244. : false;
  245. };
  246. public dump(onSuffix: (suffix: string) => void): void;
  247. public dump(): string[];
  248. public dump(onSuffix?: (suffix: string) => void): string[] | void {
  249. const results: string[] = [];
  250. const handleSuffix = onSuffix
  251. ? (suffix: string[]) => onSuffix(fastStringArrayJoin(suffix, ''))
  252. : (suffix: string[]) => results.push(fastStringArrayJoin(suffix, ''));
  253. this.walk(handleSuffix);
  254. return results;
  255. };
  256. public dumpMeta() {
  257. const results: Meta[] = [];
  258. this.walk((_suffix, meta) => {
  259. results.push(meta);
  260. });
  261. return results;
  262. };
  263. public dumpWithMeta(onSuffix: (suffix: string, meta: Meta | undefined) => void): void;
  264. public dumpWithMeta(): string[];
  265. public dumpWithMeta(onSuffix?: (suffix: string, meta: Meta | undefined) => void): string[] | void {
  266. const results: string[] = [];
  267. const handleSuffix = onSuffix
  268. ? (suffix: string[], meta: Meta | undefined) => onSuffix(fastStringArrayJoin(suffix, ''), meta)
  269. : (suffix: string[]) => results.push(fastStringArrayJoin(suffix, ''));
  270. this.walk(handleSuffix);
  271. return results;
  272. };
  273. public inspect(depth: number, unpackMeta?: (meta?: Meta) => any) {
  274. return fastStringArrayJoin(
  275. JSON.stringify(deepTrieNodeToJSON(this.$root, unpackMeta), null, 2).split('\n').map((line) => ' '.repeat(depth) + line),
  276. '\n'
  277. );
  278. }
  279. public [util.inspect.custom](depth: number) {
  280. return this.inspect(depth);
  281. };
  282. }
  283. export class HostnameSmolTrie<Meta = any> extends Triebase<Meta> {
  284. public smolTree = true;
  285. add(suffix: string, meta?: Meta): void {
  286. let node: TrieNode<Meta> = this.$root;
  287. let curNodeChildren: Map<string, TrieNode<Meta>> = node[2];
  288. const onToken = (token: string) => {
  289. curNodeChildren = node[2];
  290. if (curNodeChildren.has(token)) {
  291. node = curNodeChildren.get(token)!;
  292. // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie, skip adding the rest of the node
  293. if (node[0] && token === '.') {
  294. return true;
  295. }
  296. } else {
  297. const newNode = createNode(node);
  298. curNodeChildren.set(token, newNode);
  299. node = newNode;
  300. }
  301. return false;
  302. };
  303. // When walkHostnameTokens returns true, we should skip the rest
  304. if (walkHostnameTokens(suffix, onToken)) {
  305. return;
  306. }
  307. // If we are in smolTree mode, we need to do something at the end of the loop
  308. if (suffix[0] === '.') {
  309. // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  310. // Make sure parent `[start]sub.example.com` (without dot) is removed (SETINEL to false)
  311. (/** parent */ node[1]!)[0] = false;
  312. // Removing the rest of the parent's child nodes
  313. node[2].clear();
  314. // The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
  315. // we can use else-if here, because the children is now empty, we don't need to check the leading "."
  316. } else if (node[2].get('.')?.[0] === true) {
  317. // Trying to add `example.com` when there is already a `.example.com` in the trie
  318. // No need to increment size and set SENTINEL to true (skip this "new" item)
  319. return;
  320. }
  321. node[0] = true;
  322. node[3] = meta!;
  323. }
  324. public whitelist(suffix: string) {
  325. const tokens = hostnameToTokens(suffix);
  326. const res = this.getSingleChildLeaf(tokens);
  327. if (res === null) return;
  328. const { node, toPrune, tokenToPrune, parent } = res;
  329. // Trying to whitelist `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  330. if (tokens[0] === '.') {
  331. // If there is a `[start]sub.example.com` here, remove it
  332. parent[0] = false;
  333. // Removing all the child nodes by empty the children
  334. // This removes the only child ".", which removes "blog.sub.example.com"
  335. parent[2].clear();
  336. } else {
  337. // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
  338. const dotNode = node[2].get('.');
  339. if (dotNode) {
  340. dotNode[0] = false;
  341. }
  342. }
  343. // return early if not found
  344. if (!node[0]) return;
  345. if (tokenToPrune && toPrune) {
  346. toPrune[2].delete(tokenToPrune);
  347. } else {
  348. node[0] = false;
  349. }
  350. };
  351. }
  352. export class HostnameTrie<Meta = any> extends Triebase<Meta> {
  353. get size() {
  354. return this.$size;
  355. }
  356. add(suffix: string, meta?: Meta): void {
  357. let node: TrieNode<Meta> = this.$root;
  358. const onToken = (token: string) => {
  359. if (node[2].has(token)) {
  360. node = node[2].get(token)!;
  361. } else {
  362. const newNode = createNode(node);
  363. node[2].set(token, newNode);
  364. node = newNode;
  365. }
  366. return false;
  367. };
  368. // When walkHostnameTokens returns true, we should skip the rest
  369. if (walkHostnameTokens(suffix, onToken)) {
  370. return;
  371. }
  372. if (!node[0]) {
  373. this.$size++;
  374. node[0] = true;
  375. node[3] = meta!;
  376. }
  377. }
  378. }
  379. export function createTrie<Meta = any>(from: string[] | Set<string> | null, smolTree: true): HostnameSmolTrie<Meta>;
  380. export function createTrie<Meta = any>(from?: string[] | Set<string> | null, smolTree?: false): HostnameTrie<Meta>;
  381. export function createTrie<_Meta = any>(from?: string[] | Set<string> | null, smolTree = true) {
  382. if (smolTree) {
  383. return new HostnameSmolTrie(from);
  384. }
  385. return new HostnameTrie(from);
  386. };
  387. export type Trie = ReturnType<typeof createTrie>;
  388. function deepEqualArray(a: string[], b: string[]) {
  389. let len = a.length;
  390. if (len !== b.length) return false;
  391. while (len--) {
  392. if (a[len] !== b[len]) return false;
  393. }
  394. return true;
  395. };