// trie.ts (13 KB)
  1. /**
  2. * Hostbane-Optimized Trie based on Mnemonist Trie
  3. */
  4. import { fastStringArrayJoin } from './misc';
  5. import util from 'node:util';
  6. import { noop } from 'foxact/noop';
  7. type TrieNode<Meta = any> = [
  8. boolean, /** sentinel */
  9. TrieNode | null, /** parent */
  10. Map<string, TrieNode>, /** children */
  11. Meta /** meta */
  12. ];
  13. const deepTrieNodeToJSON = (
  14. node: TrieNode,
  15. unpackMeta: ((meta?: any) => string) | undefined
  16. ) => {
  17. const obj: Record<string, any> = {};
  18. if (node[0]) {
  19. obj['[start]'] = node[0];
  20. }
  21. if (node[3] != null) {
  22. if (unpackMeta) {
  23. obj['[meta]'] = unpackMeta(node[3]);
  24. } else {
  25. obj['[meta]'] = node[3];
  26. }
  27. }
  28. node[2].forEach((value, key) => {
  29. obj[key] = deepTrieNodeToJSON(value, unpackMeta);
  30. });
  31. return obj;
  32. };
  33. const createNode = <Meta = any>(parent: TrieNode | null = null): TrieNode => [false, parent, new Map<string, TrieNode>(), null] as TrieNode<Meta>;
  34. export const hostnameToTokens = (hostname: string): string[] => {
  35. const tokens = hostname.split('.');
  36. const results: string[] = [];
  37. let token = '';
  38. for (let i = 0, l = tokens.length; i < l; i++) {
  39. if (i > 0) {
  40. results.push('.');
  41. }
  42. token = tokens[i];
  43. if (token.length > 0) {
  44. results.push(token);
  45. }
  46. }
  47. return results;
  48. };
  49. const walkHostnameTokens = (hostname: string, onToken: (token: string) => boolean | null): boolean | null => {
  50. const tokens = hostname.split('.');
  51. let token = '';
  52. const l = tokens.length - 1;
  53. for (let i = l; i >= 0; i--) {
  54. if (
  55. i < l // when i === l, we are at the first of hostname, no splitor there
  56. // when onToken returns true, we should skip the rest of the loop
  57. && onToken('.')
  58. ) {
  59. return true;
  60. }
  61. token = tokens[i];
  62. if (
  63. token.length > 0
  64. // when onToken returns true, we should skip the rest of the loop
  65. && onToken(token)
  66. ) {
  67. return true;
  68. }
  69. }
  70. return false;
  71. };
  72. interface FindSingleChildLeafResult<Meta> {
  73. node: TrieNode<Meta>,
  74. toPrune: TrieNode<Meta> | null,
  75. tokenToPrune: string | null,
  76. parent: TrieNode<Meta>
  77. }
  78. abstract class Triebase<Meta = any> {
  79. protected readonly $root: TrieNode<Meta> = createNode();
  80. protected $size = 0;
  81. get root() {
  82. return this.$root;
  83. }
  84. constructor(from?: string[] | Set<string> | null) {
  85. // Actually build trie
  86. if (Array.isArray(from)) {
  87. for (let i = 0, l = from.length; i < l; i++) {
  88. this.add(from[i]);
  89. }
  90. } else if (from) {
  91. from.forEach((value) => this.add(value));
  92. }
  93. }
  94. public abstract add(suffix: string, meta?: Meta): void;
  95. protected walkIntoLeafWithTokens(
  96. tokens: string[],
  97. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  98. ) {
  99. let node: TrieNode = this.$root;
  100. let parent: TrieNode = node;
  101. let token: string;
  102. for (let i = tokens.length - 1; i >= 0; i--) {
  103. token = tokens[i];
  104. // if (token === '') {
  105. // break;
  106. // }
  107. parent = node;
  108. if (node[2].has(token)) {
  109. node = node[2].get(token)!;
  110. } else {
  111. return null;
  112. }
  113. onLoop(node, parent, token);
  114. }
  115. return { node, parent };
  116. };
  117. protected walkIntoLeafWithSuffix(
  118. suffix: string,
  119. onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  120. ) {
  121. let node: TrieNode = this.$root;
  122. let parent: TrieNode = node;
  123. const onToken = (token: string) => {
  124. if (token === '') {
  125. return true;
  126. }
  127. parent = node;
  128. if (node[2].has(token)) {
  129. node = node[2].get(token)!;
  130. } else {
  131. return null;
  132. }
  133. onLoop(node, parent, token);
  134. return false;
  135. };
  136. if (walkHostnameTokens(suffix, onToken) === null) {
  137. return null;
  138. }
  139. return { node, parent };
  140. };
  141. public contains(suffix: string): boolean { return this.walkIntoLeafWithSuffix(suffix) !== null; };
  142. private walk(
  143. onMatches: (suffix: string[], meta: Meta) => void,
  144. initialNode = this.$root,
  145. initialSuffix: string[] = []
  146. ) {
  147. const nodeStack: Array<TrieNode<Meta>> = [initialNode];
  148. // Resolving initial string (begin the start of the stack)
  149. const suffixStack: string[][] = [initialSuffix];
  150. let node: TrieNode<Meta> = initialNode;
  151. do {
  152. node = nodeStack.pop()!;
  153. const suffix = suffixStack.pop()!;
  154. node[2].forEach((childNode, k) => {
  155. // Pushing the child node to the stack for next iteration of DFS
  156. nodeStack.push(childNode);
  157. suffixStack.push([k, ...suffix]);
  158. });
  159. // If the node is a sentinel, we push the suffix to the results
  160. if (node[0]) {
  161. onMatches(suffix, node[3]);
  162. }
  163. } while (nodeStack.length);
  164. };
  165. protected getSingleChildLeaf(tokens: string[]): FindSingleChildLeafResult<Meta> | null {
  166. let toPrune: TrieNode | null = null;
  167. let tokenToPrune: string | null = null;
  168. const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
  169. // Keeping track of a potential branch to prune
  170. // Even if the node size is 1, but the single child is ".", we should retain the branch
  171. // Since the "." could be special if it is the leaf-est node
  172. const onlyChild = node[2].size < 2 && !node[2].has('.');
  173. if (toPrune != null) { // the top-est branch that could potentially being pruned
  174. if (!onlyChild) {
  175. // The branch has moew than single child, retain the branch.
  176. // And we need to abort prune the parent, so we set it to null
  177. toPrune = null;
  178. tokenToPrune = null;
  179. }
  180. } else if (onlyChild) {
  181. // There is only one token child, or no child at all, we can prune it safely
  182. // It is now the top-est branch that could potentially being pruned
  183. toPrune = parent;
  184. tokenToPrune = token;
  185. }
  186. };
  187. const res = this.walkIntoLeafWithTokens(tokens, onLoop);
  188. if (res === null) return null;
  189. return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  190. };
  191. /**
  192. * Method used to retrieve every item in the trie with the given prefix.
  193. */
  194. public find(
  195. inputSuffix: string,
  196. /** @default true */ includeEqualWithSuffix = true
  197. ): string[] {
  198. // if (smolTree) {
  199. // throw new Error('A Trie with smolTree enabled cannot perform find!');
  200. // }
  201. const inputTokens = hostnameToTokens(inputSuffix);
  202. const res = this.walkIntoLeafWithTokens(inputTokens);
  203. if (res === null) return [];
  204. const matches: string[][] = [];
  205. const onMatches = includeEqualWithSuffix
  206. // fast path (default option)
  207. ? (suffix: string[]) => matches.push(suffix)
  208. // slow path
  209. : (suffix: string[]) => {
  210. if (!deepEqualArray(suffix, inputTokens)) {
  211. matches.push(suffix);
  212. }
  213. };
  214. this.walk(
  215. onMatches,
  216. res.node, // Performing DFS from prefix
  217. inputTokens
  218. );
  219. return matches.map((m) => fastStringArrayJoin(m, ''));
  220. };
  221. /**
  222. * Method used to delete a prefix from the trie.
  223. */
  224. public remove(suffix: string): boolean {
  225. const res = this.getSingleChildLeaf(hostnameToTokens(suffix));
  226. if (res === null) return false;
  227. if (!res.node[0]) return false;
  228. this.$size--;
  229. const { node, toPrune, tokenToPrune } = res;
  230. if (tokenToPrune && toPrune) {
  231. toPrune[2].delete(tokenToPrune);
  232. } else {
  233. node[0] = false;
  234. }
  235. return true;
  236. };
  237. // eslint-disable-next-line @typescript-eslint/unbound-method -- alias class methods
  238. public delete = this.remove;
  239. /**
  240. * Method used to assert whether the given prefix exists in the Trie.
  241. */
  242. public has(suffix: string): boolean {
  243. const res = this.walkIntoLeafWithSuffix(suffix);
  244. return res
  245. ? res.node[0]
  246. : false;
  247. };
  248. public dump(onSuffix: (suffix: string) => void): void;
  249. public dump(): string[];
  250. public dump(onSuffix?: (suffix: string) => void): string[] | void {
  251. const results: string[] = [];
  252. const handleSuffix = onSuffix
  253. ? (suffix: string[]) => onSuffix(fastStringArrayJoin(suffix, ''))
  254. : (suffix: string[]) => results.push(fastStringArrayJoin(suffix, ''));
  255. this.walk(handleSuffix);
  256. return results;
  257. };
  258. public dumpMeta() {
  259. const results: Meta[] = [];
  260. this.walk((_suffix, meta) => {
  261. results.push(meta);
  262. });
  263. return results;
  264. };
  265. public dumpWithMeta(onSuffix: (suffix: string, meta: Meta | undefined) => void): void;
  266. public dumpWithMeta(): string[];
  267. public dumpWithMeta(onSuffix?: (suffix: string, meta: Meta | undefined) => void): string[] | void {
  268. const results: string[] = [];
  269. const handleSuffix = onSuffix
  270. ? (suffix: string[], meta: Meta | undefined) => onSuffix(fastStringArrayJoin(suffix, ''), meta)
  271. : (suffix: string[]) => results.push(fastStringArrayJoin(suffix, ''));
  272. this.walk(handleSuffix);
  273. return results;
  274. };
  275. public inspect(depth: number, unpackMeta?: (meta?: Meta) => any) {
  276. return fastStringArrayJoin(
  277. JSON.stringify(deepTrieNodeToJSON(this.$root, unpackMeta), null, 2).split('\n').map((line) => ' '.repeat(depth) + line),
  278. '\n'
  279. );
  280. }
  281. public [util.inspect.custom](depth: number) {
  282. return this.inspect(depth);
  283. };
  284. }
  285. export class HostnameSmolTrie<Meta = any> extends Triebase<Meta> {
  286. public smolTree = true;
  287. add(suffix: string, meta?: Meta): void {
  288. let node: TrieNode<Meta> = this.$root;
  289. let curNodeChildren: Map<string, TrieNode<Meta>> = node[2];
  290. const onToken = (token: string) => {
  291. curNodeChildren = node[2];
  292. if (curNodeChildren.has(token)) {
  293. node = curNodeChildren.get(token)!;
  294. // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie, skip adding the rest of the node
  295. if (node[0] && token === '.') {
  296. return true;
  297. }
  298. } else {
  299. const newNode = createNode(node);
  300. curNodeChildren.set(token, newNode);
  301. node = newNode;
  302. }
  303. return false;
  304. };
  305. // When walkHostnameTokens returns true, we should skip the rest
  306. if (walkHostnameTokens(suffix, onToken)) {
  307. return;
  308. }
  309. // If we are in smolTree mode, we need to do something at the end of the loop
  310. if (suffix[0] === '.') {
  311. // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  312. // Make sure parent `[start]sub.example.com` (without dot) is removed (SETINEL to false)
  313. (/** parent */ node[1]!)[0] = false;
  314. // Removing the rest of the parent's child nodes
  315. node[2].clear();
  316. // The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
  317. // we can use else-if here, because the children is now empty, we don't need to check the leading "."
  318. } else if (node[2].get('.')?.[0] === true) {
  319. // Trying to add `example.com` when there is already a `.example.com` in the trie
  320. // No need to increment size and set SENTINEL to true (skip this "new" item)
  321. return;
  322. }
  323. node[0] = true;
  324. node[3] = meta!;
  325. }
  326. public whitelist(suffix: string) {
  327. const tokens = hostnameToTokens(suffix);
  328. const res = this.getSingleChildLeaf(tokens);
  329. if (res === null) return;
  330. const { node, toPrune, tokenToPrune, parent } = res;
  331. // Trying to whitelist `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
  332. if (tokens[0] === '.') {
  333. // If there is a `[start]sub.example.com` here, remove it
  334. parent[0] = false;
  335. // Removing all the child nodes by empty the children
  336. // This removes the only child ".", which removes "blog.sub.example.com"
  337. parent[2].clear();
  338. } else {
  339. // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
  340. const dotNode = node[2].get('.');
  341. if (dotNode) {
  342. dotNode[0] = false;
  343. }
  344. }
  345. // return early if not found
  346. if (!node[0]) return;
  347. if (tokenToPrune && toPrune) {
  348. toPrune[2].delete(tokenToPrune);
  349. } else {
  350. node[0] = false;
  351. }
  352. };
  353. }
  354. export class HostnameTrie<Meta = any> extends Triebase<Meta> {
  355. get size() {
  356. return this.$size;
  357. }
  358. add(suffix: string, meta?: Meta): void {
  359. let node: TrieNode<Meta> = this.$root;
  360. const onToken = (token: string) => {
  361. if (node[2].has(token)) {
  362. node = node[2].get(token)!;
  363. } else {
  364. const newNode = createNode(node);
  365. node[2].set(token, newNode);
  366. node = newNode;
  367. }
  368. return false;
  369. };
  370. // When walkHostnameTokens returns true, we should skip the rest
  371. if (walkHostnameTokens(suffix, onToken)) {
  372. return;
  373. }
  374. if (!node[0]) {
  375. this.$size++;
  376. node[0] = true;
  377. node[3] = meta!;
  378. }
  379. }
  380. }
  381. export function createTrie<Meta = any>(from: string[] | Set<string> | null, smolTree: true): HostnameSmolTrie<Meta>;
  382. export function createTrie<Meta = any>(from?: string[] | Set<string> | null, smolTree?: false): HostnameTrie<Meta>;
  383. export function createTrie<_Meta = any>(from?: string[] | Set<string> | null, smolTree = true) {
  384. if (smolTree) {
  385. return new HostnameSmolTrie(from);
  386. }
  387. return new HostnameTrie(from);
  388. };
  389. export type Trie = ReturnType<typeof createTrie>;
  390. function deepEqualArray(a: string[], b: string[]) {
  391. let len = a.length;
  392. if (len !== b.length) return false;
  393. while (len--) {
  394. if (a[len] !== b[len]) return false;
  395. }
  396. return true;
  397. };