trie.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. /**
  2. * Hostbane-Optimized Trie based on Mnemonist Trie
  3. */
  4. import { fastStringArrayJoin } from './misc';
  5. import util from 'node:util';
  6. import { noop } from 'foxact/noop';
  /**
   * A single trie node, stored as a fixed-shape tuple.
   *
   * - index 0: sentinel flag, `true` when a stored entry ends at this node
   * - index 1: the parent node, or `null` when the node was created without one
   * - index 2: child nodes keyed by token
   * - index 3: metadata attached to the node
   */
  type TrieNode<Meta = any> = [
    sentinel: boolean,
    parent: TrieNode | null,
    children: Map<string, TrieNode>,
    meta: Meta
  ];
  /**
   * Recursively converts a trie node into a plain object for debugging output.
   *
   * The result has a `[start]` key when the node is a sentinel, a `[meta]` key when
   * the node's meta is neither `null` nor `undefined`, and one key per child token
   * holding that child's converted subtree.
   *
   * @param node - the node to convert
   * @param unpackMeta - optional formatter applied to the meta before it is stored under `[meta]`
   * @returns the plain-object representation of `node` and everything below it
   */
  const deepTrieNodeToJSON = (
    node: TrieNode,
    unpackMeta: ((meta?: any) => string) | undefined
  ): Record<string, any> => {
    const [sentinel, , children, meta] = node;
    const json: Record<string, any> = {};

    if (sentinel) {
      json['[start]'] = sentinel;
    }
    if (meta != null) {
      json['[meta]'] = unpackMeta ? unpackMeta(meta) : meta;
    }
    for (const [token, child] of children) {
      json[token] = deepTrieNodeToJSON(child, unpackMeta);
    }

    return json;
  };
  /**
   * Creates an empty trie node: not a sentinel, no children, `null` meta.
   *
   * @param parent - the node this one hangs under; defaults to `null`
   * @returns the freshly created node tuple
   */
  const createNode = <Meta = any>(parent: TrieNode | null = null): TrieNode => {
    const children = new Map<string, TrieNode>();
    return [false, parent, children, null] as TrieNode<Meta>;
  };
  /**
   * Splits a hostname into trie tokens, keeping every `.` separator as its own token.
   *
   * Empty labels (produced by a leading, trailing or doubled dot) are dropped, but
   * the separators around them are kept, e.g. `.skk.moe` becomes
   * `['.', 'skk', '.', 'moe']`.
   *
   * @param hostname - the hostname to tokenize
   * @returns the tokens in left-to-right order
   */
  export const hostnameToTokens = (hostname: string): string[] => {
    const output: string[] = [];

    hostname.split('.').forEach((label, index) => {
      // Every label except the first is preceded by a separator token
      if (index !== 0) {
        output.push('.');
      }
      if (label !== '') {
        output.push(label);
      }
    });

    return output;
  };
  /**
   * Feeds the tokens of a hostname to `onToken` from right to left (TLD first),
   * emitting a `.` token between labels and skipping empty labels.
   *
   * The callback's result controls the walk:
   * - `false`: keep walking
   * - `true`: stop immediately, the walk returns `true`
   * - `null`: stop immediately, the walk returns `null`
   *
   * Previously a `null` result was treated like `false`, so the walk carried on and
   * `null` was never returned even though the signature allows it.
   *
   * @param hostname - the hostname to walk
   * @param onToken - invoked once per token
   * @returns `true` or `null` when the callback stopped the walk with that value,
   * `false` when every token was visited
   */
  const walkHostnameTokens = (hostname: string, onToken: (token: string) => boolean | null): boolean | null => {
    const labels = hostname.split('.');
    const last = labels.length - 1;

    for (let i = last; i >= 0; i--) {
      // The right-most label has no separator to its right, so nothing is emitted for i === last
      if (i < last) {
        const separatorResult = onToken('.');
        if (separatorResult === null) {
          return null;
        }
        if (separatorResult) {
          return true;
        }
      }

      const label = labels[i];
      if (label.length > 0) {
        const labelResult = onToken(label);
        if (labelResult === null) {
          return null;
        }
        if (labelResult) {
          return true;
        }
      }
    }

    return false;
  };
  /** Outcome of walking to a leaf while tracking which branch could be pruned. */
  interface FindSingleChildLeafResult<Meta> {
    /** The node reached after consuming every token. */
    node: TrieNode<Meta>;
    /** The node whose child entry can be deleted to prune the branch, or `null` when nothing is prunable. */
    toPrune: TrieNode<Meta> | null;
    /** The child key to delete from `toPrune`, or `null` when nothing is prunable. */
    tokenToPrune: string | null;
    /** The direct parent of `node` on the walked path. */
    parent: TrieNode<Meta>;
  }
  /**
   * Shared implementation of the hostname tries in this file.
   *
   * Hostnames are stored right-to-left (TLD nearest the root) with every `.`
   * separator kept as its own token. Subclasses supply `add`; this class provides
   * lookup, removal, traversal and debugging helpers.
   */
  abstract class Triebase<Meta = any> {
    protected readonly $root: TrieNode<Meta> = createNode();
    /** Entry counter; decremented by `remove`, maintained on insertion by subclasses that track it. */
    protected $size = 0;

    get root() {
      return this.$root;
    }

    /**
     * @param from - optional initial entries, each passed to `add`
     */
    constructor(from?: string[] | Set<string> | null) {
      // Actually build trie
      if (Array.isArray(from)) {
        for (let i = 0, l = from.length; i < l; i++) {
          this.add(from[i]);
        }
      } else if (from) {
        from.forEach((value) => this.add(value));
      }
    }

    public abstract add(suffix: string, meta?: Meta): void;

    /**
     * Walks from the root following `tokens` from last to first.
     *
     * @param tokens - tokens in left-to-right hostname order (as produced by `hostnameToTokens`)
     * @param onLoop - invoked after each successful step with the node reached, its parent and the token
     * @returns the node reached and its parent, or `null` when a token has no matching child
     */
    protected walkIntoLeafWithTokens(
      tokens: string[],
      onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
    ) {
      let node: TrieNode = this.$root;
      let parent: TrieNode = node;

      for (let i = tokens.length - 1; i >= 0; i--) {
        const token = tokens[i];
        const child = node[2].get(token);
        if (child === undefined) {
          return null;
        }
        parent = node;
        node = child;
        onLoop(node, parent, token);
      }

      return { node, parent };
    }

    /**
     * Same walk as `walkIntoLeafWithTokens`, but tokenizes `suffix` on the fly.
     *
     * A missing token is recorded locally and the walk is aborted, so the result
     * does not depend on how `walkHostnameTokens` treats a `null` callback result.
     * Previously a miss was not reported and the walk continued from the last node
     * that did match.
     *
     * @param suffix - hostname (optionally with a leading dot) to look up
     * @param onLoop - invoked after each successful step with the node reached, its parent and the token
     * @returns the node reached and its parent, or `null` when a token has no matching child
     */
    protected walkIntoLeafWithSuffix(
      suffix: string,
      onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
    ): { node: TrieNode, parent: TrieNode } | null {
      let node: TrieNode = this.$root;
      let parent: TrieNode = node;
      let missing = false;

      const onToken = (token: string) => {
        if (token === '') {
          return true;
        }

        const child = node[2].get(token);
        if (child === undefined) {
          missing = true;
          // returning true stops the walk right here
          return true;
        }

        parent = node;
        node = child;
        onLoop(node, parent, token);

        return false;
      };

      walkHostnameTokens(suffix, onToken);

      if (missing) {
        return null;
      }
      return { node, parent };
    }

    /** Whether the full token path of `suffix` exists in the trie, sentinel or not. */
    public contains(suffix: string): boolean {
      return this.walkIntoLeafWithSuffix(suffix) !== null;
    }

    /**
     * Depth-first traversal that reports every sentinel node at or below `initialNode`.
     *
     * @param onMatches - receives the token list (left-to-right order) and the meta of each sentinel node
     * @param initialNode - where to start; defaults to the root
     * @param initialSuffix - tokens that lead to `initialNode`, prepended to as the walk descends
     */
    private walk(
      onMatches: (suffix: string[], meta: Meta) => void,
      initialNode = this.$root,
      initialSuffix: string[] = []
    ) {
      const nodeStack: Array<TrieNode<Meta>> = [initialNode];
      // The two stacks are kept in lockstep: suffixStack[i] is the token path of nodeStack[i]
      const suffixStack: string[][] = [initialSuffix];

      let node: TrieNode<Meta> = initialNode;

      do {
        node = nodeStack.pop()!;
        const suffix = suffixStack.pop()!;

        node[2].forEach((childNode, k) => {
          // Pushing the child node to the stack for next iteration of DFS
          nodeStack.push(childNode);
          // Deeper tokens sit further left in the hostname, hence the prepend
          suffixStack.push([k, ...suffix]);
        });

        // If the node is a sentinel, report its suffix
        if (node[0]) {
          onMatches(suffix, node[3]);
        }
      } while (nodeStack.length);
    }

    /**
     * Walks to the node for `tokens` and works out the top-most branch that can be
     * deleted without touching any other entry.
     *
     * A node counts as prunable when it has at most one child and that child is not
     * `.`. A sentinel node in the middle of the chain is a stored entry of its own,
     * so the chain restarts below it. Previously the chain could start above such a
     * sentinel, and pruning deleted it together with the target.
     *
     * @param tokens - tokens in left-to-right hostname order
     * @returns the leaf, its parent and the prune point, or `null` when the path does not exist
     */
    protected getSingleChildLeaf(tokens: string[]): FindSingleChildLeafResult<Meta> | null {
      let toPrune: TrieNode | null = null;
      let tokenToPrune: string | null = null;

      const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
        // Keeping track of a potential branch to prune
        // Even if the node size is 1, but the single child is ".", we should retain the branch
        // Since the "." could be special if it is the leaf-est node
        const onlyChild = node[2].size < 2 && !node[2].has('.');

        if (toPrune !== null && (!onlyChild || parent[0])) {
          // Either this node has more than a single child, or the node we just left
          // is itself a stored entry: the candidate chain is broken, start over
          toPrune = null;
          tokenToPrune = null;
        }

        if (toPrune === null && onlyChild) {
          // There is only one token child, or no child at all, we can prune it safely
          // It is now the top-most branch that could potentially be pruned
          toPrune = parent;
          tokenToPrune = token;
        }
      };

      const res = this.walkIntoLeafWithTokens(tokens, onLoop);
      if (res === null) return null;

      return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
    }

    /**
     * Retrieves every stored entry that ends with the given suffix.
     *
     * @param inputSuffix - the suffix to search for
     * @param includeEqualWithSuffix - when `false`, an entry exactly equal to `inputSuffix` is left out
     * @returns the matching entries as strings
     */
    public find(
      inputSuffix: string,
      /** @default true */ includeEqualWithSuffix = true
    ): string[] {
      const inputTokens = hostnameToTokens(inputSuffix);
      const res = this.walkIntoLeafWithTokens(inputTokens);
      if (res === null) return [];

      const matches: string[][] = [];

      const onMatches = includeEqualWithSuffix
        // fast path (default option)
        ? (suffix: string[]) => matches.push(suffix)
        // slow path
        : (suffix: string[]) => {
          if (!deepEqualArray(suffix, inputTokens)) {
            matches.push(suffix);
          }
        };

      this.walk(
        onMatches,
        res.node, // Performing DFS from the node the suffix leads to
        inputTokens
      );

      return matches.map((m) => fastStringArrayJoin(m, ''));
    }

    /**
     * Deletes an entry from the trie.
     *
     * @param suffix - the entry to delete
     * @returns `true` when the entry was stored and has been removed, `false` otherwise
     */
    public remove(suffix: string): boolean {
      const res = this.getSingleChildLeaf(hostnameToTokens(suffix));
      if (res === null) return false;

      if (!res.node[0]) return false;

      this.$size--;
      const { node, toPrune, tokenToPrune } = res;

      if (tokenToPrune && toPrune) {
        // Drop the whole dangling branch
        toPrune[2].delete(tokenToPrune);
      } else {
        // Other entries live below this node, only clear its flag
        node[0] = false;
      }

      return true;
    }

    // eslint-disable-next-line @typescript-eslint/unbound-method -- alias class methods
    public delete = this.remove;

    /**
     * Asserts whether the exact entry is stored in the trie.
     */
    public has(suffix: string): boolean {
      const res = this.walkIntoLeafWithSuffix(suffix);

      return res
        ? res.node[0]
        : false;
    }

    /**
     * Visits every stored entry.
     *
     * With a callback, each entry is passed to it and the returned array stays
     * empty; without one, the entries are collected into the returned array.
     */
    public dump(onSuffix: (suffix: string) => void): void;
    public dump(): string[];
    public dump(onSuffix?: (suffix: string) => void): string[] | void {
      const results: string[] = [];

      const handleSuffix = onSuffix
        ? (suffix: string[]) => onSuffix(fastStringArrayJoin(suffix, ''))
        : (suffix: string[]) => results.push(fastStringArrayJoin(suffix, ''));

      this.walk(handleSuffix);

      return results;
    }

    /** Collects the meta of every stored entry. */
    public dumpMeta() {
      const results: Meta[] = [];

      this.walk((_suffix, meta) => {
        results.push(meta);
      });

      return results;
    }

    /** Collects every stored entry together with its meta. */
    public dumpWithMeta() {
      const results: Array<[string, Meta]> = [];

      this.walk((suffix, meta) => {
        results.push([fastStringArrayJoin(suffix, ''), meta]);
      });

      return results;
    }

    /**
     * Renders the trie as pretty-printed JSON, each line prefixed with `depth` spaces.
     *
     * @param depth - number of spaces to indent every line with; values that
     * `String.prototype.repeat` rejects (negative or infinite) yield no indentation
     * @param unpackMeta - optional formatter applied to each node's meta
     */
    public inspect(depth: number, unpackMeta?: (meta?: Meta) => any) {
      const indent = depth > 0 && Number.isFinite(depth) ? ' '.repeat(depth) : '';

      return fastStringArrayJoin(
        JSON.stringify(deepTrieNodeToJSON(this.$root, unpackMeta), null, 2).split('\n').map((line) => indent + line),
        '\n'
      );
    }

    /** Hook used by Node's `util.inspect`; delegates to `inspect`. */
    public [util.inspect.custom](depth: number) {
      return this.inspect(depth);
    }
  }
  /**
   * Trie variant that keeps the stored set minimal: a `.example.com` entry
   * replaces and suppresses anything stored beneath it.
   */
  export class HostnameSmolTrie<Meta = any> extends Triebase<Meta> {
    public smolTree = true;

    /**
     * Inserts `suffix`, unless a stored dotted suffix it ends with already covers it.
     *
     * @param suffix - the entry to insert; a leading `.` makes it replace everything stored beneath it
     * @param meta - metadata stored on the entry's node
     */
    add(suffix: string, meta?: Meta): void {
      let cursor: TrieNode<Meta> = this.$root;

      const descend = (token: string): boolean => {
        const children = cursor[2];
        const existing = children.get(token);

        if (existing === undefined) {
          const created = createNode(cursor);
          children.set(token, created);
          cursor = created;
          return false;
        }

        cursor = existing;
        // While adding `[start]blog|.skk.moe` we ran into a stored `[start].skk.moe`:
        // the new entry is already covered, tell the walker to stop
        return existing[0] && token === '.';
      };

      // A truthy result means a covering entry was found, nothing left to do
      if (walkHostnameTokens(suffix, descend)) {
        return;
      }

      if (suffix[0] === '.') {
        // Adding `[start].sub.example.com` while e.g. `[start]blog.sub.example.com` is stored:
        // un-mark the parent `[start]sub.example.com` (the entry without the dot)...
        const owner = cursor[1]!;
        owner[0] = false;
        // ...and drop everything stored beneath the dot node.
        cursor[2].clear();
        // The sentinel of this node is set at the end of the method
      } else if (cursor[2].get('.')?.[0] === true) {
        // Adding `example.com` while `.example.com` is stored: skip this "new" item
        return;
      }

      cursor[0] = true;
      cursor[3] = meta!;
    }

    /**
     * Removes `suffix` from the trie, along with the entries it covers.
     *
     * @param suffix - the entry to whitelist; does nothing when its token path is not in the trie
     */
    public whitelist(suffix: string) {
      const tokens = hostnameToTokens(suffix);
      const located = this.getSingleChildLeaf(tokens);
      if (located === null) return;

      const leaf = located.node;

      // Whitelisting `[start].sub.example.com` while `[start]blog.sub.example.com` is stored
      if (tokens[0] === '.') {
        const owner = located.parent;
        // If there is a `[start]sub.example.com` here, remove it
        owner[0] = false;
        // Emptying the children removes the only child ".", which removes "blog.sub.example.com"
        owner[2].clear();
      }

      // Whitelisting `example.com` while `.example.com` is stored
      const dotChild = leaf[2].get('.');
      if (dotChild) {
        dotChild[0] = false;
      }

      // The exact entry is not stored, nothing more to remove
      if (!leaf[0]) return;

      if (located.tokenToPrune && located.toPrune) {
        located.toPrune[2].delete(located.tokenToPrune);
      } else {
        leaf[0] = false;
      }
    }
  }
  /**
   * Plain trie variant: every added suffix is stored as-is and counted.
   */
  export class HostnameTrie<Meta = any> extends Triebase<Meta> {
    /** Number of entries currently counted by the trie. */
    get size() {
      return this.$size;
    }

    /**
     * Inserts `suffix`. An entry that is already stored is left untouched,
     * including its existing meta.
     *
     * @param suffix - the entry to insert
     * @param meta - metadata stored on the entry's node when it is newly inserted
     */
    add(suffix: string, meta?: Meta): void {
      let cursor: TrieNode<Meta> = this.$root;

      const descend = (token: string): boolean => {
        const children = cursor[2];
        let next = children.get(token);
        if (next === undefined) {
          next = createNode(cursor);
          children.set(token, next);
        }
        cursor = next;
        return false;
      };

      // When walkHostnameTokens returns true, we should skip the rest
      if (walkHostnameTokens(suffix, descend)) {
        return;
      }

      // Already stored: do not count it twice
      if (cursor[0]) {
        return;
      }

      this.$size++;
      cursor[0] = true;
      cursor[3] = meta!;
    }
  }
  /**
   * Builds a trie from the given entries.
   *
   * NOTE(review): the implementation defaults `smolTree` to `true`, while the second
   * overload types a call that omits the flag as `HostnameTrie`. Such a call
   * constructs a `HostnameSmolTrie` at runtime. Confirm which of the two was intended.
   *
   * @param from - optional initial entries
   * @param smolTree - `true` for a `HostnameSmolTrie`, `false` for a `HostnameTrie`
   */
  export function createTrie<Meta = any>(from: string[] | Set<string> | null, smolTree: true): HostnameSmolTrie<Meta>;
  export function createTrie<Meta = any>(from?: string[] | Set<string> | null, smolTree?: false): HostnameTrie<Meta>;
  export function createTrie<_Meta = any>(from?: string[] | Set<string> | null, smolTree = true) {
    return smolTree
      ? new HostnameSmolTrie(from)
      : new HostnameTrie(from);
  }

  /** The type `createTrie` is declared to return, as resolved by `ReturnType`. */
  export type Trie = ReturnType<typeof createTrie>;
  /**
   * Element-wise strict equality check for two string arrays.
   *
   * @returns `true` when both arrays have the same length and identical elements at every index
   */
  function deepEqualArray(a: string[], b: string[]) {
    const size = a.length;
    if (b.length !== size) {
      return false;
    }

    for (let i = 0; i < size; i++) {
      if (a[i] !== b[i]) {
        return false;
      }
    }

    return true;
  }