trie.ts 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. /**
  2. * Suffix Trie based on Mnemonist Trie
  3. */
  4. // import { Trie } from 'mnemonist';
  /**
   * Symbol-keyed flag stored on every trie node; `true` means an inserted
   * item terminates at that node.
   */
  export const SENTINEL = Symbol('SENTINEL');

  /** Symbol-keyed back-reference from a trie node to the node it was created under. */
  const PARENT = Symbol('Parent Node');
  /**
   * A trie node: a Map from token to child node, decorated with symbol-keyed
   * bookkeeping fields.
   */
  interface TrieNode extends Map<string, TrieNode> {
    /** Whether an inserted item terminates at this node. */
    [SENTINEL]: boolean;
    /** The node this one was created under, or `null` when none was given. */
    [PARENT]: TrieNode | null;
    /** Custom formatter hook registered under `Bun.inspect.custom`. */
    [Bun.inspect.custom]: () => string;
  }
  /**
   * Recursively converts a trie node into a plain object: one key per child
   * token, plus a `'[start]'` key when the node is marked with SENTINEL.
   */
  const deepTrieNodeToJSON = (node: TrieNode) => {
    const json: Record<string, any> = {};

    if (node[SENTINEL]) {
      json['[start]'] = node[SENTINEL];
    }

    // Map iteration visits children in insertion order, same as Map#forEach.
    for (const [token, child] of node) {
      json[token] = deepTrieNodeToJSON(child);
    }

    return json;
  };
  /**
   * Inspect hook installed on every node: renders the subtree rooted at
   * `this` as pretty-printed (2-space) JSON.
   */
  function trieNodeInspectCustom(this: TrieNode) {
    const plain = deepTrieNodeToJSON(this);
    return JSON.stringify(plain, null, 2);
  }
  /**
   * Creates an empty, non-terminal trie node.
   *
   * @param parent node recorded under the PARENT symbol; defaults to `null`
   * @returns a fresh Map carrying the SENTINEL, PARENT and inspect fields
   */
  const createNode = (parent: TrieNode | null = null): TrieNode => {
    // A bare Map is decorated in place, hence the assertion.
    const fresh = new Map<string, TrieNode>() as TrieNode;

    fresh[SENTINEL] = false;
    fresh[PARENT] = parent;
    fresh[Bun.inspect.custom] = trieNodeInspectCustom;

    return fresh;
  };
  32. export const createTrie = (from?: string[] | Set<string> | null, hostnameMode = false, smolTree = false) => {
  33. let size = 0;
  34. const root: TrieNode = createNode();
  /**
   * Turns an input string into the token sequence stored in the trie.
   *
   * - hostname mode: the string is cut at every `.`; each dot is kept as its
   *   own token and empty labels are dropped, e.g. `blog.skk.moe` becomes
   *   `['blog', '.', 'skk', '.', 'moe']`.
   * - otherwise: the string itself is returned and callers index it one
   *   UTF-16 code unit at a time.
   */
  const suffixToTokens = hostnameMode
    ? (suffix: string) => {
      const tokens: string[] = [];
      let labelStart = 0;
      let dotAt = suffix.indexOf('.');

      while (dotAt !== -1) {
        // Emit the label before the dot (if any), then the dot itself.
        if (dotAt > labelStart) {
          tokens.push(suffix.slice(labelStart, dotAt));
        }
        tokens.push('.');
        labelStart = dotAt + 1;
        dotAt = suffix.indexOf('.', labelStart);
      }

      // Trailing label after the last dot.
      if (labelStart < suffix.length) {
        tokens.push(suffix.slice(labelStart));
      }

      return tokens;
    }
    : (suffix: string) => suffix;
  /**
   * Inserts the given suffix into the trie, walking its tokens from last to
   * first and creating missing nodes along the way.
   *
   * With `smolTree` enabled the insertion also dedupes:
   * - nothing is added below (or at) a node whose `.` child is already terminal;
   * - adding a suffix whose first token is `.` clears the terminal flag of the
   *   node before it and replaces that node's `.` child with a fresh one,
   *   detaching whatever hung below the old `.` child.
   */
  const add = (suffix: string): void => {
    const tokens = suffixToTokens(suffix);
    let node: TrieNode = root;

    for (let i = tokens.length - 1; i >= 0; i--) {
      const token = tokens[i];
      const existing = node.get(token);

      if (existing === undefined) {
        const created = createNode(node);
        node.set(token, created);
        node = created;
      } else {
        node = existing;
        // Adding `[start]blog.skk.moe` while `[start].skk.moe` is already present:
        // the new entry is covered, so skip it.
        if (smolTree && (node.get('.')?.[SENTINEL])) {
          return;
        }
      }

      if (!smolTree) {
        continue;
      }

      // Adding `[start].sub.example.com` while `[start]blog.sub.example.com` may exist
      if (i === 1 && tokens[0] === '.') {
        // Drop a possible `[start]sub.example.com`
        node[SENTINEL] = false;
        // Swap in a fresh `.` child; the previous one (and its subtree) is disconnected
        const replacement = createNode(node);
        node.set('.', replacement);
        node = replacement;
        break;
      }

      // Adding `example.com` while `.example.com` is already present
      if (i === 0 && node.get('.')?.[SENTINEL] === true) {
        return;
      }
    }

    // Only count items that were not already present
    if (!node[SENTINEL]) {
      size++;
    }
    node[SENTINEL] = true;
  };
  /**
   * Tells whether the token path of `suffix` exists in the trie, regardless
   * of whether an inserted item terminates at the end of that path.
   *
   * @param suffix the string to look up
   * @returns `true` when every token (read last to first) has a matching node
   */
  const contains = (suffix: string): boolean => {
    const tokens = suffixToTokens(suffix);
    let cursor: TrieNode = root;
    let remaining = tokens.length;

    while (remaining > 0) {
      remaining--;
      const next = cursor.get(tokens[remaining]);
      if (next === undefined) {
        return false;
      }
      cursor = next;
    }

    return true;
  };
  /**
   * Collects every item stored in the trie that ends with `inputSuffix`.
   *
   * @param inputSuffix the suffix to search for
   * @param includeEqualWithSuffix when `false`, an item exactly equal to
   *   `inputSuffix` is left out of the result
   * @returns the matching items, in depth-first visiting order
   * @throws Error when the trie was created with `smolTree` enabled
   */
  const find = (inputSuffix: string, /** @default true */ includeEqualWithSuffix = true): string[] => {
    if (smolTree) {
      throw new Error('A Trie with smolTree enabled cannot perform find!');
    }

    const inputTokens = suffixToTokens(inputSuffix);

    // Walk down to the node that represents the whole input suffix.
    let start: TrieNode = root;
    for (let idx = inputTokens.length - 1; idx >= 0; idx--) {
      const token = inputTokens[idx];
      if (hostnameMode && token === '') {
        break;
      }
      const next = start.get(token);
      if (next === undefined) {
        return [];
      }
      start = next;
    }

    // Depth-first traversal; each frame pairs a node with the item spelled so far.
    const found: Array<string | string[]> = [];
    const pending: Array<[TrieNode, string | string[]]> = [[start, inputTokens]];

    let frame = pending.pop();
    while (frame !== undefined) {
      const [current, suffix] = frame;

      // Only the seed frame carries `inputTokens` itself; every descendant is
      // strictly longer, so this comparison singles out the exact match.
      if (current[SENTINEL] && (includeEqualWithSuffix || suffix !== inputTokens)) {
        found.push(suffix);
      }

      for (const [key, child] of current) {
        pending.push([child, hostnameMode ? [key, ...suffix] : key + (suffix as string)]);
      }

      frame = pending.pop();
    }

    return hostnameMode
      ? found.map((tokens) => (tokens as string[]).join(''))
      : found as string[];
  };
  /**
   * Traverses the trie like `find`, but instead of returning the matches it
   * deletes each of them from `set` in place.
   *
   * NOTE(review): in hostname mode an item exactly equal to `inputSuffix` is
   * deleted too, while in non-hostname mode the exact match is skipped —
   * confirm whether this asymmetry is intended.
   *
   * @param inputSuffix the suffix whose matches should be removed
   * @param set the set to delete matches from
   * @throws Error when the trie was created with `smolTree` enabled
   */
  const substractSetInPlaceFromFound = (inputSuffix: string, set: Set<string>) => {
    if (smolTree) {
      throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
    }

    const inputTokens = suffixToTokens(inputSuffix);

    // Locate the node for the input suffix; nothing to do when it is absent.
    let start: TrieNode = root;
    for (let idx = inputTokens.length - 1; idx >= 0; idx--) {
      const next = start.get(inputTokens[idx]);
      if (next === undefined) {
        return;
      }
      start = next;
    }

    // Depth-first traversal; each frame pairs a node with the item spelled so far.
    const pending: Array<[TrieNode, string | string[]]> = [[start, inputTokens]];

    let frame = pending.pop();
    while (frame !== undefined) {
      const [current, suffix] = frame;

      if (current[SENTINEL]) {
        // Match found: drop it from the set
        if (hostnameMode) {
          set.delete((suffix as string[]).join(''));
        } else if (suffix !== inputTokens) {
          set.delete(suffix as string);
        }
      }

      for (const [key, child] of current) {
        pending.push([child, hostnameMode ? [key, ...suffix] : key + (suffix as string)]);
      }

      frame = pending.pop();
    }
  };
  /**
   * Deletes the given suffix from the trie.
   *
   * The terminal flag of the matching node is cleared and `size` is
   * decremented. Afterwards the now-useless tail of the branch is pruned:
   * walking back towards the root, every node that has no children and does
   * not terminate another item is detached from its parent. Nodes that still
   * lead to, or are themselves, other stored items are never touched.
   *
   * @param suffix the item to delete
   * @returns `true` when the item was stored and has been removed, `false`
   *   when its path does not exist or no item terminates there
   */
  const remove = (suffix: string): boolean => {
    const suffixTokens = suffixToTokens(suffix);

    // ancestors[d] is the node from which stepTokens[d] was followed.
    const ancestors: TrieNode[] = [];
    const stepTokens: string[] = [];

    let node: TrieNode = root;
    for (let i = suffixTokens.length - 1; i >= 0; i--) {
      const token = suffixTokens[i];
      const child = node.get(token);
      if (child === undefined) {
        return false;
      }
      ancestors.push(node);
      stepTokens.push(token);
      node = child;
    }

    if (!node[SENTINEL]) return false;

    node[SENTINEL] = false;
    size--;

    // Prune bottom-up. Stop at the first node that still has children or that
    // terminates another item, so longer and shorter siblings both survive.
    for (
      let depth = ancestors.length - 1;
      depth >= 0 && node.size === 0 && !node[SENTINEL];
      depth--
    ) {
      const parent = ancestors[depth];
      parent.delete(stepTokens[depth]);
      node = parent;
    }

    return true;
  };
  /**
   * Tells whether `suffix` itself was inserted into the trie, i.e. its token
   * path exists and the final node is marked as terminal.
   */
  const has = (suffix: string): boolean => {
    const tokens = suffixToTokens(suffix);
    let cursor: TrieNode = root;

    for (let idx = tokens.length - 1; idx >= 0; idx--) {
      const next = cursor.get(tokens[idx]);
      if (next === undefined) {
        return false;
      }
      cursor = next;
    }

    return cursor[SENTINEL];
  };
  /**
   * Lists every item stored in the trie by traversing it depth-first from the
   * root and rebuilding each item from the tokens met on the way.
   *
   * @returns the stored items, in depth-first visiting order
   */
  const dump = () => {
    const results: string[] = [];

    // Each frame pairs a node with the item spelled so far; the root starts
    // with an empty token list (hostname mode) or an empty string.
    const pending: Array<[TrieNode, string | string[]]> = [[root, hostnameMode ? [] : '']];

    let frame = pending.pop();
    while (frame !== undefined) {
      const [current, suffix] = frame;

      for (const [key, child] of current) {
        pending.push([child, hostnameMode ? [key, ...suffix] : key + (suffix as string)]);
      }

      if (current[SENTINEL]) {
        results.push(hostnameMode ? (suffix as string[]).join('') : (suffix as string));
      }

      frame = pending.pop();
    }

    return results;
  };
  // Seed the trie with the initial items, if any were supplied. Arrays and
  // Sets are both iterated in their natural order.
  if (from) {
    for (const item of from) {
      add(item);
    }
  }
  // Formatter used when the trie object itself is inspected: the whole tree
  // as pretty-printed JSON.
  const inspectRoot = () => JSON.stringify(deepTrieNodeToJSON(root), null, 2);

  // Public API of the trie. `delete` is an alias of `remove`.
  return {
    add,
    contains,
    find,
    substractSetInPlaceFromFound,
    remove,
    delete: remove,
    has,
    dump,
    /** Number of stored items; not available when `smolTree` is enabled. */
    get size() {
      if (!smolTree) {
        return size;
      }
      throw new Error('A Trie with smolTree enabled cannot have correct size!');
    },
    /** The root node of the underlying tree. */
    get root() {
      return root;
    },
    [Bun.inspect.custom]: inspectRoot
  };
  311. };
  312. export default createTrie;