trie.ts 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. /**
  2. * Suffix Trie based on Mnemonist Trie
  3. */
  4. // import { Trie } from 'mnemonist';
  5. export const SENTINEL = Symbol('SENTINEL');
  6. const PARENT = Symbol('Parent Node');
  7. type TrieNode = {
  8. [SENTINEL]: boolean,
  9. [PARENT]: TrieNode | null,
  10. [Bun.inspect.custom]: () => string
  11. } & Map<string, TrieNode>;
  12. const deepTrieNodeToJSON = (node: TrieNode) => {
  13. const obj: Record<string, any> = {};
  14. if (node[SENTINEL]) {
  15. obj['[start]'] = node[SENTINEL];
  16. }
  17. node.forEach((value, key) => {
  18. obj[key] = deepTrieNodeToJSON(value);
  19. });
  20. return obj;
  21. };
  22. function trieNodeInspectCustom(this: TrieNode) {
  23. return JSON.stringify(deepTrieNodeToJSON(this), null, 2);
  24. }
  25. const createNode = (parent: TrieNode | null = null): TrieNode => {
  26. const node = new Map<string, TrieNode>() as TrieNode;
  27. node[SENTINEL] = false;
  28. node[PARENT] = parent;
  29. node[Bun.inspect.custom] = trieNodeInspectCustom;
  30. return node;
  31. };
  32. export const createTrie = (from?: string[] | Set<string> | null, hostnameMode = false, smolTree = false) => {
  33. let size = 0;
  34. const root: TrieNode = createNode();
  35. const suffixToTokens = hostnameMode
  36. ? (suffix: string) => {
  37. let buf = '';
  38. const tokens: string[] = [];
  39. for (let i = 0, l = suffix.length; i < l; i++) {
  40. const c = suffix[i];
  41. if (c === '.') {
  42. if (buf) {
  43. tokens.push(buf, /* . */ c);
  44. buf = '';
  45. } else {
  46. tokens.push(/* . */ c);
  47. }
  48. } else {
  49. buf += c;
  50. }
  51. }
  52. if (buf) {
  53. tokens.push(buf);
  54. }
  55. return tokens;
  56. }
  57. : (suffix: string) => suffix;
  58. /**
  59. * Method used to add the given prefix to the trie.
  60. */
  61. const add = (suffix: string): void => {
  62. let node: TrieNode = root;
  63. let token: string;
  64. const tokens = suffixToTokens(suffix);
  65. for (let i = tokens.length - 1; i >= 0; i--) {
  66. token = tokens[i];
  67. if (node.has(token)) {
  68. node = node.get(token)!;
  69. if (smolTree) {
  70. if (node.get('.')?.[SENTINEL] === true) {
  71. return;
  72. }
  73. // return;
  74. }
  75. } else {
  76. const newNode = createNode(node);
  77. node.set(token, newNode);
  78. node = newNode;
  79. }
  80. if (smolTree) {
  81. if (i === 1 && tokens[0] === '.') {
  82. node[SENTINEL] = false;
  83. // Trying to add `.sub.example.com` where there is already a `blog.sub.example.com` in the trie
  84. const newNode = createNode(node);
  85. node.set('.', newNode);
  86. node = newNode;
  87. break;
  88. }
  89. if (i === 0) {
  90. // Trying to add `example.com` when there is already a `.example.com` in the trie
  91. if (node.get('.')?.[SENTINEL] === true) {
  92. return;
  93. }
  94. }
  95. }
  96. }
  97. // Do we need to increase size?
  98. if (!node[SENTINEL]) {
  99. size++;
  100. }
  101. node[SENTINEL] = true;
  102. };
  103. /**
  104. * @param {string} $suffix
  105. */
  106. const contains = (suffix: string): boolean => {
  107. let node: TrieNode | undefined = root;
  108. let token: string;
  109. const tokens = suffixToTokens(suffix);
  110. for (let i = tokens.length - 1; i >= 0; i--) {
  111. token = tokens[i];
  112. node = node.get(token);
  113. if (!node) return false;
  114. }
  115. return true;
  116. };
  117. /**
  118. * Method used to retrieve every item in the trie with the given prefix.
  119. */
  120. const find = (inputSuffix: string, /** @default true */ includeEqualWithSuffix = true): string[] => {
  121. if (smolTree) {
  122. throw new Error('A Trie with smolTree enabled cannot perform find!');
  123. }
  124. let node: TrieNode | undefined = root;
  125. let token: string;
  126. const inputTokens = suffixToTokens(inputSuffix);
  127. for (let i = inputTokens.length - 1; i >= 0; i--) {
  128. token = inputTokens[i];
  129. if (hostnameMode && token === '') {
  130. break;
  131. }
  132. node = node.get(token);
  133. if (!node) return [];
  134. }
  135. const matches: Array<string | string[]> = [];
  136. // Performing DFS from prefix
  137. const nodeStack: TrieNode[] = [node];
  138. const suffixStack: Array<string | string[]> = [inputTokens];
  139. do {
  140. const suffix: string | string[] = suffixStack.pop()!;
  141. node = nodeStack.pop()!;
  142. if (node[SENTINEL]) {
  143. if (includeEqualWithSuffix) {
  144. matches.push(suffix);
  145. } else if (hostnameMode) {
  146. if ((suffix as string[]).some((t, i) => t !== inputTokens[i])) {
  147. matches.push(suffix);
  148. }
  149. } else if (suffix !== inputTokens) {
  150. matches.push(suffix);
  151. }
  152. }
  153. node.forEach((childNode, k) => {
  154. nodeStack.push(childNode);
  155. if (hostnameMode) {
  156. suffixStack.push([k, ...suffix]);
  157. } else {
  158. suffixStack.push(k + (suffix as string));
  159. }
  160. });
  161. } while (nodeStack.length);
  162. return hostnameMode ? matches.map((m) => (m as string[]).join('')) : matches as string[];
  163. };
  164. /**
  165. * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place.
  166. */
  167. const substractSetInPlaceFromFound = (inputSuffix: string, set: Set<string>) => {
  168. if (smolTree) {
  169. throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
  170. }
  171. let node: TrieNode | undefined = root;
  172. let token: string;
  173. const inputTokens = suffixToTokens(inputSuffix);
  174. // Find the leaf-est node, and early return if not any
  175. for (let i = inputTokens.length - 1; i >= 0; i--) {
  176. token = inputTokens[i];
  177. node = node.get(token);
  178. if (!node) return;
  179. }
  180. // Performing DFS from prefix
  181. const nodeStack: TrieNode[] = [node];
  182. const suffixStack: Array<string | string[]> = [inputTokens];
  183. do {
  184. const suffix = suffixStack.pop()!;
  185. node = nodeStack.pop()!;
  186. if (node[SENTINEL]) {
  187. if (suffix !== inputTokens) {
  188. // found match, delete it from set
  189. if (hostnameMode) {
  190. set.delete((suffix as string[]).join(''));
  191. } else {
  192. set.delete(suffix as string);
  193. }
  194. }
  195. }
  196. node.forEach((childNode, k) => {
  197. nodeStack.push(childNode);
  198. if (hostnameMode) {
  199. const stack = [k, ...suffix];
  200. suffixStack.push(stack);
  201. } else {
  202. suffixStack.push(k + (suffix as string));
  203. }
  204. });
  205. } while (nodeStack.length);
  206. };
  207. /**
  208. * Method used to delete a prefix from the trie.
  209. */
  210. const remove = (suffix: string): boolean => {
  211. let node: TrieNode | undefined = root;
  212. let toPrune: TrieNode | null = null;
  213. let tokenToPrune: string | null = null;
  214. let parent: TrieNode = node;
  215. let token: string;
  216. const suffixTokens = suffixToTokens(suffix);
  217. for (let i = suffixTokens.length - 1; i >= 0; i--) {
  218. token = suffixTokens[i];
  219. parent = node;
  220. node = node.get(token);
  221. if (!node) {
  222. return false;
  223. }
  224. // Keeping track of a potential branch to prune
  225. // If the node is to be pruned, but they are more than one token child in it, we can't prune it
  226. // If there is only one token child, or no child at all, we can prune it safely
  227. const onlyChild = node.size === 1 && node.has(token);
  228. if (onlyChild) {
  229. toPrune = parent;
  230. tokenToPrune = token;
  231. } else if (toPrune !== null) { // not only child, retain the branch
  232. toPrune = null;
  233. tokenToPrune = null;
  234. }
  235. }
  236. if (!node[SENTINEL]) return false;
  237. size--;
  238. if (tokenToPrune && toPrune) {
  239. toPrune.delete(tokenToPrune);
  240. } else {
  241. node[SENTINEL] = false;
  242. }
  243. return true;
  244. };
  245. /**
  246. * Method used to assert whether the given prefix exists in the Trie.
  247. */
  248. const has = (suffix: string): boolean => {
  249. let node: TrieNode = root;
  250. const tokens = suffixToTokens(suffix);
  251. for (let i = tokens.length - 1; i >= 0; i--) {
  252. const token = tokens[i];
  253. if (!node.has(token)) {
  254. return false;
  255. }
  256. node = node.get(token)!;
  257. }
  258. return node[SENTINEL];
  259. };
  260. if (Array.isArray(from)) {
  261. for (let i = 0, l = from.length; i < l; i++) {
  262. add(from[i]);
  263. }
  264. } else if (from) {
  265. from.forEach(add);
  266. }
  267. const dump = () => {
  268. const nodeStack: TrieNode[] = [];
  269. const suffixStack: Array<string | string[]> = [];
  270. // Resolving initial string
  271. const suffix = hostnameMode ? [] : '';
  272. nodeStack.push(root);
  273. suffixStack.push(suffix);
  274. const results: string[] = [];
  275. let node: TrieNode;
  276. do {
  277. let hasValue = false;
  278. node = nodeStack.pop()!;
  279. const suffix = suffixStack.pop()!;
  280. if (node[SENTINEL]) {
  281. hasValue = true;
  282. }
  283. node.forEach((childNode, k) => {
  284. nodeStack.push(childNode);
  285. if (hostnameMode) {
  286. suffixStack.push([k, ...suffix]);
  287. } else {
  288. suffixStack.push(k + (suffix as string));
  289. }
  290. });
  291. if (hasValue) {
  292. results.push(
  293. hostnameMode ? (suffix as string[]).join('') : (suffix as string)
  294. );
  295. }
  296. } while (nodeStack.length);
  297. return results;
  298. };
  299. return {
  300. add,
  301. contains,
  302. find,
  303. substractSetInPlaceFromFound,
  304. remove,
  305. delete: remove,
  306. has,
  307. dump,
  308. get size() {
  309. if (smolTree) {
  310. throw new Error('A Trie with smolTree enabled cannot have correct size!');
  311. }
  312. return size;
  313. },
  314. get root() {
  315. return root;
  316. },
  317. [Bun.inspect.custom]: () => JSON.stringify(deepTrieNodeToJSON(root), null, 2)
  318. };
  319. };
  320. export default createTrie;