trie.test.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. import { describe, it } from 'mocha';
  2. import { expect } from 'earl';
  3. import { HostnameSmolTrie, HostnameTrie } from './trie';
  4. function createTrie<Meta = any>(from: string[] | Set<string> | null, smolTree: true): HostnameSmolTrie<Meta>;
  5. function createTrie<Meta = any>(from?: string[] | Set<string> | null, smolTree?: false): HostnameTrie<Meta>;
  6. function createTrie<_Meta = any>(from?: string[] | Set<string> | null, smolTree = true) {
  7. if (smolTree) {
  8. return new HostnameSmolTrie(from);
  9. }
  10. return new HostnameTrie(from);
  11. };
  12. // describe('hostname to tokens', () => {
  13. // it('should split hostname into tokens.', () => {
  14. // expect(hostnameToTokens('.blog.skk.moe')).toEqual([
  15. // '.',
  16. // 'blog',
  17. // '.',
  18. // 'skk',
  19. // '.',
  20. // 'moe'
  21. // ]);
  22. // expect(hostnameToTokens('blog.skk.moe')).toEqual([
  23. // 'blog',
  24. // '.',
  25. // 'skk',
  26. // '.',
  27. // 'moe'
  28. // ]);
  29. // expect(hostnameToTokens('skk.moe')).toEqual([
  30. // 'skk',
  31. // '.',
  32. // 'moe'
  33. // ]);
  34. // expect(hostnameToTokens('moe')).toEqual([
  35. // 'moe'
  36. // ]);
  37. // });
  38. // });
  39. describe('Trie', () => {
  40. it('should be possible to add domains to a Trie.', () => {
  41. const trie = createTrie(null, false);
  42. trie.add('a.skk.moe');
  43. trie.add('skk.moe');
  44. trie.add('anotherskk.moe');
  45. expect(trie.size).toEqual(3);
  46. expect(trie.has('a.skk.moe')).toEqual(true);
  47. expect(trie.has('skk.moe')).toEqual(true);
  48. expect(trie.has('anotherskk.moe')).toEqual(true);
  49. expect(trie.has('example.com')).toEqual(false);
  50. expect(trie.has('skk.mo')).toEqual(false);
  51. expect(trie.has('another.skk.moe')).toEqual(false);
  52. });
  53. it('adding the same item several times should not increase size.', () => {
  54. const trie = createTrie(null, false);
  55. trie.add('skk.moe');
  56. trie.add('blog.skk.moe');
  57. // eslint-disable-next-line sukka/no-element-overwrite -- deliberately do testing
  58. trie.add('skk.moe');
  59. expect(trie.size).toEqual(2);
  60. expect(trie.has('skk.moe')).toEqual(true);
  61. });
  62. it('should be possible to set the null sequence.', () => {
  63. const trie = createTrie(null, false);
  64. trie.add('');
  65. expect(trie.has('')).toEqual(true);
  66. const trie2 = createTrie(null, true);
  67. trie2.add('');
  68. expect(trie2.has('')).toEqual(true);
  69. });
  70. it('should be possible to delete items.', () => {
  71. const trie = createTrie(null, false);
  72. trie.add('skk.moe');
  73. trie.add('blog.skk.moe');
  74. trie.add('example.com');
  75. trie.add('moe.sb');
  76. expect(trie.delete('no-match.com')).toEqual(false);
  77. expect(trie.delete('example.org')).toEqual(false);
  78. expect(trie.delete('skk.moe')).toEqual(true);
  79. expect(trie.has('skk.moe')).toEqual(false);
  80. expect(trie.has('moe.sb')).toEqual(true);
  81. expect(trie.size).toEqual(3);
  82. expect(trie.delete('example.com')).toEqual(true);
  83. expect(trie.size).toEqual(2);
  84. expect(trie.delete('moe.sb')).toEqual(true);
  85. expect(trie.size).toEqual(1);
  86. });
  87. it('should be possible to check the existence of a sequence in the Trie.', () => {
  88. const trie = createTrie(null, true);
  89. trie.add('example.org.skk.moe');
  90. expect(trie.has('example.org.skk.moe')).toEqual(true);
  91. expect(trie.has('skk.moe')).toEqual(false);
  92. expect(trie.has('example.org')).toEqual(false);
  93. expect(trie.has('')).toEqual(false);
  94. });
  95. it('should be possible to retrieve items matching the given prefix.', () => {
  96. const trie = createTrie(null, false);
  97. trie.add('example.com');
  98. trie.add('blog.example.com');
  99. trie.add('cdn.example.com');
  100. trie.add('example.org');
  101. expect(trie.find('example.com')).toEqual(['example.com', 'cdn.example.com', 'blog.example.com']);
  102. expect(trie.find('com')).toEqual(['example.com', 'cdn.example.com', 'blog.example.com']);
  103. expect(trie.find('.example.com')).toEqual(['cdn.example.com', 'blog.example.com']);
  104. expect(trie.find('org')).toEqual(['example.org']);
  105. expect(trie.find('example.net')).toEqual([]);
  106. expect(trie.dump()).toEqual(['example.org', 'example.com', 'cdn.example.com', 'blog.example.com']);
  107. });
  108. it('should be possible to retrieve items matching the given prefix even with a smol trie', () => {
  109. const trie = createTrie(null, true);
  110. trie.add('.example.com');
  111. trie.add('example.com');
  112. trie.add('blog.example.com');
  113. trie.add('cdn.example.com');
  114. trie.add('example.org');
  115. expect(trie.find('example.com')).toEqual(['.example.com']);
  116. expect(trie.find('com')).toEqual(['.example.com']);
  117. expect(trie.find('.example.com')).toEqual(['.example.com']);
  118. expect(trie.find('org')).toEqual(['example.org']);
  119. expect(trie.find('example.net')).toEqual([]);
  120. expect(trie.dump()).toEqual(['example.org', '.example.com']);
  121. });
  122. it('should be possible to create a trie from an arbitrary iterable.', () => {
  123. let trie = createTrie(['skk.moe', 'blog.skk.moe'], false);
  124. expect(trie.size).toEqual(2);
  125. expect(trie.has('skk.moe')).toEqual(true);
  126. trie = createTrie(new Set(['skk.moe', 'example.com']), false);
  127. expect(trie.size).toEqual(2);
  128. expect(trie.has('skk.moe')).toEqual(true);
  129. });
  130. });
  131. describe('surge domainset dedupe', () => {
  132. it('should not remove same entry', () => {
  133. const trie = createTrie(['.skk.moe', 'noc.one'], false);
  134. expect(trie.find('.skk.moe')).toEqual(['.skk.moe']);
  135. expect(trie.find('noc.one')).toEqual(['noc.one']);
  136. });
  137. it('should match subdomain - 1', () => {
  138. const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], false);
  139. expect(trie.find('.skk.moe')).toEqual(['image.cdn.skk.moe', 'blog.skk.moe']);
  140. expect(trie.find('.sukkaw.com')).toEqual(['www.sukkaw.com']);
  141. });
  142. it('should match subdomain - 2', () => {
  143. const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], false);
  144. expect(trie.find('.skk.moe')).toEqual(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']);
  145. expect(trie.find('.sukkaw.com')).toEqual(['www.sukkaw.com']);
  146. });
  147. it('should not remove non-subdomain', () => {
  148. const trie = createTrie(['skk.moe', 'sukkaskk.moe'], false);
  149. expect(trie.find('.skk.moe')).toEqual([]);
  150. });
  151. });
  152. describe('smol tree', () => {
  153. it('should init tree', () => {
  154. const trie = createTrie([
  155. 'skk.moe',
  156. 'anotherskk.moe',
  157. 'blog.anotherskk.moe',
  158. 'blog.skk.moe',
  159. '.cdn.local',
  160. 'blog.img.skk.local',
  161. 'img.skk.local'
  162. ], true);
  163. expect(trie.dump()).toEqual([
  164. 'img.skk.local',
  165. 'blog.img.skk.local',
  166. '.cdn.local',
  167. 'anotherskk.moe',
  168. 'blog.anotherskk.moe',
  169. 'skk.moe',
  170. 'blog.skk.moe'
  171. ]);
  172. });
  173. it('should create simple tree - 1', () => {
  174. const trie = createTrie([
  175. '.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe',
  176. 'www.noc.one', 'cdn.noc.one',
  177. '.blog.sub.example.com', 'sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
  178. ], true);
  179. expect(trie.dump()).toEqual([
  180. '.sub.example.com',
  181. 'cdn.noc.one',
  182. 'www.noc.one',
  183. '.skk.moe'
  184. ]);
  185. });
  186. it('should create simple tree - 2', () => {
  187. const trie = createTrie([
  188. '.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe'
  189. ], true);
  190. expect(trie.dump()).toEqual([
  191. '.skk.moe'
  192. ]);
  193. });
  194. it('should create simple tree - 3', () => {
  195. const trie = createTrie([
  196. '.blog.sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
  197. ], true);
  198. expect(trie.dump()).toEqual([
  199. '.sub.example.com'
  200. ]);
  201. trie.add('.sub.example.com');
  202. expect(trie.dump()).toEqual([
  203. '.sub.example.com'
  204. ]);
  205. });
  206. it('should create simple tree - 3', () => {
  207. const trie = createTrie([
  208. 'commercial.shouji.360.cn',
  209. 'act.commercial.shouji.360.cn',
  210. 'cdn.creative.medialytics.com',
  211. 'px.cdn.creative.medialytics.com'
  212. ], true);
  213. expect(trie.dump()).toEqual([
  214. 'cdn.creative.medialytics.com',
  215. 'px.cdn.creative.medialytics.com',
  216. 'commercial.shouji.360.cn',
  217. 'act.commercial.shouji.360.cn'
  218. ]);
  219. });
  220. it('should dedupe subdomain properly', () => {
  221. const trie = createTrie([
  222. 'skk.moe',
  223. 'anotherskk.moe',
  224. 'blog.anotherskk.moe',
  225. 'blog.skk.moe'
  226. ], true);
  227. expect(trie.dump()).toEqual([
  228. 'anotherskk.moe',
  229. 'blog.anotherskk.moe',
  230. 'skk.moe',
  231. 'blog.skk.moe'
  232. ]);
  233. });
  234. it('should effctly whitelist domains', () => {
  235. const trie = createTrie([
  236. 'skk.moe',
  237. 'anotherskk.moe',
  238. 'blog.anotherskk.moe',
  239. 'blog.skk.moe',
  240. '.cdn.local',
  241. 'blog.img.skk.local',
  242. 'img.skk.local'
  243. ], true);
  244. trie.whitelist('.skk.moe');
  245. expect(trie.dump()).toEqual([
  246. 'img.skk.local',
  247. 'blog.img.skk.local',
  248. '.cdn.local',
  249. 'anotherskk.moe',
  250. 'blog.anotherskk.moe'
  251. ]);
  252. trie.whitelist('anotherskk.moe');
  253. expect(trie.dump()).toEqual([
  254. 'img.skk.local',
  255. 'blog.img.skk.local',
  256. '.cdn.local',
  257. 'blog.anotherskk.moe'
  258. ]);
  259. trie.add('anotherskk.moe');
  260. trie.whitelist('.anotherskk.moe');
  261. expect(trie.dump()).toEqual([
  262. 'img.skk.local',
  263. 'blog.img.skk.local',
  264. '.cdn.local'
  265. ]);
  266. trie.whitelist('img.skk.local');
  267. expect(trie.dump()).toEqual([
  268. 'blog.img.skk.local',
  269. '.cdn.local'
  270. ]);
  271. trie.whitelist('cdn.local');
  272. expect(trie.dump()).toEqual([
  273. 'blog.img.skk.local'
  274. ]);
  275. trie.whitelist('.skk.local');
  276. expect(trie.dump()).toEqual([]);
  277. });
  278. it('should whitelist trie correctly', () => {
  279. const trie = createTrie([
  280. '.t.co',
  281. 't.co',
  282. 'example.t.co',
  283. '.skk.moe',
  284. 'blog.cdn.example.com',
  285. 'cdn.example.com'
  286. ], true);
  287. expect(trie.dump()).toEqual([
  288. 'cdn.example.com', 'blog.cdn.example.com',
  289. '.skk.moe',
  290. '.t.co'
  291. ]);
  292. trie.whitelist('.t.co');
  293. expect(trie.dump()).toEqual([
  294. 'cdn.example.com', 'blog.cdn.example.com', '.skk.moe'
  295. ]);
  296. trie.whitelist('skk.moe');
  297. expect(trie.dump()).toEqual(['cdn.example.com', 'blog.cdn.example.com']);
  298. trie.whitelist('cdn.example.com');
  299. expect(trie.dump()).toEqual(['blog.cdn.example.com']);
  300. });
  301. it('contains - normal', () => {
  302. const trie = createTrie([
  303. 'skk.moe',
  304. 'anotherskk.moe',
  305. 'blog.anotherskk.moe',
  306. 'blog.skk.moe'
  307. ], true);
  308. expect(trie.contains('skk.moe')).toEqual(true);
  309. expect(trie.contains('blog.skk.moe')).toEqual(true);
  310. expect(trie.contains('anotherskk.moe')).toEqual(true);
  311. expect(trie.contains('blog.anotherskk.moe')).toEqual(true);
  312. expect(trie.contains('example.com')).toEqual(false);
  313. expect(trie.contains('blog.example.com')).toEqual(false);
  314. expect(trie.contains('skk.mo')).toEqual(false);
  315. expect(trie.contains('cdn.skk.moe')).toEqual(false);
  316. });
  317. it('contains - subdomain', () => {
  318. const trie = createTrie([
  319. 'index.rubygems.org'
  320. ], true);
  321. expect(trie.contains('rubygems.org')).toEqual(false);
  322. expect(trie.contains('index.rubygems.org')).toEqual(true);
  323. expect(trie.contains('sub.index.rubygems.org')).toEqual(false);
  324. });
  325. it('contains - include subdomains', () => {
  326. const trie = createTrie([
  327. '.skk.moe'
  328. ], true);
  329. expect(trie.contains('skk.moe')).toEqual(true);
  330. expect(trie.contains('blog.skk.moe')).toEqual(true);
  331. expect(trie.contains('image.cdn.skk.moe')).toEqual(true);
  332. expect(trie.contains('example.com')).toEqual(false);
  333. expect(trie.contains('blog.example.com')).toEqual(false);
  334. expect(trie.contains('skk.mo')).toEqual(false);
  335. });
  336. });