trie.test.ts 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. import { createTrie, hostnameToTokens } from './trie';
  2. import { describe, it } from 'mocha';
  3. import { expect } from 'chai';
  // `hostnameToTokens` is expected to emit every label and every dot (including a
  // leading one) as separate tokens, in their original order.
  describe('hostname to tokens', () => {
    it('should split hostname into tokens.', () => {
      // Ordered from the longest hostname down to a bare single label.
      const fixtures = [
        { hostname: '.blog.skk.moe', tokens: ['.', 'blog', '.', 'skk', '.', 'moe'] },
        { hostname: 'blog.skk.moe', tokens: ['blog', '.', 'skk', '.', 'moe'] },
        { hostname: 'skk.moe', tokens: ['skk', '.', 'moe'] },
        { hostname: 'moe', tokens: ['moe'] }
      ];

      for (const { hostname, tokens } of fixtures) {
        expect(hostnameToTokens(hostname)).to.deep.equal(tokens);
      }
    });
  });
  // Core operations of the trie returned by `createTrie`: add, has, delete, find and size.
  describe('Trie', () => {
    it('should be possible to add domains to a Trie.', () => {
      const present = ['a.skk.moe', 'skk.moe', 'anotherskk.moe'];
      // Near misses: an unrelated domain, a truncated TLD, and a subdomain that was never added.
      const absent = ['example.com', 'skk.mo', 'another.skk.moe'];

      const domainTrie = createTrie(null, false);
      for (const domain of present) {
        domainTrie.add(domain);
      }

      expect(domainTrie.size).to.equal(3);
      for (const domain of present) {
        expect(domainTrie.has(domain)).to.equal(true);
      }
      for (const domain of absent) {
        expect(domainTrie.has(domain)).to.equal(false);
      }
    });

    it('adding the same item several times should not increase size.', () => {
      const domainTrie = createTrie(null, false);

      // 'skk.moe' is inserted twice on purpose.
      for (const domain of ['skk.moe', 'blog.skk.moe', 'skk.moe']) {
        domainTrie.add(domain);
      }

      expect(domainTrie.size).to.equal(2);
      expect(domainTrie.has('skk.moe')).to.equal(true);
    });

    it('should be possible to set the null sequence.', () => {
      // Exercise both values of the second `createTrie` argument
      // (the `true` variant is what later tests call a "smol" trie).
      for (const secondArg of [false, true]) {
        const domainTrie = createTrie(null, secondArg);
        domainTrie.add('');
        expect(domainTrie.has('')).to.equal(true);
      }
    });

    it('should be possible to delete items.', () => {
      const domainTrie = createTrie(null);
      for (const domain of ['skk.moe', 'example.com', 'moe.sb']) {
        domainTrie.add(domain);
      }

      // Deleting something that was never stored reports `false`, repeatedly.
      expect(domainTrie.delete('')).to.equal(false);
      expect(domainTrie.delete('')).to.equal(false);
      expect(domainTrie.delete('example.org')).to.equal(false);

      // A successful delete removes only the targeted entry.
      expect(domainTrie.delete('skk.moe')).to.equal(true);
      expect(domainTrie.has('skk.moe')).to.equal(false);
      expect(domainTrie.has('moe.sb')).to.equal(true);
      expect(domainTrie.size).to.equal(2);

      // Drain the remaining entries and watch the size drop to zero.
      expect(domainTrie.delete('example.com')).to.equal(true);
      expect(domainTrie.size).to.equal(1);

      expect(domainTrie.delete('moe.sb')).to.equal(true);
      expect(domainTrie.size).to.equal(0);
    });

    it('should be possible to check the existence of a sequence in the Trie.', () => {
      const stored = 'example.org.skk.moe';
      const domainTrie = createTrie(null, true);
      domainTrie.add(stored);

      expect(domainTrie.has(stored)).to.equal(true);
      // Fragments of the stored hostname (and the empty string) must not match.
      for (const fragment of ['skk.moe', 'example.org', '']) {
        expect(domainTrie.has(fragment)).to.equal(false);
      }
    });

    it('should be possible to retrieve items matching the given prefix.', () => {
      const domainTrie = createTrie(null);
      for (const domain of ['example.com', 'blog.example.com', 'cdn.example.com', 'example.org']) {
        domainTrie.add(domain);
      }

      // Each query is a hostname suffix; `matches` is the exact, ordered result expected.
      const lookups = [
        { query: 'example.com', matches: ['example.com', 'cdn.example.com', 'blog.example.com'] },
        { query: 'com', matches: ['example.com', 'cdn.example.com', 'blog.example.com'] },
        { query: '.example.com', matches: ['cdn.example.com', 'blog.example.com'] },
        { query: 'org', matches: ['example.org'] },
        { query: 'example.net', matches: [] },
        { query: '', matches: ['example.org', 'example.com', 'cdn.example.com', 'blog.example.com'] }
      ];

      for (const { query, matches } of lookups) {
        expect(domainTrie.find(query)).to.deep.equal(matches);
      }
    });

    it('should be possible to retrieve items matching the given prefix even with a smol trie.', () => {
      const domainTrie = createTrie(null, true);
      for (const domain of ['.example.com', 'example.com', 'blog.example.com', 'cdn.example.com', 'example.org']) {
        domainTrie.add(domain);
      }

      // With the second argument set to `true`, only '.example.com' is expected back
      // for the example.com family.
      const lookups = [
        { query: 'example.com', matches: ['.example.com'] },
        { query: 'com', matches: ['.example.com'] },
        { query: '.example.com', matches: ['.example.com'] },
        { query: 'org', matches: ['example.org'] },
        { query: 'example.net', matches: [] },
        { query: '', matches: ['example.org', '.example.com'] }
      ];

      for (const { query, matches } of lookups) {
        expect(domainTrie.find(query)).to.deep.equal(matches);
      }
    });

    it('should be possible to create a trie from an arbitrary iterable.', () => {
      // Seed from an array first...
      const fromArray = createTrie(['skk.moe', 'blog.skk.moe']);
      expect(fromArray.size).to.equal(2);
      expect(fromArray.has('skk.moe')).to.equal(true);

      // ...then from a Set.
      const fromSet = createTrie(new Set(['skk.moe', 'example.com']));
      expect(fromSet.size).to.equal(2);
      expect(fromSet.has('skk.moe')).to.equal(true);
    });
  });
  // `find` lookups with leading-dot queries, as used when deduplicating a domain set.
  describe('surge domainset dedupe', () => {
    it('should not remove same entry', () => {
      const entries = ['.skk.moe', 'noc.one'];
      const domainTrie = createTrie(entries);

      // Looking up an entry by its own text returns exactly that entry.
      for (const entry of entries) {
        expect(domainTrie.find(entry)).to.deep.equal([entry]);
      }
    });

    it('should match subdomain - 1', () => {
      const domainTrie = createTrie([
        'www.noc.one',
        'www.sukkaw.com',
        'blog.skk.moe',
        'image.cdn.skk.moe',
        'cdn.sukkaw.net'
      ]);

      const underSkkMoe = domainTrie.find('.skk.moe');
      const underSukkawCom = domainTrie.find('.sukkaw.com');

      expect(underSkkMoe).to.deep.equal(['image.cdn.skk.moe', 'blog.skk.moe']);
      expect(underSukkawCom).to.deep.equal(['www.sukkaw.com']);
    });

    it('should match subdomain - 2', () => {
      // Same as the previous case, but the '.skk.moe' entry itself is also stored.
      const domainTrie = createTrie([
        'www.noc.one',
        'www.sukkaw.com',
        '.skk.moe',
        'blog.skk.moe',
        'image.cdn.skk.moe',
        'cdn.sukkaw.net'
      ]);

      const underSkkMoe = domainTrie.find('.skk.moe');
      const underSukkawCom = domainTrie.find('.sukkaw.com');

      expect(underSkkMoe).to.deep.equal(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']);
      expect(underSukkawCom).to.deep.equal(['www.sukkaw.com']);
    });

    it('should not remove non-subdomain', () => {
      // 'sukkaskk.moe' merely ends with the text "skk.moe"; it is not a subdomain of it.
      const domainTrie = createTrie(['skk.moe', 'sukkaskk.moe']);

      expect(domainTrie.find('.skk.moe')).to.deep.equal([]);
    });
  });
  // Tries created with `true` as the second `createTrie` argument. The expectations
  // below show a leading-dot entry absorbing the matching apex and subdomain entries.
  describe('smol tree', () => {
    it('should create simple tree - 1', () => {
      const trie = createTrie([
        '.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe',
        'www.noc.one', 'cdn.noc.one',
        '.blog.sub.example.com', 'sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
      ], true);

      // noc.one has no leading-dot entry, so both of its hosts are expected to survive.
      expect(trie.dump()).to.deep.equal([
        '.sub.example.com',
        'cdn.noc.one', 'www.noc.one',
        '.skk.moe'
      ]);
    });

    it('should create simple tree - 2', () => {
      const trie = createTrie([
        '.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe'
      ], true);

      expect(trie.dump()).to.deep.equal([
        '.skk.moe'
      ]);
    });

    // This test previously shared the title 'should create simple tree - 2' with the
    // one above; titles are now unique so a failure report identifies a single test.
    it('should create simple tree - 3', () => {
      const trie = createTrie([
        '.blog.sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
      ], true);

      expect(trie.dump()).to.deep.equal([
        '.sub.example.com'
      ]);

      // Re-adding the entry that is already present must leave the dump unchanged.
      trie.add('.sub.example.com');
      expect(trie.dump()).to.deep.equal([
        '.sub.example.com'
      ]);
    });

    it('should create simple tree - 4', () => {
      const trie = createTrie([
        'commercial.shouji.360.cn',
        'act.commercial.shouji.360.cn',
        'cdn.creative.medialytics.com',
        'px.cdn.creative.medialytics.com'
      ], true);

      // None of the entries has a leading dot, so all four are expected in the dump.
      expect(trie.dump()).to.deep.equal([
        'cdn.creative.medialytics.com',
        'px.cdn.creative.medialytics.com',
        'commercial.shouji.360.cn',
        'act.commercial.shouji.360.cn'
      ]);
    });

    it('should dedupe subdomain properly', () => {
      const trie = createTrie([
        'skk.moe',
        'anotherskk.moe',
        'blog.anotherskk.moe',
        'blog.skk.moe'
      ], true);

      // 'anotherskk.moe' only shares trailing characters with 'skk.moe'; nothing is merged.
      expect(trie.dump()).to.deep.equal([
        'anotherskk.moe',
        'blog.anotherskk.moe',
        'skk.moe',
        'blog.skk.moe'
      ]);
    });

    it('should efficiently whitelist domains', () => {
      const trie = createTrie([
        'skk.moe',
        'anotherskk.moe',
        'blog.anotherskk.moe',
        'blog.skk.moe'
      ], true);

      expect(trie.dump()).to.deep.equal([
        'anotherskk.moe',
        'blog.anotherskk.moe',
        'skk.moe',
        'blog.skk.moe'
      ]);

      // A leading-dot whitelist is expected to drop both 'skk.moe' and 'blog.skk.moe'.
      trie.whitelist('.skk.moe');
      expect(trie.dump()).to.deep.equal([
        'anotherskk.moe',
        'blog.anotherskk.moe'
      ]);

      // Without the leading dot only the exact entry is expected to go.
      trie.whitelist('anotherskk.moe');
      expect(trie.dump()).to.deep.equal([
        'blog.anotherskk.moe'
      ]);

      // Put the apex back, then whitelist the whole family in one call.
      trie.add('anotherskk.moe');
      trie.whitelist('.anotherskk.moe');
      expect(trie.dump()).to.deep.equal([]);
    });

    it('should whitelist trie correctly', () => {
      const trie = createTrie([
        '.t.co',
        't.co',
        'example.t.co',
        '.skk.moe',
        'blog.cdn.example.com',
        'cdn.example.com'
      ], true);

      expect(trie.dump()).to.deep.equal([
        'cdn.example.com', 'blog.cdn.example.com',
        '.skk.moe',
        '.t.co'
      ]);

      trie.whitelist('.t.co');
      expect(trie.dump()).to.deep.equal([
        'cdn.example.com', 'blog.cdn.example.com',
        '.skk.moe'
      ]);

      // Whitelisting the bare 'skk.moe' is expected to remove the stored '.skk.moe' entry.
      trie.whitelist('skk.moe');
      expect(trie.dump()).to.deep.equal(['cdn.example.com', 'blog.cdn.example.com']);

      // An exact-host whitelist is expected to leave the deeper 'blog.cdn.example.com' in place.
      trie.whitelist('cdn.example.com');
      expect(trie.dump()).to.deep.equal(['blog.cdn.example.com']);
    });
  });