Browse Source

Enable trie hostname mode & extend hostname mode test cases

SukkaW 1 year ago
parent
commit
eb0623c1a9

+ 9 - 4
Build/build-cdn-download-conf.ts

@@ -7,9 +7,14 @@ import { SHARED_DESCRIPTION } from './lib/constants';
 import { getPublicSuffixListTextPromise } from './lib/download-publicsuffixlist';
 import { getPublicSuffixListTextPromise } from './lib/download-publicsuffixlist';
 import { domainDeduper } from './lib/domain-deduper';
 import { domainDeduper } from './lib/domain-deduper';
 import { appendArrayInPlace } from './lib/append-array-in-place';
 import { appendArrayInPlace } from './lib/append-array-in-place';
+import { sortDomains } from './lib/stable-sort-domain';
 
 
 const getS3OSSDomainsPromise = (async (): Promise<Set<string>> => {
 const getS3OSSDomainsPromise = (async (): Promise<Set<string>> => {
-  const trie = createTrie((await getPublicSuffixListTextPromise()).split('\n'));
+  const trie = createTrie(
+    (await getPublicSuffixListTextPromise()).split('\n'),
+    true,
+    false
+  );
 
 
   /**
   /**
    * Extract OSS domain from publicsuffix list
    * Extract OSS domain from publicsuffix list
@@ -69,7 +74,7 @@ export const buildCdnDownloadConf = task(import.meta.path, async (span) => {
         'This file contains object storage and static assets CDN domains.'
         'This file contains object storage and static assets CDN domains.'
       ],
       ],
       new Date(),
       new Date(),
-      domainDeduper(cdnDomainsList),
+      sortDomains(domainDeduper(cdnDomainsList)),
       'domainset',
       'domainset',
       path.resolve(import.meta.dir, '../List/domainset/cdn.conf'),
       path.resolve(import.meta.dir, '../List/domainset/cdn.conf'),
       path.resolve(import.meta.dir, '../Clash/domainset/cdn.txt')
       path.resolve(import.meta.dir, '../Clash/domainset/cdn.txt')
@@ -83,10 +88,10 @@ export const buildCdnDownloadConf = task(import.meta.path, async (span) => {
         'This file contains domains for software updating & large file hosting.'
         'This file contains domains for software updating & large file hosting.'
       ],
       ],
       new Date(),
       new Date(),
-      domainDeduper([
+      sortDomains(domainDeduper([
         ...downloadDomainSet,
         ...downloadDomainSet,
         ...steamDomainSet
         ...steamDomainSet
-      ]),
+      ])),
       'domainset',
       'domainset',
       path.resolve(import.meta.dir, '../List/domainset/download.conf'),
       path.resolve(import.meta.dir, '../List/domainset/download.conf'),
       path.resolve(import.meta.dir, '../Clash/domainset/download.txt')
       path.resolve(import.meta.dir, '../Clash/domainset/download.txt')

+ 9 - 12
Build/build-microsoft-cdn.ts

@@ -6,6 +6,7 @@ import { createTrie } from './lib/trie';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { createMemoizedPromise } from './lib/memo-promise';
 import { createMemoizedPromise } from './lib/memo-promise';
 import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
 import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
+import { sortDomains } from './lib/stable-sort-domain';
 
 
 const PROBE_DOMAINS = ['.microsoft.com', '.windows.net', '.windows.com', '.windowsupdate.com', '.windowssearch.com', '.office.net'];
 const PROBE_DOMAINS = ['.microsoft.com', '.windows.net', '.windows.com', '.windowsupdate.com', '.windowssearch.com', '.office.net'];
 
 
@@ -25,22 +26,22 @@ const BLACKLIST = [
 
 
 export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
 export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
   // First trie is to find the microsoft domains that match the probe domains
   // First trie is to find the microsoft domains that match the probe domains
-  const trie = createTrie();
+  const trie = createTrie(null, true);
   for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
   for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
     const domain = extractDomainsFromFelixDnsmasq(line);
     const domain = extractDomainsFromFelixDnsmasq(line);
     if (domain) {
     if (domain) {
       trie.add(domain);
       trie.add(domain);
     }
     }
   }
   }
-  const set = new Set(PROBE_DOMAINS.flatMap(domain => trie.find(domain)));
+  const foundMicrosoftCdnDomains = PROBE_DOMAINS.flatMap(domain => trie.find(domain));
 
 
   // Second trie is to remove blacklisted domains
   // Second trie is to remove blacklisted domains
-  const trie2 = createTrie(set);
-  BLACKLIST.forEach(black => {
-    trie2.substractSetInPlaceFromFound(black, set);
-  });
+  const trie2 = createTrie(foundMicrosoftCdnDomains, true, true);
+  BLACKLIST.forEach(trie2.whitelist);
 
 
-  return Array.from(set).map(d => `DOMAIN-SUFFIX,${d}`).concat(WHITELIST);
+  return sortDomains(trie2.dump())
+    .map(d => `DOMAIN-SUFFIX,${d}`)
+    .concat(WHITELIST);
 });
 });
 
 
 export const buildMicrosoftCdn = task(import.meta.path, async (span) => {
 export const buildMicrosoftCdn = task(import.meta.path, async (span) => {
@@ -53,11 +54,7 @@ export const buildMicrosoftCdn = task(import.meta.path, async (span) => {
     ' - https://github.com/felixonmars/dnsmasq-china-list'
     ' - https://github.com/felixonmars/dnsmasq-china-list'
   ];
   ];
 
 
-  const promise = getMicrosoftCdnRulesetPromise();
-  const peeked = Bun.peek(promise);
-  const res: string[] = peeked === promise
-    ? await span.traceChildPromise('get microsoft cdn domains', promise)
-    : (peeked as string[]);
+  const res: string[] = await span.traceChildPromise('get microsoft cdn domains', getMicrosoftCdnRulesetPromise());
 
 
   return createRuleset(
   return createRuleset(
     span,
     span,

+ 89 - 12
Build/lib/trie.test.ts

@@ -20,6 +20,23 @@ describe('Trie', () => {
     expect(trie.has('sukkaw')).toBeFalse();
     expect(trie.has('sukkaw')).toBeFalse();
   });
   });
 
 
+  it('should be possible to add domains to a Trie (hostname).', () => {
+    const trie = createTrie(null, true);
+
+    trie.add('a.skk.moe');
+    trie.add('skk.moe');
+    trie.add('anotherskk.moe');
+
+    expect(trie.size).toBe(3);
+
+    expect(trie.has('a.skk.moe')).toBeTrue();
+    expect(trie.has('skk.moe')).toBeTrue();
+    expect(trie.has('anotherskk.moe')).toBeTrue();
+    expect(trie.has('example.com')).toBeFalse();
+    expect(trie.has('skk.mo')).toBeFalse();
+    expect(trie.has('another.skk.moe')).toBeFalse();
+  });
+
   it('adding the same item several times should not increase size.', () => {
   it('adding the same item several times should not increase size.', () => {
     const trie = createTrie();
     const trie = createTrie();
 
 
@@ -31,9 +48,24 @@ describe('Trie', () => {
     expect(trie.has('rat')).toBeTrue();
     expect(trie.has('rat')).toBeTrue();
   });
   });
 
 
+  it('adding the same item several times should not increase size (hostname).', () => {
+    const trie = createTrie(null, true);
+
+    trie.add('skk.moe');
+    trie.add('blog.skk.moe');
+    trie.add('skk.moe');
+
+    expect(trie.size).toBe(2);
+    expect(trie.has('skk.moe')).toBeTrue();
+  });
+
   it('should be possible to set the null sequence.', () => {
   it('should be possible to set the null sequence.', () => {
-    const trie = createTrie();
+    let trie = createTrie();
+
+    trie.add('');
+    expect(trie.has('')).toBeTrue();
 
 
+    trie = createTrie(null, true);
     trie.add('');
     trie.add('');
     expect(trie.has('')).toBeTrue();
     expect(trie.has('')).toBeTrue();
   });
   });
@@ -61,6 +93,29 @@ describe('Trie', () => {
     expect(trie.size).toBe(0);
     expect(trie.size).toBe(0);
   });
   });
 
 
+  it('should be possible to delete items (hostname).', () => {
+    const trie = createTrie(null, true);
+
+    trie.add('skk.moe');
+    trie.add('example.com');
+    trie.add('moe.sb');
+
+    expect(trie.delete('')).toBeFalse();
+    expect(trie.delete('')).toBeFalse();
+    expect(trie.delete('example.org')).toBeFalse();
+
+    expect(trie.delete('skk.moe')).toBeTrue();
+    expect(trie.has('skk.moe')).toBeFalse();
+    expect(trie.has('moe.sb')).toBeTrue();
+
+    expect(trie.size).toBe(2);
+
+    expect(trie.delete('example.com')).toBeTrue();
+    expect(trie.size).toBe(1);
+    expect(trie.delete('moe.sb')).toBeTrue();
+    expect(trie.size).toBe(0);
+  });
+
   it('should be possible to check the existence of a sequence in the Trie.', () => {
   it('should be possible to check the existence of a sequence in the Trie.', () => {
     const trie = createTrie();
     const trie = createTrie();
 
 
@@ -68,6 +123,18 @@ describe('Trie', () => {
 
 
     expect(trie.has('romanesque')).toBe(true);
     expect(trie.has('romanesque')).toBe(true);
     expect(trie.has('roman')).toBe(false);
     expect(trie.has('roman')).toBe(false);
+    expect(trie.has('esque')).toBe(false);
+    expect(trie.has('')).toBe(false);
+  });
+
+  it('should be possible to check the existence of a sequence in the Trie (hostname).', () => {
+    const trie = createTrie(null, true);
+
+    trie.add('example.org.skk.moe');
+
+    expect(trie.has('example.org.skk.moe')).toBe(true);
+    expect(trie.has('skk.moe')).toBe(false);
+    expect(trie.has('example.org')).toBe(false);
     expect(trie.has('')).toBe(false);
     expect(trie.has('')).toBe(false);
   });
   });
 
 
@@ -79,8 +146,6 @@ describe('Trie', () => {
     trie.add('sesqueroman');
     trie.add('sesqueroman');
     trie.add('greek');
     trie.add('greek');
 
 
-    console.log({ trie });
-
     expect(trie.find('roman')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
     expect(trie.find('roman')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
     expect(trie.find('man')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
     expect(trie.find('man')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
     expect(trie.find('esqueroman')).toEqual(['esqueroman', 'sesqueroman']);
     expect(trie.find('esqueroman')).toEqual(['esqueroman', 'sesqueroman']);
@@ -89,13 +154,31 @@ describe('Trie', () => {
     expect(trie.find('')).toEqual(['greek', 'roman', 'esqueroman', 'sesqueroman']);
     expect(trie.find('')).toEqual(['greek', 'roman', 'esqueroman', 'sesqueroman']);
   });
   });
 
 
-  it('should be possible to create a trie from an arbitrary iterable.', () => {
-    const words = ['roman', 'esqueroman'];
+  it('should be possible to retrieve items matching the given prefix (hostname).', () => {
+    const trie = createTrie(null, true);
+
+    trie.add('example.com');
+    trie.add('blog.example.com');
+    trie.add('cdn.example.com');
+    trie.add('example.org');
 
 
-    const trie = createTrie(words);
+    expect(trie.find('example.com')).toEqual(['example.com', 'cdn.example.com', 'blog.example.com']);
+    expect(trie.find('com')).toEqual(['example.com', 'cdn.example.com', 'blog.example.com']);
+    expect(trie.find('.example.com')).toEqual(['cdn.example.com', 'blog.example.com']);
+    expect(trie.find('org')).toEqual(['example.org']);
+    expect(trie.find('example.net')).toEqual([]);
+    expect(trie.find('')).toEqual(['example.org', 'example.com', 'cdn.example.com', 'blog.example.com']);
+  });
+
+  it('should be possible to create a trie from an arbitrary iterable.', () => {
+    let trie = createTrie(['roman', 'esqueroman']);
 
 
     expect(trie.size).toBe(2);
     expect(trie.size).toBe(2);
     expect(trie.has('roman')).toBe(true);
     expect(trie.has('roman')).toBe(true);
+
+    trie = createTrie(new Set(['skk.moe', 'example.com']), true);
+    expect(trie.size).toBe(2);
+    expect(trie.has('skk.moe')).toBe(true);
   });
   });
 });
 });
 
 
@@ -106,8 +189,6 @@ describe.each([
   it('should not remove same entry', () => {
   it('should not remove same entry', () => {
     const trie = createTrie(['.skk.moe', 'noc.one'], hostnameMode);
     const trie = createTrie(['.skk.moe', 'noc.one'], hostnameMode);
 
 
-    console.log(trie);
-
     expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe']);
     expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe']);
     expect(trie.find('noc.one')).toStrictEqual(['noc.one']);
     expect(trie.find('noc.one')).toStrictEqual(['noc.one']);
   });
   });
@@ -115,8 +196,6 @@ describe.each([
   it('should match subdomain - 1', () => {
   it('should match subdomain - 1', () => {
     const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
     const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
 
 
-    console.log(trie);
-
     expect(trie.find('.skk.moe')).toStrictEqual(['image.cdn.skk.moe', 'blog.skk.moe']);
     expect(trie.find('.skk.moe')).toStrictEqual(['image.cdn.skk.moe', 'blog.skk.moe']);
     expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']);
     expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']);
   });
   });
@@ -124,8 +203,6 @@ describe.each([
   it('should match subdomain - 2', () => {
   it('should match subdomain - 2', () => {
     const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
     const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
 
 
-    console.log(trie);
-
     expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']);
     expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']);
     expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']);
     expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']);
   });
   });

+ 1 - 1
Build/validate-domestic.ts

@@ -14,7 +14,7 @@ export const parseDomesticList = async () => {
     }
     }
   }
   }
 
 
-  const trie = createTrie(set);
+  const trie = createTrie(set, true);
 
 
   const top5000 = new Set<string>();
   const top5000 = new Set<string>();
 
 

+ 1 - 1
Build/validate-gfwlist.ts

@@ -75,7 +75,7 @@ export const parseGfwList = async () => {
   })).text();
   })).text();
   const topDomains = parse(res);
   const topDomains = parse(res);
 
 
-  const trie = createTrie(blackSet);
+  const trie = createTrie(blackSet, true);
 
 
   for await (const [domain] of topDomains) {
   for await (const [domain] of topDomains) {
     if (trie.has(domain)) {
     if (trie.has(domain)) {