Browse Source

Fix: avoid inserting non-domainlist lines into trie

SukkaW 1 year ago
parent
commit
a004ffb960

+ 11 - 1
Build/build-cdn-download-conf.ts

@@ -10,10 +10,20 @@ import { appendArrayInPlace } from './lib/append-array-in-place';
 import { sortDomains } from './lib/stable-sort-domain';
 import { output } from './lib/misc';
 import { SOURCE_DIR } from './constants/dir';
+import { processLine } from './lib/process-line';
 
 const getS3OSSDomainsPromise = (async (): Promise<string[]> => {
   const trie = createTrie(
-    await getPublicSuffixListTextPromise(),
+    (await getPublicSuffixListTextPromise()).reduce<string[]>(
+      (acc, cur) => {
+        const tmp = processLine(cur);
+        if (tmp) {
+          acc.push(tmp);
+        }
+        return acc;
+      },
+      []
+    ),
     false
   );
 

+ 2 - 3
Build/build-reject-domainset.ts

@@ -7,7 +7,6 @@ import { createTrie } from './lib/trie';
 
 import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA } from './constants/reject-data-source';
 import { createRuleset, compareAndWriteFile } from './lib/create-file';
-import { domainsetDeduper } from './lib/domain-deduper';
 import createKeywordFilter from './lib/aho-corasick';
 import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
 import { buildParseDomainMap, sortDomains } from './lib/stable-sort-domain';
@@ -148,8 +147,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
   });
 
   // Dedupe domainSets
-  const dedupedDominArray = span.traceChildSync('dedupe from covered subdomain (base)', () => domainsetDeduper(baseTrie));
-  const dudupedDominArrayExtra = span.traceChildSync('dedupe from covered subdomain (extra)', () => domainsetDeduper(extraTrie));
+  const dedupedDominArray = span.traceChildSync('dedupe from covered subdomain (base)', () => baseTrie.dump());
+  const dudupedDominArrayExtra = span.traceChildSync('dedupe from covered subdomain (extra)', () => extraTrie.dump());
 
   console.log(`Final size ${dedupedDominArray.length} + ${dudupedDominArrayExtra.length}`);
 

+ 1 - 2
Build/build-speedtest-domainset.ts

@@ -1,4 +1,3 @@
-import { domainsetDeduper } from './lib/domain-deduper';
 import path from 'node:path';
 import { createRuleset } from './lib/create-file';
 import { sortDomains } from './lib/stable-sort-domain';
@@ -235,7 +234,7 @@ export const buildSpeedtestDomainSet = task(require.main === module, __filename)
     }
   }))));
 
-  const deduped = span.traceChildSync('sort result', () => sortDomains(domainsetDeduper(domainTrie)));
+  const deduped = span.traceChildSync('sort result', () => sortDomains(domainTrie.dump()));
 
   const description = [
     ...SHARED_DESCRIPTION,

+ 1 - 0
Build/lib/process-line.ts

@@ -16,6 +16,7 @@ export const processLine = (line: string): string | null => {
     || line_0 === '\r'
     || line_0 === '\n'
     || line_0 === '!'
+    || (line_0 === '/' && trimmed[1] === '/')
   ) {
     return null;
   }