Browse Source

Perf: re-use trie when building reject domainsets

SukkaW 1 year ago
parent
commit
5b725192e3
3 changed files with 30 additions and 24 deletions
  1. 11 18
      Build/build-reject-domainset.ts
  2. 13 5
      Build/lib/domain-deduper.ts
  3. 6 1
      Build/lib/trie.ts

+ 11 - 18
Build/build-reject-domainset.ts

@@ -23,7 +23,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
   /** Whitelists */
   const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
 
-  let domainSets = new Set<string>();
+  const domainSets = new Set<string>();
 
   // Parse from AdGuard Filters
   const shouldStop = await span
@@ -73,11 +73,10 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
     process.exit(1);
   }
 
-  let previousSize = domainSets.size;
-  console.log(`Import ${previousSize} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
+  console.log(`Import ${domainSets.size} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
 
   // Dedupe domainSets
-  await span.traceChildAsync('dedupe from black keywords', async (childSpan) => {
+  await span.traceChildAsync('dedupe from black keywords/suffixes', async (childSpan) => {
     /** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */
     const domainKeywordsSet = new Set<string>();
 
@@ -96,16 +95,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
     // Remove as many domains as possible from domainSets before creating trie
     SetSubstract(domainSets, filterRuleWhitelistDomainSets);
 
-    domainSets = new Set(childSpan.traceChildSync('dedupe from white suffixes', () => {
-      const trie = createTrie(domainSets, true, true);
-
-      filterRuleWhitelistDomainSets.forEach(suffix => {
-        trie.whitelist(suffix);
-      });
-
-      return trie.dump();
-    }));
-
+    // Perform kwfilter to remove as many domains as possible from domainSets before creating trie
     childSpan.traceChildSync('dedupe from black keywords', () => {
       const kwfilter = createKeywordFilter(domainKeywordsSet);
 
@@ -116,15 +106,18 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
         }
       }
     });
+  });
 
-    console.log(`Deduped ${previousSize} - ${domainSets.size} = ${previousSize - domainSets.size} from black keywords and suffixes!`);
+  const trie = createTrie(domainSets, true, true);
+  span.traceChildSync('dedupe from white suffixes', () => {
+    filterRuleWhitelistDomainSets.forEach(suffix => {
+      trie.whitelist(suffix);
+    });
   });
-  previousSize = domainSets.size;
 
   // Dedupe domainSets
-  const dudupedDominArray = span.traceChildSync('dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
+  const dudupedDominArray = span.traceChildSync('dedupe from covered subdomain', () => domainDeduper(trie));
 
-  console.log(`Deduped ${previousSize - dudupedDominArray.length} rules from covered subdomain!`);
   console.log(`Final size ${dudupedDominArray.length}`);
 
   // Create reject stats

+ 13 - 5
Build/lib/domain-deduper.ts

@@ -1,9 +1,17 @@
-import { createTrie } from './trie';
+import { createTrie, type Trie } from './trie';
+
+export function domainDeduper(inputDomains: string[] | Trie, toArray?: true): string[];
+export function domainDeduper(inputDomains: string[] | Trie, toArray: false): Set<string>;
+export function domainDeduper(inputDomains: string[] | Trie, toArray = true): string[] | Set<string> {
+  let trie: Trie;
+  if (Array.isArray(inputDomains)) {
+    trie = createTrie(inputDomains, true, true);
+  } else if (!inputDomains.hostnameMode || !inputDomains.smolTree) {
+    throw new Error('Invalid trie');
+  } else {
+    trie = inputDomains;
+  }
 
-export function domainDeduper(inputDomains: string[], toArray?: true): string[];
-export function domainDeduper(inputDomains: string[], toArray: false): Set<string>;
-export function domainDeduper(inputDomains: string[], toArray = true): string[] | Set<string> {
-  const trie = createTrie(inputDomains, true, true);
   const dumped = trie.dump();
   if (toArray) {
     return dumped;

+ 6 - 1
Build/lib/trie.ts

@@ -448,8 +448,13 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
       return root;
     },
     whitelist,
-    [Bun.inspect.custom]: () => JSON.stringify(deepTrieNodeToJSON(root), null, 2)
+    [Bun.inspect.custom]: () => JSON.stringify(deepTrieNodeToJSON(root), null, 2),
+
+    hostnameMode,
+    smolTree
   };
 };
 
+export type Trie = ReturnType<typeof createTrie>;
+
 export default createTrie;