瀏覽代碼

Perf: many changes

- Hoist process hosts line callback
- Reduce Dan Pollock's (dp) hosts file size by switching to the smaller 0.0.0.0 variant
- Reduce redundant comparisons in domain sort
SukkaW 1 年之前
父節點
當前提交
e5d511d105

+ 2 - 7
Build/build-reject-domainset.ts

@@ -18,14 +18,9 @@ import { getPhishingDomains } from './lib/get-phishing-domains';
 
 import * as SetHelpers from 'mnemonist/set';
 import { setAddFromArray } from './lib/set-add-from-array';
-import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
 
 export const buildRejectDomainSet = task(import.meta.path, async (span) => {
-  const gorhillPromise = getGorhillPublicSuffixPromise();
-  const gorhillPeeked = Bun.peek(gorhillPromise);
-  const gorhill: PublicSuffixList = gorhillPeeked === gorhillPromise
-    ? await gorhillPromise
-    : (gorhillPeeked as PublicSuffixList);
+  const gorhill = await getGorhillPublicSuffixPromise();
 
   /** Whitelists */
   const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
@@ -126,7 +121,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
       const kwfilter = createKeywordFilter(domainKeywordsSet);
 
       for (const domain of domainSets) {
-      // Remove keyword
+        // Remove keyword
         if (kwfilter(domain)) {
           domainSets.delete(domain);
         }

+ 3 - 1
Build/build-reject-ip-list.ts

@@ -65,8 +65,10 @@ const getBotNetFilterIPsPromise = fsFetchCache.apply(
   }
 );
 
+const localRejectIPSourcesPromise = readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/ip/reject.conf'));
+
 export const buildRejectIPList = task(import.meta.path, async (span) => {
-  const result: string[] = await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/ip/reject.conf'));
+  const result = await localRejectIPSourcesPromise;
 
   const bogusNxDomainIPs = await span.traceChildPromise('get bogus nxdomain ips', getBogusNxDomainIPsPromise);
   const botNetIPs = await span.traceChildPromise('get botnet ips', getBotNetFilterIPsPromise);

+ 1 - 1
Build/build-speedtest-domainset.ts

@@ -232,7 +232,7 @@ export const buildSpeedtestDomainSet = task(import.meta.path, async (span) => {
       });
 
       resolve();
-    }, 1000 * 60 * 2);
+    }, 1000 * 60 * 1.5);
 
     Promise.all(Object.values(pMap)).then(() => {
       clearTimeout(timer);

+ 24 - 23
Build/lib/parse-filter.ts

@@ -46,37 +46,38 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl
     }
   ));
 }
-export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) {
-  const domainSets = new Set<string>();
 
-  const lineCb = (l: string) => {
-    const line = processLine(l);
-    if (!line) {
-      return;
-    }
+const hostsLineCb = (l: string, set: Set<string>, includeAllSubDomain: boolean, meta: string) => {
+  const line = processLine(l);
+  if (!line) {
+    return;
+  }
 
-    const _domain = line.split(/\s/)[1]?.trim();
-    if (!_domain) {
-      return;
-    }
-    const domain = normalizeDomain(_domain);
-    if (!domain) {
-      return;
-    }
-    if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
-      console.warn(picocolors.red(hostsUrl), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
-      foundDebugDomain = true;
-    }
+  const _domain = line.split(/\s/)[1]?.trim();
+  if (!_domain) {
+    return;
+  }
+  const domain = normalizeDomain(_domain);
+  if (!domain) {
+    return;
+  }
+  if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
+    console.warn(picocolors.red(meta), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
+    foundDebugDomain = true;
+  }
 
-    domainSets.add(includeAllSubDomain ? `.${domain}` : domain);
-  };
+  set.add(includeAllSubDomain ? `.${domain}` : domain);
+};
+
+export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) {
+  const domainSets = new Set<string>();
 
   return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn((childSpan) => fsFetchCache.apply(
     hostsUrl,
     async () => {
       if (mirrors == null || mirrors.length === 0) {
         for await (const l of await fetchRemoteTextByLine(hostsUrl)) {
-          lineCb(l);
+          hostsLineCb(l, domainSets, includeAllSubDomain, hostsUrl);
         }
       } else {
         const filterRules = await childSpan
@@ -85,7 +86,7 @@ export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | n
 
         childSpan.traceChild('parse hosts').traceSyncFn(() => {
           for (let i = 0, len = filterRules.length; i < len; i++) {
-            lineCb(filterRules[i]);
+            hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl);
           }
         });
       }

+ 2 - 1
Build/lib/reject-data-source.ts

@@ -9,7 +9,8 @@ export const HOSTS: HostsSource[] = [
     true,
     TTL.THREE_HOURS()
   ],
-  ['https://someonewhocares.org/hosts/hosts', null, true, TTL.THREE_HOURS()],
+  // Dan Pollock's hosts file, 0.0.0.0 version is 30 KiB smaller
+  ['https://someonewhocares.org/hosts/zero/hosts', null, true, TTL.THREE_HOURS()],
   // no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl
   ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', null, true, TTL.THREE_DAYS()],
   // have not been updated for more than a year, so we set a 14 days cache ttl

+ 9 - 1
Build/lib/stable-sort-domain.ts

@@ -42,7 +42,15 @@ export const sortDomains = (inputs: string[], gorhill: PublicSuffixList) => {
 
   const sorter = (a: string, b: string) => {
     if (a === b) return 0;
-    return compare(domains.get(a)!, domains.get(b)!) || compare(a, b);
+
+    const $a = domains.get(a)!;
+    const $b = domains.get(b)!;
+
+    // avoid comparing the same thing twice
+    if (a === $a && b === $b) {
+      return compare(a, b);
+    }
+    return compare($a, $b) || compare(a, b);
   };
 
   return inputs.sort(sorter);