瀏覽代碼

Update Reject Hosts / Add mirror support for Hosts Source

SukkaW 2 年之前
父節點
當前提交
af8cce4f45

+ 30 - 28
Build/build-common.ts

@@ -91,34 +91,36 @@ const processFile = (span: Span, sourcePath: string) => {
   });
 };
 
-async function transformDomainset(parentSpan: Span, sourcePath: string, relativePath: string) {
-  const span = parentSpan.traceChild(`transform domainset: ${path.basename(sourcePath, path.extname(sourcePath))}`);
-
-  const res = await processFile(span, sourcePath);
-  if (!res) return;
-
-  const [title, descriptions, lines] = res;
-
-  const deduped = domainDeduper(lines);
-  const description = [
-    ...SHARED_DESCRIPTION,
-    ...(
-      descriptions.length
-        ? ['', ...descriptions]
-        : []
-    )
-  ];
-
-  return span.traceAsyncFn(() => createRuleset(
-    span,
-    title,
-    description,
-    new Date(),
-    deduped,
-    'domainset',
-    path.resolve(outputSurgeDir, relativePath),
-    path.resolve(outputClashDir, `${relativePath.slice(0, -path.extname(relativePath).length)}.txt`)
-  ));
+function transformDomainset(parentSpan: Span, sourcePath: string, relativePath: string) {
+  return parentSpan
+    .traceChild(`transform domainset: ${path.basename(sourcePath, path.extname(sourcePath))}`)
+    .traceAsyncFn(async (span) => {
+      const res = await processFile(span, sourcePath);
+      if (!res) return;
+
+      const [title, descriptions, lines] = res;
+
+      const deduped = domainDeduper(lines);
+      const description = [
+        ...SHARED_DESCRIPTION,
+        ...(
+          descriptions.length
+            ? ['', ...descriptions]
+            : []
+        )
+      ];
+
+      return createRuleset(
+        span,
+        title,
+        description,
+        new Date(),
+        deduped,
+        'domainset',
+        path.resolve(outputSurgeDir, relativePath),
+        path.resolve(outputClashDir, `${relativePath.slice(0, -path.extname(relativePath).length)}.txt`)
+      );
+    });
 }
 
 /**

+ 6 - 4
Build/build-reject-domainset.ts

@@ -27,14 +27,15 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
 
   const domainSets = new Set<string>();
 
-  let shouldStop = false;
   // Parse from AdGuard Filters
-  await span
+  const shouldStop = await span
     .traceChild('download and process hosts / adblock filter rules')
     .traceAsyncFn(async (childSpan) => {
+      // eslint-disable-next-line sukka/no-single-return -- not single return
+      let shouldStop = false;
       await Promise.all([
         // Parse from remote hosts & domain lists
-        ...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetHelpers.add(domainSets, hosts))),
+        ...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2], entry[3]).then(hosts => SetHelpers.add(domainSets, hosts))),
 
         ...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetHelpers.add(domainSets, hosts))),
 
@@ -44,6 +45,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
             : processFilterRules(childSpan, input[0], input[1], input[2])
         ).then(({ white, black, foundDebugDomain }) => {
           if (foundDebugDomain) {
+            // eslint-disable-next-line sukka/no-single-return -- not single return
             shouldStop = true;
             // we should not break here, as we want to see full matches from all data sources
           }
@@ -65,7 +67,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
           setAddFromArray(domainSets, await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf')));
         })
       ]);
-
+      // eslint-disable-next-line sukka/no-single-return -- not single return
       return shouldStop;
     });
 

+ 25 - 6
Build/lib/parse-filter.ts

@@ -44,25 +44,25 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl
     }
   ));
 }
-export function processHosts(span: Span, hostsUrl: string, includeAllSubDomain = false, ttl: number | null = null) {
-  return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn(() => fsCache.apply(
+export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) {
+  return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn((childSpan) => fsCache.apply(
     hostsUrl,
     async () => {
       const domainSets = new Set<string>();
 
-      for await (const l of await fetchRemoteTextByLine(hostsUrl)) {
+      const lineCb = (l: string) => {
         const line = processLine(l);
         if (!line) {
-          continue;
+          return;
         }
 
         const _domain = line.split(/\s/)[1]?.trim();
         if (!_domain) {
-          continue;
+          return;
         }
         const domain = normalizeDomain(_domain);
         if (!domain) {
-          continue;
+          return;
         }
         if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
           console.warn(picocolors.red(hostsUrl), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
@@ -70,6 +70,25 @@ export function processHosts(span: Span, hostsUrl: string, includeAllSubDomain =
         }
 
         domainSets.add(includeAllSubDomain ? `.${domain}` : domain);
+      };
+
+      if (mirrors == null || mirrors.length === 0) {
+        for await (const l of await fetchRemoteTextByLine(hostsUrl)) {
+          lineCb(l);
+        }
+      } else {
+        // Intended to avoid event loop starvation: yield once before we start fetching (note: `await Promise.resolve()` only yields a microtask tick, not a macrotask).
+        await Promise.resolve();
+
+        const filterRules = await childSpan.traceChild('download hosts').traceAsyncFn(() => {
+          return fetchAssets(hostsUrl, mirrors).then(text => text.split('\n'));
+        });
+
+        childSpan.traceChild('parse hosts').traceSyncFn(() => {
+          for (let i = 0, len = filterRules.length; i < len; i++) {
+            lineCb(filterRules[i]);
+          }
+        });
       }
 
       console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size));

+ 25 - 12
Build/lib/reject-data-source.ts

@@ -1,21 +1,34 @@
 import { TTL } from './cache-filesystem';
 
-export const HOSTS = [
-  ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, TTL.THREE_HOURS()],
-  ['https://someonewhocares.org/hosts/hosts', true, TTL.THREE_HOURS()],
+type HostsSource = [main: string, mirrors: string[] | null, includeAllSubDomain: boolean, ttl: number];
+
+export const HOSTS: HostsSource[] = [
+  [
+    'https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext',
+    ['https://raw.githubusercontent.com/uBlockOrigin/uAssets/master/thirdparties/pgl.yoyo.org/as/serverlist'],
+    true,
+    TTL.THREE_HOURS()
+  ],
+  ['https://someonewhocares.org/hosts/hosts', null, true, TTL.THREE_HOURS()],
   // The NoCoin list is not actively maintained, but it is updated daily whenever it is being maintained, so we set a 3-day cache TTL
-  ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', true, TTL.THREE_DAYS()],
+  ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', null, true, TTL.THREE_DAYS()],
   // WindowsSpyBlocker has not been updated for more than a year, so we set a 14-day cache TTL
-  ['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true, TTL.TWO_WEEKS()],
-  ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false, TTL.THREE_DAYS()],
-  ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', false, TTL.THREE_DAYS()],
+  ['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', null, true, TTL.TWO_WEEKS()],
+  ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', null, false, TTL.THREE_DAYS()],
+  ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', null, false, TTL.THREE_DAYS()],
   // ad-wars is not actively maintained, so we set a 7 days cache ttl
-  ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, TTL.ONE_WEEK()],
-  ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true, TTL.THREE_HOURS()],
+  ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.ONE_WEEK()],
+  ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', null, true, TTL.THREE_HOURS()],
   // Curben's UrlHaus Malicious URL Blocklist
-  // 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
-  // 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
-  ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, TTL.THREE_HOURS()]
+  [
+    'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt',
+    [
+      'https://urlhaus-filter.pages.dev/urlhaus-filter-hosts.txt',
+      'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-hosts.txt'
+    ],
+    true,
+    TTL.THREE_HOURS()
+  ]
   // Curben's Phishing URL Blocklist
   // Covered by lib/get-phishing-domains.ts
   // 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt'

+ 1 - 7
Build/lib/trace-runner.ts

@@ -9,7 +9,7 @@ export function traceSync<T>(prefix: string, fn: () => T, timeFormatter: Formatt
   console.log(`${timeFormatter(`[${((end - start) / 1e6).toFixed(3)}ms]`)} ${prefix}`);
   return result;
 }
-traceSync.skip = <T>(_prefix: string, fn: () => T): T => fn();
+// traceSync.skip = <T>(_prefix: string, fn: () => T): T => fn();
 
 export const traceAsync = async <T>(prefix: string, fn: () => Promise<T>, timeFormatter: Formatter = picocolors.blue): Promise<T> => {
   const start = Bun.nanoseconds();
@@ -18,9 +18,3 @@ export const traceAsync = async <T>(prefix: string, fn: () => Promise<T>, timeFo
   console.log(`${timeFormatter(`[${((end - start) / 1e6).toFixed(3)}ms]`)} ${prefix}`);
   return result;
 };
-
-export interface TaskResult {
-  readonly start: number,
-  readonly end: number,
-  readonly taskName: string
-}

+ 1 - 1
Build/trace/index.ts

@@ -52,7 +52,7 @@ export const createSpan = (name: string, parentTraceResult?: TraceResult): Span
 
   const stop = (time?: number) => {
     if (status === SPAN_STATUS_END) {
-      throw new Error('span already stopped');
+      throw new Error(`span already stopped: ${name}`);
     }
     const end = time ?? Bun.nanoseconds();
 

+ 3 - 2
Source/non_ip/reject.conf

@@ -108,9 +108,7 @@ DOMAIN-KEYWORD,adjust.
 DOMAIN-KEYWORD,appsflyer
 DOMAIN-KEYWORD,dnserror
 DOMAIN-KEYWORD,marketing.net
-AND,((DOMAIN-KEYWORD,f-log), (DOMAIN-SUFFIX,grammarly.io))
 DOMAIN,stun.smartgslb.com
-AND,((DOMAIN-SUFFIX,prod.hosts.ooklaserver.net), (DOMAIN-KEYWORD,.ad.))
 
 DOMAIN-KEYWORD,_vmind.qqvideo.tc.qq.com
 DOMAIN-KEYWORD,-logging.nextmedia.com
@@ -120,7 +118,10 @@ DOMAIN-KEYWORD,.engage.3m.
 # -telemetry.officeapps.live.com
 DOMAIN-KEYWORD,telemetry.officeapps.live.com
 DOMAIN-KEYWORD,-launches.appsflyersdk.com
+DOMAIN-KEYWORD,-s2s.sensic.net
 
+AND,((DOMAIN-KEYWORD,f-log), (DOMAIN-SUFFIX,grammarly.io))
+AND,((DOMAIN-SUFFIX,prod.hosts.ooklaserver.net), (DOMAIN-KEYWORD,.ad.))
 AND,((DOMAIN-KEYWORD,genuine), (DOMAIN-KEYWORD,autodesk))
 
 # Important: Force add the following domains without whitelisting