ソースを参照

Separate DOMAIN/DOMAIN-SUFFIX when parsing AdGuard filters

SukkaW 1 年間 前
コミット
b378a4e87c
4 ファイル変更、39 行追加、135 行削除
  1. 17 9
      Build/build-reject-domainset.ts
  2. 16 120
      Build/lib/parse-filter/filters.ts
  3. 1 1
      package.json
  4. 5 5
      pnpm-lock.yaml

+ 17 - 9
Build/build-reject-domainset.ts

@@ -85,21 +85,29 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
       domainListsExtraDownloads.map(task => task(childSpan).then(appendArrayToRejectExtraOutput)),
 
       adguardFiltersDownloads.map(
-        task => task(childSpan).then(({ white, black }) => {
-          addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
-          appendArrayToRejectOutput(black);
+        task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes }) => {
+          addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains);
+          addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix);
+
+          rejectOutput.bulkAddDomain(blackDomains);
+          rejectOutput.bulkAddDomainSuffix(blackDomainSuffixes);
         })
       ),
       adguardFiltersExtraDownloads.map(
-        task => task(childSpan).then(({ white, black }) => {
-          addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
-          appendArrayToRejectExtraOutput(black);
+        task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes }) => {
+          addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains);
+          addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix);
+
+          rejectOutput.bulkAddDomain(blackDomains);
+          rejectOutput.bulkAddDomainSuffix(blackDomainSuffixes);
         })
       ),
       adguardFiltersWhitelistsDownloads.map(
-        task => task(childSpan).then(({ white, black }) => {
-          addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
-          addArrayElementsToSet(filterRuleWhitelistDomainSets, black);
+        task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes }) => {
+          addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains);
+          addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix);
+          addArrayElementsToSet(filterRuleWhitelistDomainSets, blackDomains);
+          addArrayElementsToSet(filterRuleWhitelistDomainSets, blackDomainSuffixes, suffix => '.' + suffix);
         })
       ),
       getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),

+ 16 - 120
Build/lib/parse-filter/filters.ts

@@ -27,11 +27,14 @@ export function processFilterRulesWithPreload(
 ) {
   const downloadPromise = fetchAssets(filterRulesUrl, fallbackUrls);
 
-  return (span: Span) => span.traceChildAsync<{ white: string[], black: string[] }>(`process filter rules: ${filterRulesUrl}`, async (span) => {
+  return (span: Span) => span.traceChildAsync<Record<'whiteDomains' | 'whiteDomainSuffixes' | 'blackDomains' | 'blackDomainSuffixes', string[]>>(`process filter rules: ${filterRulesUrl}`, async (span) => {
     const text = await span.traceChildPromise('download', downloadPromise);
 
-    const whitelistDomainSets = new Set<string>();
-    const blacklistDomainSets = new Set<string>();
+    const whiteDomains = new Set<string>();
+    const whiteDomainSuffixes = new Set<string>();
+
+    const blackDomains = new Set<string>();
+    const blackDomainSuffixes = new Set<string>();
 
     const warningMessages: string[] = [];
 
@@ -60,24 +63,16 @@ export function processFilterRulesWithPreload(
 
       switch (flag) {
         case ParseType.WhiteIncludeSubdomain:
-          if (hostname[0] === '.') {
-            whitelistDomainSets.add(hostname);
-          } else {
-            whitelistDomainSets.add(`.${hostname}`);
-          }
+          whiteDomainSuffixes.add(hostname);
           break;
         case ParseType.WhiteAbsolute:
-          whitelistDomainSets.add(hostname);
+          whiteDomains.add(hostname);
           break;
         case ParseType.BlackIncludeSubdomain:
-          if (hostname[0] === '.') {
-            blacklistDomainSets.add(hostname);
-          } else {
-            blacklistDomainSets.add(`.${hostname}`);
-          }
+          blackDomainSuffixes.add(hostname);
           break;
         case ParseType.BlackAbsolute:
-          blacklistDomainSets.add(hostname);
+          blackDomains.add(hostname);
           break;
         case ParseType.ErrorMessage:
           warningMessages.push(hostname);
@@ -105,118 +100,19 @@ export function processFilterRulesWithPreload(
     console.log(
       picocolors.gray('[process filter]'),
       picocolors.gray(filterRulesUrl),
-      picocolors.gray(`white: ${whitelistDomainSets.size}`),
-      picocolors.gray(`black: ${blacklistDomainSets.size}`)
+      picocolors.gray(`white: ${whiteDomains.size + whiteDomainSuffixes.size}`),
+      picocolors.gray(`black: ${blackDomains.size + blackDomainSuffixes.size}`)
     );
 
     return {
-      white: Array.from(whitelistDomainSets),
-      black: Array.from(blacklistDomainSets)
+      whiteDomains: Array.from(whiteDomains),
+      whiteDomainSuffixes: Array.from(whiteDomainSuffixes),
+      blackDomains: Array.from(blackDomains),
+      blackDomainSuffixes: Array.from(blackDomainSuffixes)
     };
   });
 }
 
-export async function processFilterRules(
-  parentSpan: Span,
-  filterRulesUrl: string,
-  fallbackUrls?: string[] | null,
-  includeThirdParty = false
-): Promise<{ white: string[], black: string[] }> {
-  const [white, black, warningMessages] = await parentSpan.traceChild(`process filter rules: ${filterRulesUrl}`).traceAsyncFn(async (span) => {
-    const text = await span.traceChildAsync('download', () => fetchAssets(filterRulesUrl, fallbackUrls));
-
-    const whitelistDomainSets = new Set<string>();
-    const blacklistDomainSets = new Set<string>();
-
-    const warningMessages: string[] = [];
-
-    const MUTABLE_PARSE_LINE_RESULT: [string, ParseType] = ['', ParseType.NotParsed];
-    /**
-       * @param {string} line
-       */
-    const lineCb = (line: string) => {
-      const result = parse(line, MUTABLE_PARSE_LINE_RESULT, includeThirdParty);
-      const flag = result[1];
-
-      if (flag === ParseType.NotParsed) {
-        throw new Error(`Didn't parse line: ${line}`);
-      }
-      if (flag === ParseType.Null) {
-        return;
-      }
-
-      const hostname = result[0];
-
-      if (flag === ParseType.WhiteIncludeSubdomain || flag === ParseType.WhiteAbsolute) {
-        onWhiteFound(hostname, filterRulesUrl);
-      } else {
-        onBlackFound(hostname, filterRulesUrl);
-      }
-
-      switch (flag) {
-        case ParseType.WhiteIncludeSubdomain:
-          if (hostname[0] === '.') {
-            whitelistDomainSets.add(hostname);
-          } else {
-            whitelistDomainSets.add(`.${hostname}`);
-          }
-          break;
-        case ParseType.WhiteAbsolute:
-          whitelistDomainSets.add(hostname);
-          break;
-        case ParseType.BlackIncludeSubdomain:
-          if (hostname[0] === '.') {
-            blacklistDomainSets.add(hostname);
-          } else {
-            blacklistDomainSets.add(`.${hostname}`);
-          }
-          break;
-        case ParseType.BlackAbsolute:
-          blacklistDomainSets.add(hostname);
-          break;
-        case ParseType.ErrorMessage:
-          warningMessages.push(hostname);
-          break;
-        default:
-          break;
-      }
-    };
-
-    const filterRules = text.split('\n');
-
-    span.traceChild('parse adguard filter').traceSyncFn(() => {
-      for (let i = 0, len = filterRules.length; i < len; i++) {
-        lineCb(filterRules[i]);
-      }
-    });
-
-    return [
-      Array.from(whitelistDomainSets),
-      Array.from(blacklistDomainSets),
-      warningMessages
-    ] as const;
-  });
-
-  for (let i = 0, len = warningMessages.length; i < len; i++) {
-    console.warn(
-      picocolors.yellow(warningMessages[i]),
-      picocolors.gray(picocolors.underline(filterRulesUrl))
-    );
-  }
-
-  console.log(
-    picocolors.gray('[process filter]'),
-    picocolors.gray(filterRulesUrl),
-    picocolors.gray(`white: ${white.length}`),
-    picocolors.gray(`black: ${black.length}`)
-  );
-
-  return {
-    white,
-    black
-  };
-}
-
 // const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder|\$cname)/;
 // cname exceptional filter can not be parsed by NetworkFilter
 // Surge / Clash can't handle CNAME either, so we just ignore them

+ 1 - 1
package.json

@@ -31,7 +31,7 @@
     "fast-cidr-tools": "^0.3.1",
     "fast-fifo": "^1.3.2",
     "fdir": "^6.4.3",
-    "foxts": "^1.1.6",
+    "foxts": "^1.1.7",
     "hash-wasm": "^4.12.0",
     "json-stringify-pretty-compact": "^3.0.0",
     "picocolors": "^1.1.1",

+ 5 - 5
pnpm-lock.yaml

@@ -53,8 +53,8 @@ importers:
         specifier: ^6.4.3
         version: 6.4.3(picomatch@4.0.2)
       foxts:
-        specifier: ^1.1.6
-        version: 1.1.6
+        specifier: ^1.1.7
+        version: 1.1.7
       hash-wasm:
         specifier: ^4.12.0
         version: 4.12.0
@@ -1136,8 +1136,8 @@ packages:
     resolution: {integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==}
     engines: {node: '>= 6'}
 
-  foxts@1.1.6:
-    resolution: {integrity: sha512-O2UR/MDLo0w4igcFHwLn2KyXUD84P6bE3U4OpVsxvcYrWLFvvDO8zKLBS/o++tFJTCq7p/3USR48E8/dF2vAAQ==}
+  foxts@1.1.7:
+    resolution: {integrity: sha512-Pw7S1yI20GY8gfj6RXt9usRE5TdQ/lgAqpy2EaWKUVNARC+jW0hxx/MQH8xkNlT3NSpt0X1P99CJTEvh3kVdUQ==}
 
   fs-constants@1.0.0:
     resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==}
@@ -2934,7 +2934,7 @@ snapshots:
       combined-stream: 1.0.8
       mime-types: 2.1.35
 
-  foxts@1.1.6: {}
+  foxts@1.1.7: {}
 
   fs-constants@1.0.0: {}