ソースを参照

Refactor: add more parsing

SukkaW 1 年間 前
コミット
1783cccf7f

+ 3 - 5
Build/lib/cache-filesystem.ts

@@ -4,13 +4,11 @@ import os from 'node:os';
 import path from 'node:path';
 import { mkdirSync } from 'node:fs';
 import picocolors from 'picocolors';
-import { fastStringArrayJoin } from './misc';
+import { fastStringArrayJoin, identity } from './misc';
 import { performance } from 'node:perf_hooks';
 import fs from 'node:fs';
 import { stringHash } from './string-hash';
 
-const identity = (x: any) => x;
-
 const enum CacheStatus {
   Hit = 'hit',
   Stale = 'stale',
@@ -186,7 +184,7 @@ export class Cache<S = string> {
     if (cached == null) {
       console.log(picocolors.yellow('[cache] miss'), picocolors.gray(key), picocolors.gray(`ttl: ${TTL.humanReadable(ttl)}`));
 
-      const serializer = 'serializer' in opt ? opt.serializer : identity;
+      const serializer = 'serializer' in opt ? opt.serializer : identity as any;
 
       const promise = fn();
 
@@ -202,7 +200,7 @@ export class Cache<S = string> {
       this.updateTtl(key, ttl);
     }
 
-    const deserializer = 'deserializer' in opt ? opt.deserializer : identity;
+    const deserializer = 'deserializer' in opt ? opt.deserializer : identity as any;
     return deserializer(cached);
   }
 

+ 1 - 2
Build/lib/clash.ts

@@ -1,8 +1,7 @@
 import picocolors from 'picocolors';
-import { domainWildCardToRegex } from './misc';
+import { domainWildCardToRegex, identity } from './misc';
 import { isProbablyIpv4, isProbablyIpv6 } from './is-fast-ip';
 
-const identity = <T>(x: T): T => x;
 const unsupported = Symbol('unsupported');
 
 // https://dreamacro.github.io/clash/configuration/rules.html

+ 81 - 30
Build/lib/create-file.ts

@@ -13,7 +13,7 @@ import { nullthrow } from 'foxact/nullthrow';
 import createKeywordFilter from './aho-corasick';
 import picocolors from 'picocolors';
 import fs from 'node:fs';
-import { fastStringArrayJoin, writeFile } from './misc';
+import { appendArrayFromSet, fastStringArrayJoin, writeFile } from './misc';
 import { readFileByLine } from './fetch-text-by-line';
 import { asyncWriteToStream } from './async-write-to-stream';
 
@@ -40,26 +40,49 @@ abstract class RuleOutput {
   protected domainTrie = createTrie<unknown>(null, true);
   protected domainKeywords = new Set<string>();
   protected domainWildcard = new Set<string>();
+  protected userAgent = new Set<string>();
+  protected processName = new Set<string>();
+  protected processPath = new Set<string>();
+  protected urlRegex = new Set<string>();
   protected ipcidr = new Set<string>();
   protected ipcidrNoResolve = new Set<string>();
+  protected ipasn = new Set<string>();
+  protected ipasnNoResolve = new Set<string>();
   protected ipcidr6 = new Set<string>();
   protected ipcidr6NoResolve = new Set<string>();
+  protected geoip = new Set<string>();
+  protected groipNoResolve = new Set<string>();
   // TODO: add sourceIpcidr
   // TODO: add sourcePort
   // TODO: add port
-  // TODO: processName
-  // TODO: processPath
-  // TODO: userAgent
-  // TODO: urlRegex
 
   protected otherRules: Array<[raw: string, orderWeight: number]> = [];
   protected abstract type: 'domainset' | 'non_ip' | 'ip';
 
   protected pendingPromise = Promise.resolve();
 
-  static jsonToLines(this: void, json: unknown): string[] {
-    return stringify(json).split('\n');
-  }
+  static jsonToLines = (json: unknown): string[] => stringify(json).split('\n');
+
+  static domainWildCardToRegex = (domain: string) => {
+    let result = '^';
+    for (let i = 0, len = domain.length; i < len; i++) {
+      switch (domain[i]) {
+        case '.':
+          result += String.raw`\.`;
+          break;
+        case '*':
+          result += '[a-zA-Z0-9-_.]*?';
+          break;
+        case '?':
+          result += '[a-zA-Z0-9-_.]';
+          break;
+        default:
+          result += domain[i];
+      }
+    }
+    result += '$';
+    return result;
+  };
 
   constructor(
     protected readonly span: Span,
@@ -114,11 +137,6 @@ abstract class RuleOutput {
     return this;
   }
 
-  addDomainWildcard(wildcard: string) {
-    this.domainWildcard.add(wildcard);
-    return this;
-  }
-
   private async addFromDomainsetPromise(source: AsyncIterable<string> | Iterable<string> | string[]) {
     for await (const line of source) {
       if (line[0] === '.') {
@@ -152,14 +170,35 @@ abstract class RuleOutput {
           this.addDomainKeyword(value);
           break;
         case 'DOMAIN-WILDCARD':
-          this.addDomainWildcard(value);
+          this.domainWildcard.add(value);
+          break;
+        case 'USER-AGENT':
+          this.userAgent.add(value);
+          break;
+        case 'PROCESS-NAME':
+          if (value.includes('/') || value.includes('\\')) {
+            this.processPath.add(value);
+          } else {
+            this.processName.add(value);
+          }
           break;
+        case 'URL-REGEX': {
+          const [, ...rest] = splitted;
+          this.urlRegex.add(rest.join(','));
+          break;
+        }
         case 'IP-CIDR':
           (arg === 'no-resolve' ? this.ipcidrNoResolve : this.ipcidr).add(value);
           break;
         case 'IP-CIDR6':
           (arg === 'no-resolve' ? this.ipcidr6NoResolve : this.ipcidr6).add(value);
           break;
+        case 'IP-ASN':
+          (arg === 'no-resolve' ? this.ipasnNoResolve : this.ipasn).add(value);
+          break;
+        case 'GEOIP':
+          (arg === 'no-resolve' ? this.groipNoResolve : this.geoip).add(value);
+          break;
         default:
           this.otherRules.push([line, type in sortTypeOrder ? sortTypeOrder[type] : sortTypeOrder[defaultSortTypeOrder]]);
           break;
@@ -374,9 +413,7 @@ export class RulesetOutput extends RuleOutput {
 
     const kwfilter = createKeywordFilter(this.domainKeywords);
 
-    const sortedDomains = sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap);
-    for (let i = 0, len = sortedDomains.length; i < len; i++) {
-      const domain = sortedDomains[i];
+    for (const domain of sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap)) {
       if (kwfilter(domain)) {
         continue;
       }
@@ -387,22 +424,36 @@ export class RulesetOutput extends RuleOutput {
       }
     }
 
-    for (const keyword of this.domainKeywords) {
-      results.push(`DOMAIN-KEYWORD,${keyword}`);
-    }
-    for (const wildcard of this.domainWildcard) {
-      results.push(`DOMAIN-WILDCARD,${wildcard}`);
-    }
+    appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`);
+    appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-WILDCARD,${i}`);
+
+    appendArrayFromSet(results, this.userAgent, i => `USER-AGENT,${i}`);
 
-    const sortedRules = this.otherRules.sort((a, b) => a[1] - b[1]);
-    for (let i = 0, len = sortedRules.length; i < len; i++) {
-      results.push(sortedRules[i][0]);
+    appendArrayFromSet(results, this.processName, i => `PROCESS-NAME,${i}`);
+    appendArrayFromSet(results, this.processPath, i => `PROCESS-NAME,${i}`);
+
+    for (const [rule] of this.otherRules.sort((a, b) => a[1] - b[1])) {
+      results.push(rule);
     }
 
-    this.ipcidr.forEach(cidr => results.push(`IP-CIDR,${cidr}`));
-    this.ipcidrNoResolve.forEach(cidr => results.push(`IP-CIDR,${cidr},no-resolve`));
-    this.ipcidr6.forEach(cidr => results.push(`IP-CIDR6,${cidr}`));
-    this.ipcidr6NoResolve.forEach(cidr => results.push(`IP-CIDR6,${cidr},no-resolve`));
+    appendArrayFromSet(results, this.urlRegex, i => `URL-REGEX,${i}`);
+
+    // The `no-resolve` variants are appended first, then the plain variants.
+    appendArrayFromSet(results, this.ipcidrNoResolve, i => `IP-CIDR,${i},no-resolve`);
+    appendArrayFromSet(results, this.ipcidr6NoResolve, i => `IP-CIDR6,${i},no-resolve`);
+    appendArrayFromSet(results, this.ipasnNoResolve, i => `IP-ASN,${i},no-resolve`);
+    appendArrayFromSet(results, this.groipNoResolve, i => `GEOIP,${i},no-resolve`);
+
+    appendArrayFromSet(results, this.ipcidr, i => `IP-CIDR,${i}`);
+    appendArrayFromSet(results, this.ipcidr6, i => `IP-CIDR6,${i}`);
+    appendArrayFromSet(results, this.ipasn, i => `IP-ASN,${i}`);
+    // Both GEOIP sets are fully emitted by the appendArrayFromSet calls in this
+    // section; iterating them again afterwards would write every GEOIP rule twice.
+    appendArrayFromSet(results, this.geoip, i => `GEOIP,${i}`);
 
     const surge = results;
     const clash = surgeRulesetToClashClassicalTextRuleset(results);

+ 11 - 0
Build/lib/misc.ts

@@ -61,6 +61,17 @@ export const domainWildCardToRegex = (domain: string) => {
   return result;
 };
 
+export const identity = <T>(x: T): T => x;
+
+export const appendArrayFromSet = <T>(dest: T[], source: Set<T>, transformer: (item: T) => T = identity) => {
+  const iterator = source.values();
+  let step: IteratorResult<T, undefined>;
+
+  while ((step = iterator.next(), !step.done)) {
+    dest.push(transformer(step.value));
+  }
+};
+
 export const output = (id: string, type: 'non_ip' | 'ip' | 'domainset') => {
   return [
     path.join(OUTPUT_SURGE_DIR, type, id + '.conf'),

+ 1 - 2
Build/lib/parse-filter.ts

@@ -11,13 +11,12 @@ import { deserializeArray, fsFetchCache, serializeArray, createCacheKey } from '
 import type { Span } from '../trace';
 import createKeywordFilter from './aho-corasick';
 import { looseTldtsOpt } from '../constants/loose-tldts-opt';
+import { identity } from './misc';
 
 const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null
 let foundDebugDomain = false;
 const temporaryBypass = typeof DEBUG_DOMAIN_TO_FIND === 'string';
 
-const identity = <T>(x: T) => x;
-
 const domainListLineCb = (l: string, set: string[], includeAllSubDomain: boolean, meta: string) => {
   let line = processLine(l);
   if (!line) return;