ソースを参照

Improve ruleset build process

SukkaW 1 年前
コミット
3ca9122a84

+ 4 - 3
Build/build-reject-domainset.ts

@@ -122,7 +122,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
         rejectOutput.addDomainKeyword(value); // Add for later deduplication
         rejectExtraOutput.addDomainKeyword(value); // Add for later deduplication
       } else if (type === 'DOMAIN-SUFFIX') {
-        rejectOutput.addDomainSuffix(value); // Add for later deduplication
+        rejectOutput.whitelistDomain('.' + value); // Whitelist the suffix right away (replaces the old add-then-dedupe step)
+        rejectExtraOutput.whitelistDomain('.' + value); // Apply the same whitelist entry to the extra set
       }
     }
   });
@@ -139,8 +140,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
       rejectExtraOutput.whitelistDomain(domain);
     }
 
-    for (const domain of rejectOutput.sorted) {
-      rejectExtraOutput.whitelistDomain(domain);
+    for (let i = 0, len = rejectOutput.$preprocessed.length; i < len; i++) {
+      rejectExtraOutput.whitelistDomain(rejectOutput.$preprocessed[i]); // Remove from the extra set what the main set already contains
     }
   });
 

+ 7 - 3
Build/lib/get-phishing-domains.ts

@@ -122,6 +122,7 @@ const sensitiveKeywords = createKeywordFilter([
   'fb-com',
   'facebook.',
   'facebook-',
+  'facebook-com',
   '.facebook',
   '-facebook',
   'coinbase',
@@ -139,7 +140,9 @@ const sensitiveKeywords = createKeywordFilter([
   'booking.com-',
   'booking-eu',
   'vinted-cz',
-  'inpost-pl'
+  'inpost-pl',
+  'login.microsoft',
+  'login-microsoft'
 ]);
 const lowKeywords = createKeywordFilter([
   '-co-jp',
@@ -147,7 +150,8 @@ const lowKeywords = createKeywordFilter([
   'customer-',
   '.www-',
   'instagram',
-  'microsoft'
+  'microsoft',
+  'passwordreset'
 ]);
 
 const cacheKey = createCacheKey(__filename);
@@ -224,7 +228,7 @@ async function processPhihsingDomains(domainArr: string[]) {
           domainScoreMap[apexDomain] >= 12
           || (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 4)
         ) {
-          domainArr.push(`.${apexDomain}`);
+          domainArr.push('.' + apexDomain);
         }
       }
 

+ 21 - 6
Build/lib/rules/base.ts

@@ -11,7 +11,7 @@ import { fastStringArrayJoin, writeFile } from '../misc';
 import { readFileByLine } from '../fetch-text-by-line';
 import { asyncWriteToStream } from '../async-write-to-stream';
 
-export abstract class RuleOutput {
+export abstract class RuleOutput<TPreprocessed = unknown> {
   protected domainTrie = createTrie<unknown>(null, true);
   protected domainKeywords = new Set<string>();
   protected domainWildcard = new Set<string>();
@@ -64,10 +64,14 @@ export abstract class RuleOutput {
     return result;
   };
 
+  protected span: Span;
+
   constructor(
-    protected readonly span: Span,
+    span: Span,
     protected readonly id: string
-  ) {}
+  ) {
+    this.span = span.traceChild('RuleOutput');
+  }
 
   protected title: string | null = null;
   withTitle(title: string) {
@@ -234,14 +238,21 @@ export abstract class RuleOutput {
     return this;
   }
 
-  abstract surge(): string[];
-  abstract clash(): string[];
-  abstract singbox(): string[];
+  protected abstract preprocess(): NonNullable<TPreprocessed>;
 
   done() {
     return this.pendingPromise;
   }
 
+  private $$preprocessed: TPreprocessed | null = null;
+
+  get $preprocessed() {
+    if (this.$$preprocessed === null) {
+      this.$$preprocessed = this.span.traceChildSync('RuleOutput#preprocess: ' + this.id, () => this.preprocess());
+    }
+    return this.$$preprocessed;
+  }
+
   async write(): Promise<void> {
     await this.done();
 
@@ -276,6 +287,10 @@ export abstract class RuleOutput {
       )
     ]);
   }
+
+  abstract surge(): string[];
+  abstract clash(): string[];
+  abstract singbox(): string[];
 }
 
 export const fileEqual = async (linesA: string[], source: AsyncIterable<string>): Promise<boolean> => {

+ 22 - 25
Build/lib/rules/domainset.ts

@@ -5,56 +5,53 @@ import { RuleOutput } from './base';
 import type { SingboxSourceFormat } from '../singbox';
 import { nullthrow } from 'foxact/nullthrow';
 
-export class DomainsetOutput extends RuleOutput {
-  protected type = 'domainset' as const;
+type Preprocessed = string[];
 
-  private $sorted: string[] | null = null;
+export class DomainsetOutput extends RuleOutput<Preprocessed> {
+  protected type = 'domainset' as const;
 
-  get sorted() {
-    if (!this.$sorted) {
-      const kwfilter = createKeywordFilter(this.domainKeywords);
+  preprocess() {
+    const kwfilter = createKeywordFilter(this.domainKeywords);
 
-      const results: string[] = [];
+    const results: string[] = [];
 
-      const dumped = this.domainTrie.dump();
+    const dumped = this.domainTrie.dump();
 
-      for (let i = 0, len = dumped.length; i < len; i++) {
-        const domain = dumped[i];
-        if (!kwfilter(domain)) {
-          results.push(domain);
-        }
+    for (let i = 0, len = dumped.length; i < len; i++) {
+      const domain = dumped[i];
+      if (!kwfilter(domain)) {
+        results.push(domain);
       }
+    }
 
-      const sorted = sortDomains(results, this.apexDomainMap, this.subDomainMap);
-      sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe');
+    const sorted = sortDomains(results, this.apexDomainMap, this.subDomainMap);
+    sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe');
 
-      this.$sorted = sorted;
-    }
-    return this.$sorted;
+    return sorted;
   }
 
   calcDomainMap() {
     if (!this.apexDomainMap || !this.subDomainMap) {
-      const { domainMap, subdomainMap } = buildParseDomainMap(this.sorted);
+      const { domainMap, subdomainMap } = buildParseDomainMap(this.$preprocessed);
       this.apexDomainMap = domainMap;
       this.subDomainMap = subdomainMap;
     }
   }
 
   surge(): string[] {
-    return this.sorted;
+    return this.$preprocessed;
   }
 
   clash(): string[] {
-    return this.sorted.map(i => (i[0] === '.' ? `+${i}` : i));
+    return this.$preprocessed.map(i => (i[0] === '.' ? `+${i}` : i));
   }
 
   singbox(): string[] {
     const domains: string[] = [];
     const domainSuffixes: string[] = [];
 
-    for (let i = 0, len = this.sorted.length; i < len; i++) {
-      const domain = this.sorted[i];
+    for (let i = 0, len = this.$preprocessed.length; i < len; i++) {
+      const domain = this.$preprocessed[i];
       if (domain[0] === '.') {
         domainSuffixes.push(domain.slice(1));
       } else {
@@ -72,11 +69,11 @@ export class DomainsetOutput extends RuleOutput {
   }
 
   getStatMap() {
-    invariant(this.sorted, 'Non dumped yet');
+    invariant(this.$preprocessed, 'Non dumped yet');
     invariant(this.apexDomainMap, 'Missing apex domain map');
 
     return Array.from(
-      nullthrow(this.sorted, 'Non dumped yet')
+      nullthrow(this.$preprocessed, 'Non dumped yet')
         .reduce<Map<string, number>>((acc, cur) => {
           const suffix = this.apexDomainMap!.get(cur);
           if (suffix) {

+ 17 - 19
Build/lib/rules/ip.ts

@@ -6,30 +6,28 @@ import { RuleOutput } from './base';
 
 import { merge } from 'fast-cidr-tools';
 
-export class IPListOutput extends RuleOutput {
+type Preprocessed = string[];
+
+export class IPListOutput extends RuleOutput<Preprocessed> {
   protected type = 'ip' as const;
 
   constructor(span: Span, id: string, private readonly clashUseRule = true) {
     super(span, id);
   }
 
-  private $merged: string[] | null = null;
-  get merged() {
-    if (!this.$merged) {
-      const results: string[] = [];
-      appendArrayInPlace(
-        results,
-        merge(
-          appendArrayInPlace(Array.from(this.ipcidrNoResolve), Array.from(this.ipcidr)),
-          true
-        )
-      );
-      appendArrayFromSet(results, this.ipcidr6NoResolve);
-      appendArrayFromSet(results, this.ipcidr6);
+  protected preprocess() {
+    const results: string[] = [];
+    appendArrayInPlace(
+      results,
+      merge(
+        appendArrayInPlace(Array.from(this.ipcidrNoResolve), Array.from(this.ipcidr)),
+        true
+      )
+    );
+    appendArrayFromSet(results, this.ipcidr6NoResolve);
+    appendArrayFromSet(results, this.ipcidr6);
 
-      this.$merged = results;
-    }
-    return this.$merged;
+    return results;
   }
 
   private $surge: string[] | null = null;
@@ -59,7 +57,7 @@ export class IPListOutput extends RuleOutput {
       return this.surge();
     }
 
-    return this.merged;
+    return this.$preprocessed;
   }
 
   singbox(): string[] {
@@ -67,7 +65,7 @@ export class IPListOutput extends RuleOutput {
       version: 2,
       rules: [{
         domain: ['this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'],
-        ip_cidr: this.merged
+        ip_cidr: this.$preprocessed
       }]
     };
     return RuleOutput.jsonToLines(singbox);

+ 26 - 28
Build/lib/rules/ruleset.ts

@@ -7,41 +7,39 @@ import type { SingboxSourceFormat } from '../singbox';
 import { sortDomains } from '../stable-sort-domain';
 import { RuleOutput } from './base';
 
-export class RulesetOutput extends RuleOutput {
+type Preprocessed = [domain: string[], domainSuffix: string[], sortedDomainRules: string[]];
+
+export class RulesetOutput extends RuleOutput<Preprocessed> {
   constructor(span: Span, id: string, protected type: 'non_ip' | 'ip') {
     super(span, id);
   }
 
-  private $computed: [domain: string[], domainSuffix: string[], sortedDomainRules: string[]] | null = null;
-  private computed() {
-    if (!this.$computed) {
-      const kwfilter = createKeywordFilter(this.domainKeywords);
-
-      const domains: string[] = [];
-      const domainSuffixes: string[] = [];
-      const sortedDomainRules: string[] = [];
-
-      for (const domain of sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap)) {
-        if (kwfilter(domain)) {
-          continue;
-        }
-        if (domain[0] === '.') {
-          domainSuffixes.push(domain.slice(1));
-          sortedDomainRules.push(`DOMAIN-SUFFIX,${domain.slice(1)}`);
-        } else {
-          domains.push(domain);
-          sortedDomainRules.push(`DOMAIN,${domain}`);
-        }
-      }
+  protected preprocess() {
+    const kwfilter = createKeywordFilter(this.domainKeywords);
+
+    const domains: string[] = [];
+    const domainSuffixes: string[] = [];
+    const sortedDomainRules: string[] = [];
 
-      this.$computed = [domains, domainSuffixes, sortedDomainRules];
+    for (const domain of sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap)) {
+      if (kwfilter(domain)) {
+        continue;
+      }
+      if (domain[0] === '.') {
+        domainSuffixes.push(domain.slice(1));
+        sortedDomainRules.push(`DOMAIN-SUFFIX,${domain.slice(1)}`);
+      } else {
+        domains.push(domain);
+        sortedDomainRules.push(`DOMAIN,${domain}`);
+      }
     }
-    return this.$computed;
+
+    return [domains, domainSuffixes, sortedDomainRules] satisfies Preprocessed;
   }
 
   surge(): string[] {
     const results: string[] = ['DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'];
-    appendArrayInPlace(results, this.computed()[2]);
+    appendArrayInPlace(results, this.$preprocessed[2]);
 
     appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`);
     appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-WILDCARD,${i}`);
@@ -77,7 +75,7 @@ export class RulesetOutput extends RuleOutput {
   clash(): string[] {
     const results: string[] = ['DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'];
 
-    appendArrayInPlace(results, this.computed()[2]);
+    appendArrayInPlace(results, this.$preprocessed[2]);
 
     appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`);
     appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-REGEX,${RuleOutput.domainWildCardToRegex(i)}`);
@@ -121,8 +119,8 @@ export class RulesetOutput extends RuleOutput {
     const singbox: SingboxSourceFormat = {
       version: 2,
       rules: [{
-        domain: appendArrayInPlace(['this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'], this.computed()[0]),
-        domain_suffix: this.computed()[1],
+        domain: appendArrayInPlace(['this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'], this.$preprocessed[0]),
+        domain_suffix: this.$preprocessed[1],
         domain_keyword: Array.from(this.domainKeywords),
         domain_regex: Array.from(this.domainWildcard).map(RuleOutput.domainWildCardToRegex),
         ip_cidr,