Browse Source

Chore: universal way to attach upstream data source

SukkaW 8 months ago
parent
commit
de1f817eec

+ 39 - 33
Build/build-reject-domainset.ts

@@ -39,45 +39,35 @@ const adguardFiltersExtraDownloads = ADGUARD_FILTERS_EXTRA.map(entry => processF
 const adguardFiltersWhitelistsDownloads = ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRulesWithPreload(...entry));
 
 export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => {
-  const rejectBaseDescription = [
-    ...SHARED_DESCRIPTION,
-    '',
-    'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining',
-    '',
-    'Build from:',
-    ...HOSTS.map(host => ` - ${host[0]}`),
-    ...DOMAIN_LISTS.map(domainList => ` - ${domainList[0]}`),
-    ...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
-  ];
-
   const rejectDomainsetOutput = new DomainsetOutput(span, 'reject')
     .withTitle('Sukka\'s Ruleset - Reject Base')
-    .withDescription(rejectBaseDescription);
+    .withDescription([
+      ...SHARED_DESCRIPTION,
+      '',
+      'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining'
+    ])
+    .appendDataSource(HOSTS.map(host => host[0]))
+    .appendDataSource(DOMAIN_LISTS.map(domainList => domainList[0]));
 
   const rejectExtraDomainsetOutput = new DomainsetOutput(span, 'reject_extra')
     .withTitle('Sukka\'s Ruleset - Reject Extra')
     .withDescription([
       ...SHARED_DESCRIPTION,
       '',
-      'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining',
-      '',
-      'Build from:',
-      ...HOSTS_EXTRA.map(host => ` - ${host[0]}`),
-      ...DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`),
-      ...ADGUARD_FILTERS_EXTRA.map(filter => ` - ${filter[0]}`)
-    ]);
+      'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining'
+    ])
+    .appendDataSource(HOSTS_EXTRA.map(host => host[0]))
+    .appendDataSource(DOMAIN_LISTS_EXTRA.map(domainList => domainList[0]));
 
   const rejectPhisingDomainsetOutput = new DomainsetOutput(span, 'reject_phishing')
     .withTitle('Sukka\'s Ruleset - Reject Phishing')
     .withDescription([
       ...SHARED_DESCRIPTION,
       '',
-      'The domainset is specifically designed for anti-phishing',
-      '',
-      'Build from:',
-      ...PHISHING_HOSTS_EXTRA.map(host => ` - ${host[0]}`),
-      ...PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`)
-    ]);
+      'The domainset is specifically designed for anti-phishing'
+    ])
+    .appendDataSource(PHISHING_HOSTS_EXTRA.map(host => host[0]))
+    .appendDataSource(PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => domainList[0]));
 
   const rejectNonIpRulesetOutput = new RulesetOutput(span, 'reject', 'non_ip')
     .withTitle('Sukka\'s Ruleset - Reject Non-IP')
@@ -94,13 +84,10 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
     .withDescription([
       ...SHARED_DESCRIPTION,
       '',
-      'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.',
-      '',
-      'Data from:',
-      ' - https://github.com/felixonmars/dnsmasq-china-list',
-      ' - https://github.com/curbengh/botnet-filter',
-      ' - And other sources mentioned in /domainset/reject file'
+      'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.'
     ])
+    .appendDataSource('https://github.com/felixonmars/dnsmasq-china-list')
+    .appendDataSource('https://github.com/curbengh/botnet-filter')
     .bulkAddIPASN(AUGUST_ASN)
     .bulkAddIPASN(HUIZE_ASN);
 
@@ -143,7 +130,13 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
       rejectPhisingDomainsetOutput.addFromDomainset(getPhishingDomains(childSpan)),
 
       adguardFiltersDownloads.map(
-        task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes, blackIPs, blackWildcard, whiteKeyword, blackKeyword }) => {
+        task => task(childSpan).then(({
+          filterRulesUrl,
+          whiteDomains, whiteDomainSuffixes,
+          blackDomains, blackDomainSuffixes,
+          blackIPs, blackWildcard,
+          whiteKeyword, blackKeyword
+        }) => {
           addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains);
           addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix);
 
@@ -154,13 +147,22 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
 
           rejectDomainsetOutput.bulkAddDomainKeyword(blackKeyword);
 
+          rejectDomainsetOutput.appendDataSource(filterRulesUrl);
+
           rejectNonIpRulesetOutput.bulkAddDomainWildcard(blackWildcard);
+          rejectNonIpRulesetOutput.appendDataSource(filterRulesUrl);
 
           rejectIPOutput.bulkAddAnyCIDR(blackIPs, false);
+          rejectIPOutput.appendDataSource(filterRulesUrl);
         })
       ),
       adguardFiltersExtraDownloads.map(
-        task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes, blackIPs, blackWildcard, whiteKeyword, blackKeyword }) => {
+        task => task(childSpan).then(({
+          filterRulesUrl,
+          whiteDomains, whiteDomainSuffixes,
+          blackDomains, blackDomainSuffixes,
+          blackIPs, blackWildcard, whiteKeyword, blackKeyword
+        }) => {
           addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains);
           addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix);
           addArrayElementsToSet(filterRuleWhiteKeywords, whiteKeyword);
@@ -170,9 +172,13 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
 
           rejectExtraDomainsetOutput.bulkAddDomainKeyword(blackKeyword);
 
+          rejectExtraDomainsetOutput.appendDataSource(filterRulesUrl);
+
           rejectIPOutput.bulkAddAnyCIDR(blackIPs, false);
+          rejectIPOutput.appendDataSource(filterRulesUrl);
 
           rejectNonIpRulesetOutput.bulkAddDomainWildcard(blackWildcard);
+          rejectNonIpRulesetOutput.appendDataSource(filterRulesUrl);
         })
       ),
       adguardFiltersWhitelistsDownloads.map(

+ 1 - 1
Build/lib/get-phishing-domains.ts

@@ -7,7 +7,7 @@ import type { TldTsParsed } from './normalize-domain';
 const pool = new Worktank({
   name: 'process-phishing-domains',
   size: 1,
-  timeout: 10000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects
+  timeout: 20000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects
   warmup: true,
   autoterminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
   env: {},

+ 4 - 1
Build/lib/parse-filter/filters.ts

@@ -45,7 +45,9 @@ export function processFilterRulesWithPreload(
       | 'whiteKeyword'
       | 'blackKeyword',
       string[]
-    >
+    > & {
+      filterRulesUrl: string
+    }
   >(`process filter rules: ${filterRulesUrl}`, async (span) => {
     const filterRules = await span.traceChildPromise('download', downloadPromise);
 
@@ -140,6 +142,7 @@ export function processFilterRulesWithPreload(
     );
 
     return {
+      filterRulesUrl,
       whiteDomains: Array.from(whiteDomains),
       whiteDomainSuffixes: Array.from(whiteDomainSuffixes),
       blackDomains: Array.from(blackDomains),

+ 30 - 3
Build/lib/rules/base.ts

@@ -2,6 +2,7 @@ import type { Span } from '../../trace';
 import { HostnameSmolTrie } from '../trie';
 import { not, nullthrow } from 'foxts/guard';
 import { fastIpVersion } from 'foxts/fast-ip-version';
+import { addArrayElementsToSet } from 'foxts/add-array-elements-to-set';
 import type { MaybePromise } from '../misc';
 import type { BaseWriteStrategy } from '../writing-strategy/base';
 import { merge as mergeCidr } from 'fast-cidr-tools';
@@ -16,6 +17,8 @@ import { SurgeMitmSgmodule } from '../writing-strategy/surge';
 export class FileOutput {
   protected strategies: BaseWriteStrategy[] = [];
 
+  protected dataSource = new Set<string>();
+
   public domainTrie = new HostnameSmolTrie(null);
   public wildcardTrie: HostnameSmolTrie = new HostnameSmolTrie(null);
 
@@ -77,9 +80,9 @@ export class FileOutput {
     this.strategies.push(strategy);
   }
 
-  protected description: string[] | readonly string[] | null = null;
+  protected description: string[] | null = null;
   withDescription(description: string[] | readonly string[]) {
-    this.description = description;
+    this.description = description as string[];
     return this;
   }
 
@@ -314,6 +317,19 @@ export class FileOutput {
     return this;
   }
 
+  /**
+   * Register one or more upstream data sources (duplicates are ignored). They are rendered, sorted, at the end of the description.
+   */
+  appendDataSource(source: string | string[]) {
+    if (typeof source === 'string') {
+      this.dataSource.add(source);
+    } else {
+      addArrayElementsToSet(this.dataSource, source);
+    }
+
+    return this;
+  }
+
   async done() {
     await this.pendingPromise;
     this.pendingPromise = null;
@@ -503,15 +519,26 @@ export class FileOutput {
       return childSpan.traceChildAsync('output to disk', (childSpan) => {
         const promises: Array<Promise<void> | void> = [];
 
+        const descriptions = nullthrow(this.description, 'Missing description');
+
+        if (this.dataSource.size) {
+          descriptions.push(
+            '',
+            'This file contains data from:'
+          );
+          appendArrayInPlace(descriptions, Array.from(this.dataSource).sort().map((source) => `  - ${source}`));
+        }
+
         for (let i = 0, len = this.strategies.length; i < len; i++) {
           const strategy = this.strategies[i];
 
           const basename = (strategy.overwriteFilename || this.id) + '.' + strategy.fileExtension;
+
           promises.push(
             childSpan.traceChildAsync('write ' + strategy.name, (childSpan) => Promise.resolve(strategy.output(
               childSpan,
               nullthrow(this.title, 'Missing title'),
-              nullthrow(this.description, 'Missing description'),
+              descriptions,
               this.date,
               path.join(
                 strategy.outputDir,