Browse Source

Refactor: adapt new output

SukkaW 1 year ago
parent
commit
4808ed8d27

+ 21 - 32
Build/build-domestic-direct-lan-ruleset-dns-mapping-module.ts

@@ -3,14 +3,15 @@ import path from 'node:path';
 import { DOMESTICS } from '../Source/non_ip/domestic';
 import { DOMESTICS } from '../Source/non_ip/domestic';
 import { DIRECTS, LANS } from '../Source/non_ip/direct';
 import { DIRECTS, LANS } from '../Source/non_ip/direct';
 import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
 import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
-import { compareAndWriteFile, createRuleset } from './lib/create-file';
+import { compareAndWriteFile } from './lib/create-file';
 import { task } from './trace';
 import { task } from './trace';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { createMemoizedPromise } from './lib/memo-promise';
 import { createMemoizedPromise } from './lib/memo-promise';
 import * as yaml from 'yaml';
 import * as yaml from 'yaml';
 import { appendArrayInPlace } from './lib/append-array-in-place';
 import { appendArrayInPlace } from './lib/append-array-in-place';
-import { output, writeFile } from './lib/misc';
+import { writeFile } from './lib/misc';
 import { OUTPUT_INTERNAL_DIR, OUTPUT_MODULES_DIR, SOURCE_DIR } from './constants/dir';
 import { OUTPUT_INTERNAL_DIR, OUTPUT_MODULES_DIR, SOURCE_DIR } from './constants/dir';
+import { RulesetOutput } from './lib/create-file-new';
 
 
 export const getDomesticAndDirectDomainsRulesetPromise = createMemoizedPromise(async () => {
 export const getDomesticAndDirectDomainsRulesetPromise = createMemoizedPromise(async () => {
   const domestics = await readFileIntoProcessedArray(path.join(SOURCE_DIR, 'non_ip/domestic.conf'));
   const domestics = await readFileIntoProcessedArray(path.join(SOURCE_DIR, 'non_ip/domestic.conf'));
@@ -38,45 +39,33 @@ export const buildDomesticRuleset = task(require.main === module, __filename)(as
   appendArrayInPlace(dataset, Object.entries(LANS));
   appendArrayInPlace(dataset, Object.entries(LANS));
 
 
   return Promise.all([
   return Promise.all([
-    createRuleset(
-      span,
-      'Sukka\'s Ruleset - Domestic Domains',
-      [
+    new RulesetOutput(span, 'domestic', 'non_ip')
+      .withTitle('Sukka\'s Ruleset - Domestic Domains')
+      .withDescription([
         ...SHARED_DESCRIPTION,
         ...SHARED_DESCRIPTION,
         '',
         '',
        'This file contains known addresses that are available in the Mainland China.'
        'This file contains known addresses that are available in the Mainland China.'
-      ],
-      new Date(),
-      res[0],
-      'ruleset',
-      output('domestic', 'non_ip')
-    ),
-    createRuleset(
-      span,
-      'Sukka\'s Ruleset - Direct Rules',
-      [
+      ])
+      .addFromRuleset(res[0])
+      .write(),
+    new RulesetOutput(span, 'direct', 'non_ip')
+      .withTitle('Sukka\'s Ruleset - Direct Rules')
+      .withDescription([
         ...SHARED_DESCRIPTION,
         ...SHARED_DESCRIPTION,
         '',
         '',
         'This file contains domains and process that should not be proxied.'
         'This file contains domains and process that should not be proxied.'
-      ],
-      new Date(),
-      res[1],
-      'ruleset',
-      output('direct', 'non_ip')
-    ),
-    createRuleset(
-      span,
-      'Sukka\'s Ruleset - LAN',
-      [
+      ])
+      .addFromRuleset(res[1])
+      .write(),
+    new RulesetOutput(span, 'lan', 'non_ip')
+      .withTitle('Sukka\'s Ruleset - LAN')
+      .withDescription([
         ...SHARED_DESCRIPTION,
         ...SHARED_DESCRIPTION,
         '',
         '',
         'This file includes rules for LAN DOMAIN and reserved TLDs.'
         'This file includes rules for LAN DOMAIN and reserved TLDs.'
-      ],
-      new Date(),
-      res[2],
-      'ruleset',
-      output('lan', 'non_ip')
-    ),
+      ])
+      .addFromRuleset(res[2])
+      .write(),
     compareAndWriteFile(
     compareAndWriteFile(
       span,
       span,
       [
       [

+ 7 - 13
Build/build-microsoft-cdn.ts

@@ -1,12 +1,10 @@
 import { task } from './trace';
 import { task } from './trace';
-import { createRuleset } from './lib/create-file';
 import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
 import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
 import { createTrie } from './lib/trie';
 import { createTrie } from './lib/trie';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { createMemoizedPromise } from './lib/memo-promise';
 import { createMemoizedPromise } from './lib/memo-promise';
 import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
 import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
-import { sortDomains } from './lib/stable-sort-domain';
-import { output } from './lib/misc';
+import { RulesetOutput } from './lib/create-file-new';
 
 
 const PROBE_DOMAINS = ['.microsoft.com', '.windows.net', '.windows.com', '.windowsupdate.com', '.windowssearch.com', '.office.net'];
 const PROBE_DOMAINS = ['.microsoft.com', '.windows.net', '.windows.com', '.windowsupdate.com', '.windowssearch.com', '.office.net'];
 
 
@@ -39,7 +37,7 @@ export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
   const trie2 = createTrie(foundMicrosoftCdnDomains, true);
   const trie2 = createTrie(foundMicrosoftCdnDomains, true);
   BLACKLIST.forEach(trie2.whitelist);
   BLACKLIST.forEach(trie2.whitelist);
 
 
-  return sortDomains(trie2.dump())
+  return trie2.dump()
     .map(d => `DOMAIN-SUFFIX,${d}`)
     .map(d => `DOMAIN-SUFFIX,${d}`)
     .concat(WHITELIST);
     .concat(WHITELIST);
 });
 });
@@ -56,13 +54,9 @@ export const buildMicrosoftCdn = task(require.main === module, __filename)(async
 
 
   const res: string[] = await span.traceChildPromise('get microsoft cdn domains', getMicrosoftCdnRulesetPromise());
   const res: string[] = await span.traceChildPromise('get microsoft cdn domains', getMicrosoftCdnRulesetPromise());
 
 
-  return createRuleset(
-    span,
-    'Sukka\'s Ruleset - Microsoft CDN',
-    description,
-    new Date(),
-    res,
-    'ruleset',
-    output('microsoft_cdn', 'non_ip')
-  );
+  return new RulesetOutput(span, 'microsoft_cdn', 'non_ip')
+    .withTitle('Sukka\'s Ruleset - Microsoft CDN')
+    .withDescription(description)
+    .addFromRuleset(res)
+    .write();
 });
 });

+ 51 - 133
Build/build-reject-domainset.ts

@@ -3,13 +3,10 @@ import path from 'node:path';
 import process from 'node:process';
 import process from 'node:process';
 
 
 import { processHosts, processFilterRules, processDomainLists } from './lib/parse-filter';
 import { processHosts, processFilterRules, processDomainLists } from './lib/parse-filter';
-import { createTrie } from './lib/trie';
 
 
 import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA } from './constants/reject-data-source';
 import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA } from './constants/reject-data-source';
-import { createRuleset, compareAndWriteFile } from './lib/create-file';
-import createKeywordFilter from './lib/aho-corasick';
+import { compareAndWriteFile } from './lib/create-file';
 import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
 import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
-import { buildParseDomainMap, sortDomains } from './lib/stable-sort-domain';
 import { task } from './trace';
 import { task } from './trace';
 // tldts-experimental is way faster than tldts, but very little bit inaccurate
 // tldts-experimental is way faster than tldts, but very little bit inaccurate
 // (since it is hashes based). But the result is still deterministic, which is
 // (since it is hashes based). But the result is still deterministic, which is
@@ -17,23 +14,47 @@ import { task } from './trace';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { getPhishingDomains } from './lib/get-phishing-domains';
 import { getPhishingDomains } from './lib/get-phishing-domains';
 
 
-import { setAddFromArray, setAddFromArrayCurried } from './lib/set-add-from-array';
-import { output } from './lib/misc';
+import { setAddFromArray } from './lib/set-add-from-array';
 import { appendArrayInPlace } from './lib/append-array-in-place';
 import { appendArrayInPlace } from './lib/append-array-in-place';
 import { OUTPUT_INTERNAL_DIR, SOURCE_DIR } from './constants/dir';
 import { OUTPUT_INTERNAL_DIR, SOURCE_DIR } from './constants/dir';
+import { DomainsetOutput } from './lib/create-file-new';
 
 
 const getRejectSukkaConfPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/reject_sukka.conf'));
 const getRejectSukkaConfPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/reject_sukka.conf'));
 
 
 export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => {
 export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => {
+  const rejectOutput = new DomainsetOutput(span, 'reject')
+    .withTitle('Sukka\'s Ruleset - Reject Base')
+    .withDescription([
+      ...SHARED_DESCRIPTION,
+      '',
+      'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
+      '',
+      'Build from:',
+      ...HOSTS.map(host => ` - ${host[0]}`),
+      ...DOMAIN_LISTS.map(domainList => ` - ${domainList[0]}`),
+      ...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
+    ]);
+
+  const rejectExtraOutput = new DomainsetOutput(span, 'reject_extra')
+    .withTitle('Sukka\'s Ruleset - Reject Extra')
+    .withDescription([
+      ...SHARED_DESCRIPTION,
+      '',
+      'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
+      '',
+      'Build from:',
+      ...HOSTS_EXTRA.map(host => ` - ${host[0]}`),
+      ...DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`),
+      ...ADGUARD_FILTERS_EXTRA.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`),
+      ...PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`)
+    ]);
+
+  const appendArrayToRejectOutput = rejectOutput.addFromDomainset.bind(rejectOutput);
+  const appendArrayToRejectExtraOutput = rejectExtraOutput.addFromDomainset.bind(rejectExtraOutput);
+
   /** Whitelists */
   /** Whitelists */
   const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
   const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
 
 
-  const domainSets = new Set<string>();
-  const appendArrayToDomainSets = setAddFromArrayCurried(domainSets);
-
-  const domainSetsExtra = new Set<string>();
-  const appendArrayToDomainSetsExtra = setAddFromArrayCurried(domainSetsExtra);
-
   // Parse from AdGuard Filters
   // Parse from AdGuard Filters
   const shouldStop = await span
   const shouldStop = await span
     .traceChild('download and process hosts / adblock filter rules')
     .traceChild('download and process hosts / adblock filter rules')
@@ -42,11 +63,11 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
       let shouldStop = false;
       let shouldStop = false;
       await Promise.all([
       await Promise.all([
         // Parse from remote hosts & domain lists
         // Parse from remote hosts & domain lists
-        HOSTS.map(entry => processHosts(childSpan, ...entry).then(appendArrayToDomainSets)),
-        HOSTS_EXTRA.map(entry => processHosts(childSpan, ...entry).then(appendArrayToDomainSetsExtra)),
+        HOSTS.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectOutput)),
+        HOSTS_EXTRA.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),
 
 
-        DOMAIN_LISTS.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToDomainSets)),
-        DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToDomainSetsExtra)),
+        DOMAIN_LISTS.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectOutput)),
+        DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),
 
 
         ADGUARD_FILTERS.map(
         ADGUARD_FILTERS.map(
           entry => processFilterRules(childSpan, ...entry)
           entry => processFilterRules(childSpan, ...entry)
@@ -57,7 +78,7 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
                 // we should not break here, as we want to see full matches from all data source
                 // we should not break here, as we want to see full matches from all data source
               }
               }
               setAddFromArray(filterRuleWhitelistDomainSets, white);
               setAddFromArray(filterRuleWhitelistDomainSets, white);
-              setAddFromArray(domainSets, black);
+              appendArrayToRejectOutput(black);
             })
             })
         ),
         ),
         ADGUARD_FILTERS_EXTRA.map(
         ADGUARD_FILTERS_EXTRA.map(
@@ -69,7 +90,7 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
                 // we should not break here, as we want to see full matches from all data source
                 // we should not break here, as we want to see full matches from all data source
               }
               }
               setAddFromArray(filterRuleWhitelistDomainSets, white);
               setAddFromArray(filterRuleWhitelistDomainSets, white);
-              setAddFromArray(domainSetsExtra, black);
+              appendArrayToRejectExtraOutput(black);
             })
             })
         ),
         ),
 
 
@@ -82,8 +103,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
             setAddFromArray(filterRuleWhitelistDomainSets, black);
             setAddFromArray(filterRuleWhitelistDomainSets, black);
           })
           })
         )),
         )),
-        getPhishingDomains(childSpan).then(appendArrayToDomainSetsExtra),
-        getRejectSukkaConfPromise.then(appendArrayToDomainSets)
+        getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),
+        getRejectSukkaConfPromise.then(appendArrayToRejectOutput)
       ].flat());
       ].flat());
       // eslint-disable-next-line sukka/no-single-return -- not single return
       // eslint-disable-next-line sukka/no-single-return -- not single return
       return shouldStop;
       return shouldStop;
@@ -93,72 +114,23 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
     process.exit(1);
     process.exit(1);
   }
   }
 
 
-  console.log(`Import ${domainSets.size} + ${domainSetsExtra.size} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
-
   // Dedupe domainSets
   // Dedupe domainSets
-  const domainKeywordsSet = await span.traceChildAsync('collect black keywords/suffixes', async () => {
+  await span.traceChildAsync('collect black keywords/suffixes', async () => {
     /** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */
     /** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */
-    const domainKeywordsSet = new Set<string>();
-
     for await (const line of readFileByLine(path.resolve(__dirname, '../Source/non_ip/reject.conf'))) {
     for await (const line of readFileByLine(path.resolve(__dirname, '../Source/non_ip/reject.conf'))) {
       const [type, value] = line.split(',');
       const [type, value] = line.split(',');
 
 
       if (type === 'DOMAIN-KEYWORD') {
       if (type === 'DOMAIN-KEYWORD') {
-        domainKeywordsSet.add(value);
+        rejectOutput.addDomainKeyword(value); // Add for later deduplication
+        rejectExtraOutput.addDomainKeyword(value); // Add for later deduplication
       } else if (type === 'DOMAIN-SUFFIX') {
       } else if (type === 'DOMAIN-SUFFIX') {
-        domainSets.add('.' + value); // Add to domainSets for later deduplication
+        rejectOutput.addDomainSuffix(value); // Add for later deduplication
       }
       }
     }
     }
-
-    return domainKeywordsSet;
   });
   });
 
 
-  const [baseTrie, extraTrie] = span.traceChildSync('create smol trie while deduping black keywords', (childSpan) => {
-    const baseTrie = createTrie(null, true);
-    const extraTrie = createTrie(null, true);
-
-    const kwfilter = createKeywordFilter(domainKeywordsSet);
-
-    childSpan.traceChildSync('add items to trie (extra)', () => {
-      for (const domain of domainSetsExtra) {
-        // exclude keyword when creating trie
-        if (!kwfilter(domain)) {
-          extraTrie.add(domain);
-        }
-      }
-    });
-
-    childSpan.traceChildSync('add items to trie (base) + dedupe extra trie', () => {
-      for (const domain of domainSets) {
-        // exclude keyword when creating trie
-        if (!kwfilter(domain)) {
-          baseTrie.add(domain);
-          extraTrie.whitelist(domain);
-        }
-      }
-    });
-
-    return [baseTrie, extraTrie] as const;
-  });
-
-  span.traceChildSync('dedupe from white suffixes (base)', () => filterRuleWhitelistDomainSets.forEach(baseTrie.whitelist));
-  span.traceChildSync('dedupe from white suffixes and base (extra)', () => {
-    filterRuleWhitelistDomainSets.forEach(extraTrie.whitelist);
-  });
-
-  // Dedupe domainSets
-  const dedupedDominArray = span.traceChildSync('dedupe from covered subdomain (base)', () => baseTrie.dump());
-  const dudupedDominArrayExtra = span.traceChildSync('dedupe from covered subdomain (extra)', () => extraTrie.dump());
-
-  console.log(`Final size ${dedupedDominArray.length} + ${dudupedDominArrayExtra.length}`);
-
-  const {
-    domainMap: domainArrayMainDomainMap,
-    subdomainMap: domainArraySubdomainMap
-  } = span.traceChildSync(
-    'build map for stat and sort',
-    () => buildParseDomainMap(dedupedDominArray.concat(dudupedDominArrayExtra))
-  );
+  rejectOutput.calcDomainMap();
+  rejectExtraOutput.calcDomainMap();
 
 
   // Create reject stats
   // Create reject stats
   const rejectDomainsStats: string[] = span
   const rejectDomainsStats: string[] = span
@@ -166,50 +138,15 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
     .traceSyncFn(() => {
     .traceSyncFn(() => {
       const results = [];
       const results = [];
       results.push('=== base ===');
       results.push('=== base ===');
-      appendArrayInPlace(results, getStatMap(dedupedDominArray, domainArrayMainDomainMap));
+      appendArrayInPlace(results, rejectOutput.getStatMap());
       results.push('=== extra ===');
       results.push('=== extra ===');
-      appendArrayInPlace(results, getStatMap(dudupedDominArrayExtra, domainArrayMainDomainMap));
+      appendArrayInPlace(results, rejectExtraOutput.getStatMap());
       return results;
       return results;
     });
     });
 
 
   return Promise.all([
   return Promise.all([
-    createRuleset(
-      span,
-      'Sukka\'s Ruleset - Reject Base',
-      [
-        ...SHARED_DESCRIPTION,
-        '',
-        'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
-        '',
-        'Build from:',
-        ...HOSTS.map(host => ` - ${host[0]}`),
-        ...DOMAIN_LISTS.map(domainList => ` - ${domainList[0]}`),
-        ...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
-      ],
-      new Date(),
-      span.traceChildSync('sort reject domainset (base)', () => sortDomains(dedupedDominArray, domainArrayMainDomainMap, domainArraySubdomainMap)),
-      'domainset',
-      output('reject', 'domainset')
-    ),
-    createRuleset(
-      span,
-      'Sukka\'s Ruleset - Reject Extra',
-      [
-        ...SHARED_DESCRIPTION,
-        '',
-        'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
-        '',
-        'Build from:',
-        ...HOSTS_EXTRA.map(host => ` - ${host[0]}`),
-        ...DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`),
-        ...ADGUARD_FILTERS_EXTRA.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`),
-        ...PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`)
-      ],
-      new Date(),
-      span.traceChildSync('sort reject domainset (extra)', () => sortDomains(dudupedDominArrayExtra, domainArrayMainDomainMap, domainArraySubdomainMap)),
-      'domainset',
-      output('reject_extra', 'domainset')
-    ),
+    rejectOutput.write(),
+    rejectExtraOutput.write(),
     compareAndWriteFile(
     compareAndWriteFile(
       span,
       span,
       rejectDomainsStats,
       rejectDomainsStats,
@@ -217,22 +154,3 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
     )
     )
   ]);
   ]);
 });
 });
-
-function getStatMap(domains: string[], domainArrayMainDomainMap: Map<string, string>): string[] {
-  return Array.from(
-    (
-      domains.reduce<Map<string, number>>((acc, cur) => {
-        const suffix = domainArrayMainDomainMap.get(cur);
-        if (suffix) {
-          acc.set(suffix, (acc.get(suffix) ?? 0) + 1);
-        }
-        return acc;
-      }, new Map())
-    ).entries()
-  )
-    .filter(a => a[1] > 9)
-    .sort(
-      (a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0])
-    )
-    .map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`);
-};

+ 36 - 48
Build/build-reject-ip-list.ts

@@ -1,40 +1,40 @@
 // @ts-check
 // @ts-check
 import path from 'node:path';
 import path from 'node:path';
-import { createRuleset } from './lib/create-file';
 import { fetchRemoteTextByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
 import { fetchRemoteTextByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
 import { task } from './trace';
 import { task } from './trace';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
 import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
-import { TTL, deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './lib/cache-filesystem';
+import { TTL, fsFetchCache, createCacheKey } from './lib/cache-filesystem';
 import { fetchAssets } from './lib/fetch-assets';
 import { fetchAssets } from './lib/fetch-assets';
 import { processLine } from './lib/process-line';
 import { processLine } from './lib/process-line';
-import { appendArrayInPlace } from './lib/append-array-in-place';
-import { output } from './lib/misc';
+import { RulesetOutput } from './lib/create-file-new';
+import { SOURCE_DIR } from './constants/dir';
 
 
 const cacheKey = createCacheKey(__filename);
 const cacheKey = createCacheKey(__filename);
 
 
 const BOGUS_NXDOMAIN_URL = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf';
 const BOGUS_NXDOMAIN_URL = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf';
 
 
-const getBogusNxDomainIPsPromise = fsFetchCache.apply(
+const getBogusNxDomainIPsPromise = fsFetchCache.apply<[ipv4: string[], ipv6: string[]]>(
   cacheKey(BOGUS_NXDOMAIN_URL),
   cacheKey(BOGUS_NXDOMAIN_URL),
   async () => {
   async () => {
-    const result: string[] = [];
+    const ipv4: string[] = [];
+    const ipv6: string[] = [];
     for await (const line of await fetchRemoteTextByLine(BOGUS_NXDOMAIN_URL)) {
     for await (const line of await fetchRemoteTextByLine(BOGUS_NXDOMAIN_URL)) {
       if (line.startsWith('bogus-nxdomain=')) {
       if (line.startsWith('bogus-nxdomain=')) {
         const ip = line.slice(15).trim();
         const ip = line.slice(15).trim();
         if (isProbablyIpv4(ip)) {
         if (isProbablyIpv4(ip)) {
-          result.push(`IP-CIDR,${ip}/32,no-resolve`);
+          ipv4.push(ip);
         } else if (isProbablyIpv6(ip)) {
         } else if (isProbablyIpv6(ip)) {
-          result.push(`IP-CIDR6,${ip}/128,no-resolve`);
+          ipv6.push(ip);
         }
         }
       }
       }
     }
     }
-    return result;
+    return [ipv4, ipv6] as const;
   },
   },
   {
   {
     ttl: TTL.ONE_WEEK(),
     ttl: TTL.ONE_WEEK(),
-    serializer: serializeArray,
-    deserializer: deserializeArray
+    serializer: JSON.stringify,
+    deserializer: JSON.parse
   }
   }
 );
 );
 
 
@@ -45,62 +45,50 @@ const BOTNET_FILTER_MIRROR_URL = [
   'https://malware-filter.pages.dev/botnet-filter-dnscrypt-blocked-ips.txt'
   'https://malware-filter.pages.dev/botnet-filter-dnscrypt-blocked-ips.txt'
 ];
 ];
 
 
-const getBotNetFilterIPsPromise = fsFetchCache.apply(
+const getBotNetFilterIPsPromise = fsFetchCache.apply<[ipv4: string[], ipv6: string[]]>(
   cacheKey(BOTNET_FILTER_URL),
   cacheKey(BOTNET_FILTER_URL),
   async () => {
   async () => {
     const text = await fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL);
     const text = await fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL);
-    return text.split('\n').reduce<string[]>((acc, cur) => {
+    return text.split('\n').reduce<[ipv4: string[], ipv6: string[]]>((acc, cur) => {
       const ip = processLine(cur);
       const ip = processLine(cur);
       if (ip) {
       if (ip) {
         if (isProbablyIpv4(ip)) {
         if (isProbablyIpv4(ip)) {
-          acc.push(`IP-CIDR,${ip}/32,no-resolve`);
+          acc[0].push(ip);
         } else if (isProbablyIpv6(ip)) {
         } else if (isProbablyIpv6(ip)) {
-          acc.push(`IP-CIDR6,${ip}/128,no-resolve`);
+          acc[1].push(ip);
         }
         }
       }
       }
       return acc;
       return acc;
-    }, []);
+    }, [[], []]);
   },
   },
   {
   {
     ttl: TTL.TWLVE_HOURS(),
     ttl: TTL.TWLVE_HOURS(),
-    serializer: serializeArray,
-    deserializer: deserializeArray
+    serializer: JSON.stringify,
+    deserializer: JSON.parse
   }
   }
 );
 );
 
 
-const localRejectIPSourcesPromise = readFileIntoProcessedArray(path.resolve(__dirname, '../Source/ip/reject.conf'));
-
 export const buildRejectIPList = task(require.main === module, __filename)(async (span) => {
 export const buildRejectIPList = task(require.main === module, __filename)(async (span) => {
-  const result = await localRejectIPSourcesPromise;
-
-  const results = await Promise.all([
+  const [bogusNxDomainIPs, botNetIPs] = await Promise.all([
     span.traceChildPromise('get bogus nxdomain ips', getBogusNxDomainIPsPromise),
     span.traceChildPromise('get bogus nxdomain ips', getBogusNxDomainIPsPromise),
     span.traceChildPromise('get botnet ips', getBotNetFilterIPsPromise)
     span.traceChildPromise('get botnet ips', getBotNetFilterIPsPromise)
   ]);
   ]);
 
 
-  const bogusNxDomainIPs = results[0];
-  const botNetIPs = results[1];
-
-  appendArrayInPlace(result, bogusNxDomainIPs);
-  appendArrayInPlace(result, botNetIPs);
-
-  const description = [
-    ...SHARED_DESCRIPTION,
-    '',
-    'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.',
-    '',
-    'Data from:',
-    ' - https://github.com/felixonmars/dnsmasq-china-list',
-    ' - https://github.com/curbengh/botnet-filter'
-  ];
-
-  return createRuleset(
-    span,
-    'Sukka\'s Ruleset - Anti Bogus Domain',
-    description,
-    new Date(),
-    result,
-    'ruleset',
-    output('reject', 'ip')
-  );
+  return new RulesetOutput(span, 'reject', 'ip')
+    .withTitle('Sukka\'s Ruleset - Anti Bogus Domain')
+    .withDescription([
+      ...SHARED_DESCRIPTION,
+      '',
+      'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.',
+      '',
+      'Data from:',
+      ' - https://github.com/felixonmars/dnsmasq-china-list',
+      ' - https://github.com/curbengh/botnet-filter'
+    ])
+    .addFromRuleset(await readFileIntoProcessedArray(path.resolve(SOURCE_DIR, 'ip/reject.conf')))
+    .bulkAddCIDR4NoResolve(bogusNxDomainIPs[0])
+    .bulkAddCIDR6NoResolve(bogusNxDomainIPs[1])
+    .bulkAddCIDR4NoResolve(botNetIPs[0])
+    .bulkAddCIDR6NoResolve(botNetIPs[1])
+    .write();
 });
 });

+ 14 - 30
Build/build-stream-service.ts

@@ -2,49 +2,33 @@
 import type { Span } from './trace';
 import type { Span } from './trace';
 import { task } from './trace';
 import { task } from './trace';
 
 
-import { createRuleset } from './lib/create-file';
-
 import { ALL, NORTH_AMERICA, EU, HK, TW, JP, KR } from '../Source/stream';
 import { ALL, NORTH_AMERICA, EU, HK, TW, JP, KR } from '../Source/stream';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { SHARED_DESCRIPTION } from './lib/constants';
-import { output } from './lib/misc';
+import { RulesetOutput } from './lib/create-file-new';
 
 
 export const createRulesetForStreamService = (span: Span, fileId: string, title: string, streamServices: Array<import('../Source/stream').StreamService>) => {
 export const createRulesetForStreamService = (span: Span, fileId: string, title: string, streamServices: Array<import('../Source/stream').StreamService>) => {
   return span.traceChildAsync(fileId, async (childSpan) => Promise.all([
   return span.traceChildAsync(fileId, async (childSpan) => Promise.all([
     // Domains
     // Domains
-    createRuleset(
-      childSpan,
-      `Sukka's Ruleset - Stream Services: ${title}`,
-      [
+    new RulesetOutput(childSpan, fileId, 'non_ip')
+      .withTitle(`Sukka's Ruleset - Stream Services: ${title}`)
+      .withDescription([
         ...SHARED_DESCRIPTION,
         ...SHARED_DESCRIPTION,
         '',
         '',
         ...streamServices.map((i) => `- ${i.name}`)
         ...streamServices.map((i) => `- ${i.name}`)
-      ],
-      new Date(),
-      streamServices.flatMap((i) => i.rules),
-      'ruleset',
-      output(fileId, 'non_ip')
-    ),
+      ])
+      .addFromRuleset(streamServices.flatMap((i) => i.rules))
+      .write(),
     // IP
     // IP
-    createRuleset(
-      childSpan,
-      `Sukka's Ruleset - Stream Services' IPs: ${title}`,
-      [
+    new RulesetOutput(childSpan, fileId, 'ip')
+      .withTitle(`Sukka's Ruleset - Stream Services IPs: ${title}`)
+      .withDescription([
         ...SHARED_DESCRIPTION,
         ...SHARED_DESCRIPTION,
         '',
         '',
         ...streamServices.map((i) => `- ${i.name}`)
         ...streamServices.map((i) => `- ${i.name}`)
-      ],
-      new Date(),
-      streamServices.flatMap((i) => (
-        i.ip
-          ? [
-            ...i.ip.v4.map((ip) => `IP-CIDR,${ip},no-resolve`),
-            ...i.ip.v6.map((ip) => `IP-CIDR6,${ip},no-resolve`)
-          ]
-          : []
-      )),
-      'ruleset',
-      output(fileId, 'ip')
-    )
+      ])
+      .bulkAddCIDR4NoResolve(streamServices.flatMap(i => i.ip?.v4 ?? []))
+      .bulkAddCIDR6NoResolve(streamServices.flatMap(i => i.ip?.v6 ?? []))
+      .write()
   ]));
   ]));
 };
 };
 
 

+ 7 - 11
Build/build-telegram-cidr.ts

@@ -3,11 +3,10 @@ import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
 import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
 import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
 import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
 import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
 import { processLine } from './lib/process-line';
 import { processLine } from './lib/process-line';
-import { createRuleset } from './lib/create-file';
 import { task } from './trace';
 import { task } from './trace';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { createMemoizedPromise } from './lib/memo-promise';
 import { createMemoizedPromise } from './lib/memo-promise';
-import { output } from './lib/misc';
+import { RulesetOutput } from './lib/create-file-new';
 
 
 export const getTelegramCIDRPromise = createMemoizedPromise(async () => {
 export const getTelegramCIDRPromise = createMemoizedPromise(async () => {
   const resp = await fetchWithRetry('https://core.telegram.org/resources/cidr.txt', defaultRequestInit);
   const resp = await fetchWithRetry('https://core.telegram.org/resources/cidr.txt', defaultRequestInit);
@@ -45,13 +44,10 @@ export const buildTelegramCIDR = task(require.main === module, __filename)(async
     ' - https://core.telegram.org/resources/cidr.txt'
     ' - https://core.telegram.org/resources/cidr.txt'
   ];
   ];
 
 
-  return createRuleset(
-    span,
-    'Sukka\'s Ruleset - Telegram IP CIDR',
-    description,
-    date,
-    results,
-    'ruleset',
-    output('telegram', 'ip')
-  );
+  return new RulesetOutput(span, 'telegram', 'ip')
+    .withTitle('Sukka\'s Ruleset - Telegram IP CIDR')
+    .withDescription(description)
+    .withDate(date)
+    .addFromRuleset(results)
+    .write();
 });
 });

+ 68 - 2
Build/lib/create-file-new.ts

@@ -4,12 +4,14 @@ import type { Span } from '../trace';
 import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash';
 import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash';
 import { compareAndWriteFile, defaultSortTypeOrder, sortTypeOrder, withBannerArray } from './create-file';
 import { compareAndWriteFile, defaultSortTypeOrder, sortTypeOrder, withBannerArray } from './create-file';
 import { ipCidrListToSingbox, surgeDomainsetToSingbox, surgeRulesetToSingbox } from './singbox';
 import { ipCidrListToSingbox, surgeDomainsetToSingbox, surgeRulesetToSingbox } from './singbox';
-import { sortDomains } from './stable-sort-domain';
+import { buildParseDomainMap, sortDomains } from './stable-sort-domain';
 import { createTrie } from './trie';
 import { createTrie } from './trie';
 import { invariant } from 'foxact/invariant';
 import { invariant } from 'foxact/invariant';
 import { OUTPUT_CLASH_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../constants/dir';
 import { OUTPUT_CLASH_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../constants/dir';
 import stringify from 'json-stringify-pretty-compact';
 import stringify from 'json-stringify-pretty-compact';
 import { appendArrayInPlace } from './append-array-in-place';
 import { appendArrayInPlace } from './append-array-in-place';
+import { nullthrow } from 'foxact/nullthrow';
+import createKeywordFilter from './aho-corasick';
 
 
 abstract class RuleOutput {
 abstract class RuleOutput {
   protected domainTrie = createTrie<unknown>(null, true);
   protected domainTrie = createTrie<unknown>(null, true);
@@ -146,6 +148,13 @@ abstract class RuleOutput {
     return this;
     return this;
   }
   }
 
 
+  bulkAddCIDR4NoResolve(cidr: string[]) {
+    for (let i = 0, len = cidr.length; i < len; i++) {
+      this.ipcidrNoResolve.add(cidr[i]);
+    }
+    return this;
+  }
+
   bulkAddCIDR6(cidr: string[]) {
   bulkAddCIDR6(cidr: string[]) {
     for (let i = 0, len = cidr.length; i < len; i++) {
     for (let i = 0, len = cidr.length; i < len; i++) {
       this.ipcidr6.add(cidr[i]);
       this.ipcidr6.add(cidr[i]);
@@ -153,19 +162,54 @@ abstract class RuleOutput {
     return this;
     return this;
   }
   }
 
 
+  bulkAddCIDR6NoResolve(cidr: string[]) {
+    for (let i = 0, len = cidr.length; i < len; i++) {
+      this.ipcidr6NoResolve.add(cidr[i]);
+    }
+    return this;
+  }
+
   abstract write(): Promise<void>;
   abstract write(): Promise<void>;
 }
 }
 
 
 export class DomainsetOutput extends RuleOutput {
 export class DomainsetOutput extends RuleOutput {
   protected type = 'domainset' as const;
   protected type = 'domainset' as const;
 
 
+  private $dumped: string[] | null = null;
+
+  get dumped() {
+    if (!this.$dumped) {
+      const kwfilter = createKeywordFilter(this.domainKeywords);
+
+      const dumped = this.domainTrie.dump();
+      const set = new Set<string>(dumped);
+      for (let i = 0, len = dumped.length; i < len; i++) {
+        const domain = dumped[i];
+        if (kwfilter(domain)) {
+          set.delete(domain);
+        }
+      }
+
+      this.$dumped = Array.from(set);
+    }
+    return this.$dumped;
+  }
+
+  calcDomainMap() {
+    if (!this.apexDomainMap || !this.subDomainMap) {
+      const { domainMap, subdomainMap } = buildParseDomainMap(this.dumped);
+      this.apexDomainMap = domainMap;
+      this.subDomainMap = subdomainMap;
+    }
+  }
+
   async write() {
   async write() {
     await this.pendingPromise;
     await this.pendingPromise;
 
 
     invariant(this.title, 'Missing title');
     invariant(this.title, 'Missing title');
     invariant(this.description, 'Missing description');
     invariant(this.description, 'Missing description');
 
 
-    const sorted = sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap);
+    const sorted = sortDomains(this.dumped, this.apexDomainMap, this.subDomainMap);
     sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe');
     sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe');
 
 
     const surge = sorted;
     const surge = sorted;
@@ -201,6 +245,28 @@ export class DomainsetOutput extends RuleOutput {
       )
       )
     ]);
     ]);
   }
   }
+
+  getStatMap() {
+    invariant(this.dumped, 'Non dumped yet');
+    invariant(this.apexDomainMap, 'Missing apex domain map');
+
+    return Array.from(
+      (
+        nullthrow(this.dumped, 'Non dumped yet').reduce<Map<string, number>>((acc, cur) => {
+          const suffix = this.apexDomainMap!.get(cur);
+          if (suffix) {
+            acc.set(suffix, (acc.get(suffix) ?? 0) + 1);
+          }
+          return acc;
+        }, new Map())
+      ).entries()
+    )
+      .filter(a => a[1] > 9)
+      .sort(
+        (a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0])
+      )
+      .map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`);
+  }
 }
 }
 
 
 export class IPListOutput extends RuleOutput {
 export class IPListOutput extends RuleOutput {