Browse Source

Add build step to CDN domainset

SukkaW 2 years ago
parent
commit
b43c1628d6

+ 1 - 0
.gitignore

@@ -5,6 +5,7 @@ node_modules
 public
 
 List/domainset/reject.conf
+List/domainset/cdn.conf
 List/domainset/reject_phishing.conf
 List/domainset/reject_sukka.conf
 List/domainset/apple_cdn.conf

+ 54 - 14
Build/build-cdn-conf.js

@@ -1,18 +1,28 @@
-const { fetchWithRetry } = require('./lib/fetch-retry');
+// @ts-check
 const fs = require('fs');
 const path = require('path');
 const { compareAndWriteFile } = require('./lib/string-array-compare');
 const { withBannerArray } = require('./lib/with-banner');
 const { minifyRules } = require('./lib/minify-rules');
+const { domainDeduper } = require('./lib/domain-deduper');
+const { shouldIgnoreLine } = require('./lib/should-ignore-line');
+const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line');
+
+const readline = require('readline');
 
 (async () => {
   console.time('Total Time - build-cdn-conf');
 
-  const domains = (await (await fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat')).text()).split('\n');
+  /**
+   * Extract OSS domain from publicsuffix list
+   * @type {Set<string>}
+   */
+  const S3OSSDomains = new Set();
 
-  const S3OSSDomains = domains.filter(line => {
-    if (line) {
-      return (
+  for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://publicsuffix.org/list/public_suffix_list.dat')) {
+    if (
+      line
+      && (
         line.startsWith('s3-')
         || line.startsWith('s3.')
       )
@@ -20,18 +30,16 @@ const { minifyRules } = require('./lib/minify-rules');
         line.endsWith('.amazonaws.com')
         || line.endsWith('.scw.cloud')
       )
-      && !line.includes('cn-');
+      && !line.includes('cn-')
+    ) {
+      S3OSSDomains.add(line);
     }
+  }
 
-    return false;
-  });
-
-  const filePath = path.resolve(__dirname, '../Source/non_ip/cdn.conf');
-  const resultPath = path.resolve(__dirname, '../List/non_ip/cdn.conf');
-  const content = (await fs.promises.readFile(filePath, 'utf-8'))
+  const content = (await fs.promises.readFile(path.resolve(__dirname, '../Source/non_ip/cdn.conf'), 'utf-8'))
     .replace(
       '# --- [AWS S3 Replace Me] ---',
-      S3OSSDomains.map(domain => `DOMAIN-SUFFIX,${domain}`).join('\n')
+      Array.from(S3OSSDomains).map(domain => `DOMAIN-SUFFIX,${domain}`).join('\n')
     );
 
   await compareAndWriteFile(
@@ -47,7 +55,39 @@ const { minifyRules } = require('./lib/minify-rules');
       new Date(),
       minifyRules(content.split('\n'))
     ),
-    resultPath
+    path.resolve(__dirname, '../List/non_ip/cdn.conf')
+  );
+
+  /**
+   * Dedupe cdn.conf
+   */
+  /** @type {Set<string>} */
+  const cdnDomains = new Set();
+
+  for await (const line of readline.createInterface({
+    input: fs.createReadStream(path.resolve(__dirname, '../Source/domainset/cdn.conf'), 'utf-8'),
+    crlfDelay: Infinity
+  })) {
+    const l = shouldIgnoreLine(line);
+    if (l) {
+      cdnDomains.add(l);
+    }
+  }
+
+  await compareAndWriteFile(
+    withBannerArray(
+      'Sukka\'s Surge Rules - CDN Domains',
+      [
+        'License: AGPL 3.0',
+        'Homepage: https://ruleset.skk.moe',
+        'GitHub: https://github.com/SukkaW/Surge',
+        '',
+        'This file contains object storage and static assets CDN domains.'
+      ],
+      new Date(),
+      minifyRules(domainDeduper(Array.from(cdnDomains)))
+    ),
+    path.resolve(__dirname, '../List/domainset/cdn.conf')
   );
 
   console.timeEnd('Total Time - build-cdn-conf');

+ 7 - 24
Build/build-reject-domainset.js

@@ -11,6 +11,7 @@ const Trie = require('./lib/trie');
 const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source');
 const { withBannerArray } = require('./lib/with-banner');
 const { compareAndWriteFile } = require('./lib/string-array-compare');
+const { shouldIgnoreLine } = require('./lib/should-ignore-line');
 
 /** Whitelists */
 const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
@@ -109,19 +110,10 @@ const domainSuffixSet = new Set();
   });
 
   for await (const line of rl1) {
-    if (
-      line.startsWith('#')
-      || line.startsWith(' ')
-      || line.startsWith('\r')
-      || line.startsWith('\n')
-    ) {
-      continue;
+    const l = shouldIgnoreLine(line);
+    if (l) {
+      domainSets.add(l);
     }
-
-    const trimmed = line.trim();
-    if (trimmed === '') continue;
-
-    domainSets.add(trimmed);
   }
 
   previousSize = domainSets.size - previousSize;
@@ -146,19 +138,10 @@ const domainSuffixSet = new Set();
     crlfDelay: Infinity
   });
   for await (const line of rl3) {
-    if (
-      line.startsWith('#')
-      || line.startsWith(' ')
-      || line.startsWith('\r')
-      || line.startsWith('\n')
-    ) {
-      continue;
+    const l = shouldIgnoreLine(line);
+    if (l) {
+      domainSets.add(l);
     }
-
-    const trimmed = line.trim();
-    if (trimmed === '') continue;
-
-    domainSuffixSet.add(trimmed);
   }
 
   console.log(`Import ${domainKeywordsSet.size} black keywords and ${domainSuffixSet.size} black suffixes!`);

+ 27 - 0
Build/lib/domain-deduper.js

@@ -0,0 +1,27 @@
+const Trie = require('./trie');
+
+/**
+ * Drop entries from a domain list that are made redundant by a dot-prefixed
+ * entry in the same list.
+ *
+ * Every input is loaded into a Trie and into a Set. Then, for each input that
+ * starts with ".", the following are deleted from the Set:
+ *   - every value returned by `trie.find(d, false)` for that entry
+ *     (presumably the other stored entries ending with it, excluding the
+ *     entry itself - TODO confirm against Trie#find), and
+ *   - the same string without its leading dot, when the trie contains it.
+ * Inputs that do not start with "." never trigger a deletion.
+ *
+ * @param {string[]} inputDomains
+ * @returns {string[]} the entries left in the Set (exact duplicates are
+ * collapsed because a Set is used)
+ */
+const domainDeduper = (inputDomains) => {
+  const trie = Trie.from(inputDomains);
+  const sets = new Set(inputDomains);
+
+  for (let j = 0, len = inputDomains.length; j < len; j++) {
+    const d = inputDomains[j];
+    if (d[0] !== '.') {
+      continue;
+    }
+
+    trie.find(d, false).forEach(f => sets.delete(f));
+
+    const a = d.slice(1);
+    if (trie.has(a)) {
+      sets.delete(a);
+    }
+  }
+
+  return Array.from(sets);
+};
+
+module.exports.domainDeduper = domainDeduper;

+ 29 - 0
Build/lib/should-ignore-line.js

@@ -0,0 +1,29 @@
+/* eslint-disable camelcase -- cache index access */
+
+/**
+ * Decide whether a line should be skipped and, if not, return it trimmed.
+ *
+ * Despite the name, the result is not a boolean: it is the trimmed line when
+ * the line is kept and `null` when it is to be ignored, so callers can test
+ * the result for truthiness and then use it directly.
+ *
+ * A line is ignored when it is empty, when its first character is "#", "!",
+ * a space, "\r" or "\n", or when nothing is left after trimming. Note that a
+ * line beginning with a space is ignored even if content follows the space.
+ *
+ * @param {string} line
+ * @returns {string | null} the trimmed line, or `null` if it is ignored
+ */
+module.exports.shouldIgnoreLine = (line) => {
+  if (line === '') {
+    return null;
+  }
+
+  const line_0 = line[0];
+
+  if (
+    line_0 === '#'
+    || line_0 === ' '
+    || line_0 === '\r'
+    || line_0 === '\n'
+    || line_0 === '!'
+  ) {
+    return null;
+  }
+
+  const trimmed = line.trim();
+  if (trimmed === '') {
+    return null;
+  }
+
+  return trimmed;
+};

+ 10 - 10
Build/lib/trie.js

@@ -81,6 +81,7 @@ class Trie {
       $suffix = suffixStack.pop();
       node = nodeStack.pop();
 
+      // eslint-disable-next-line guard-for-in -- plain object
       for (k in node) {
         if (k === SENTINEL) {
           if (includeEqualWithSuffix) {
@@ -89,7 +90,6 @@ class Trie {
             matches.push($suffix);
           }
 
-
           continue;
         }
 
@@ -161,8 +161,9 @@ class Trie {
       node = node[token];
 
       // Prefix does not exist
-      if (typeof node === 'undefined')
+      if (typeof node === 'undefined') {
         return false;
+      }
 
       // Keeping track of a potential branch to prune
       if (toPrune !== null) {
@@ -170,12 +171,9 @@ class Trie {
           toPrune = null;
           tokenToPrune = null;
         }
-      }
-      else {
-        if (Object.keys(node).length < 2) {
-          toPrune = parent;
-          tokenToPrune = token;
-        }
+      } else if (Object.keys(node).length < 2) {
+        toPrune = parent;
+        tokenToPrune = token;
       }
     }
 
@@ -206,8 +204,9 @@ class Trie {
       token = suffix[i];
       node = node[token];
 
-      if (typeof node === 'undefined')
+      if (typeof node === 'undefined') {
         return false;
+      }
     }
 
     return SENTINEL in node;
@@ -217,7 +216,7 @@ class Trie {
    * @return {string[]}
    */
   dump() {
-    let node = this.root;
+    const node = this.root;
     const nodeStack = [];
     const prefixStack = [];
     // Resolving initial prefix
@@ -238,6 +237,7 @@ class Trie {
       currentNode = nodeStack.pop();
       currentPrefix = prefixStack.pop();
 
+      // eslint-disable-next-line guard-for-in -- plain object
       for (k in currentNode) {
         if (k === SENTINEL) {
           hasValue = true;

+ 0 - 0
List/domainset/cdn.conf → Source/domainset/cdn.conf