Browse Source

Refactor build scripts

SukkaW 2 years ago
parent
commit
685427472b

+ 2 - 2
Build/build-cdn-conf.js

@@ -5,7 +5,7 @@ const { compareAndWriteFile } = require('./lib/string-array-compare');
 const { withBannerArray } = require('./lib/with-banner');
 const { minifyRules } = require('./lib/minify-rules');
 const { domainDeduper } = require('./lib/domain-deduper');
-const { shouldIgnoreLine } = require('./lib/should-ignore-line');
+const { processLine } = require('./lib/process-line');
 const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line');
 
 const readline = require('readline');
@@ -68,7 +68,7 @@ const readline = require('readline');
     input: fs.createReadStream(path.resolve(__dirname, '../Source/domainset/cdn.conf'), 'utf-8'),
     crlfDelay: Infinity
   })) {
-    const l = shouldIgnoreLine(line);
+    const l = processLine(line);
     if (l) {
       cdnDomains.add(l);
     }

+ 12 - 13
Build/build-chn-cidr.js

@@ -1,25 +1,24 @@
-const { fetchWithRetry } = require('./lib/fetch-retry');
+const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line');
 const { withBannerArray } = require('./lib/with-banner');
 const { resolve: pathResolve } = require('path');
 const { compareAndWriteFile } = require('./lib/string-array-compare');
+const { processLine } = require('./lib/process-line');
 
 (async () => {
   console.time('Total Time - build-chnroutes-cidr');
+  const { merge: mergeCidrs } = await import('cidr-tools');
 
-  const [rawCidr, { merge: mergeCidrs }] = await Promise.all([
-    (await fetchWithRetry('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')).text(),
-    import('cidr-tools')
-  ]);
-  const cidr = rawCidr.split('\n');
-
-  console.log('Before Merge:', cidr.length);
-  const filteredCidr = mergeCidrs(cidr.filter(line => {
-    if (line) {
-      return !line.startsWith('#');
+  /** @type {Set<string>} */
+  const cidr = new Set();
+  for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')) {
+    const l = processLine(line);
+    if (l) {
+      cidr.add(l);
     }
+  }
 
-    return false;
-  }));
+  console.log('Before Merge:', cidr.size);
+  const filteredCidr = mergeCidrs(Array.from(cidr));
   console.log('After Merge:', filteredCidr.length);
 
   await compareAndWriteFile(

+ 3 - 2
Build/build-internal-rules.js

@@ -5,6 +5,7 @@ const path = require('path');
 const readline = require('readline');
 const { isDomainLoose } = require('./lib/is-domain-loose');
 const tldts = require('tldts');
+const { processLine } = require('./lib/process-line');
 
 (async () => {
   const set = new Set();
@@ -32,7 +33,7 @@ const tldts = require('tldts');
         addApexDomain(line.slice(1));
       } else if (isDomainLoose(line)) {
         addApexDomain(line);
-      } else if (!line.startsWith('#') && line.trim() !== '') {
+      } else if (processLine(line)) {
         console.warn('[drop line from domainset]', line);
       }
     }
@@ -52,7 +53,7 @@ const tldts = require('tldts');
         addApexDomain(line.replace('DOMAIN-SUFFIX,', ''));
       } else if (line.startsWith('DOMAIN,')) {
         addApexDomain(line.replace('DOMAIN,', ''));
-      } else if (!line.startsWith('#') && line.trim() !== '') {
+      } else if (processLine(line)) {
         console.warn('[drop line from ruleset]', line);
       }
     }

+ 3 - 3
Build/build-reject-domainset.js

@@ -11,7 +11,7 @@ const Trie = require('./lib/trie');
 const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source');
 const { withBannerArray } = require('./lib/with-banner');
 const { compareAndWriteFile } = require('./lib/string-array-compare');
-const { shouldIgnoreLine } = require('./lib/should-ignore-line');
+const { processLine } = require('./lib/process-line');
 
 /** Whitelists */
 const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
@@ -110,7 +110,7 @@ const domainSuffixSet = new Set();
   });
 
   for await (const line of rl1) {
-    const l = shouldIgnoreLine(line);
+    const l = processLine(line);
     if (l) {
       domainSets.add(l);
     }
@@ -138,7 +138,7 @@ const domainSuffixSet = new Set();
     crlfDelay: Infinity
   });
   for await (const line of rl3) {
-    const l = shouldIgnoreLine(line);
+    const l = processLine(line);
     if (l) {
       domainSets.add(l);
     }

+ 10 - 14
Build/lib/parse-filter.js

@@ -3,6 +3,7 @@ const { fetchWithRetry } = require('./fetch-retry');
 const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line');
 const { NetworkFilter } = require('@cliqz/adblocker');
 const { normalizeDomain } = require('./is-domain-loose');
+const { processLine } = require('./process-line');
 
 const DEBUG_DOMAIN_TO_FIND = null; // example.com | null
 let foundDebugDomain = false;
@@ -31,18 +32,14 @@ async function processDomainLists(domainListsUrl) {
   const rl = await fetchRemoteTextAndCreateReadlineInterface(domainListsUrl);
 
   for await (const line of rl) {
-    if (
-      line.startsWith('#')
-      || line.startsWith('!')
-      || line.startsWith(' ')
-      || line === ''
-      || line.startsWith('\r')
-      || line.startsWith('\n')
-    ) {
+    if (line.startsWith('!')) {
       continue;
     }
 
-    const domainToAdd = line.trim();
+    const domainToAdd = processLine(line);
+    if (!domainToAdd) {
+      continue;
+    }
 
     if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) {
       warnOnce(domainListsUrl.toString(), false, DEBUG_DOMAIN_TO_FIND);
@@ -69,13 +66,12 @@ async function processHosts(hostsUrl, includeAllSubDomain = false) {
   const domainSets = new Set();
 
   const rl = await fetchRemoteTextAndCreateReadlineInterface(hostsUrl);
-  for await (const line of rl) {
-    if (line.includes('#')) {
-      continue;
-    }
-    if (line.startsWith(' ') || line.startsWith('\r') || line.startsWith('\n') || line.trim() === '') {
+  for await (const _line of rl) {
+    const line = processLine(_line);
+    if (!line) {
       continue;
     }
+
     const [, ...domains] = line.split(' ');
     const _domain = domains.join(' ').trim();
 

+ 5 - 2
Build/lib/should-ignore-line.js → Build/lib/process-line.js

@@ -1,10 +1,13 @@
 /* eslint-disable camelcase -- cache index access */
 
 /**
+ * If line is commented out or empty, return null.
+ * Otherwise, return trimmed line.
+ *
  * @param {string} line
  */
-module.exports.shouldIgnoreLine = (line) => {
-  if (line === '') {
+module.exports.processLine = (line) => {
+  if (!line) {
     return null;
   }