Browse Source

Perf: faster is-domain-loose check

SukkaW 3 years ago
parent
commit
4565b15e17

+ 12 - 3
Build/build-anti-bogus-domain.js

@@ -1,7 +1,7 @@
 const { fetchWithRetry } = require('./lib/fetch-retry');
 const fs = require('fs');
 const path = require('path');
-const { isIP } = require('net');
+const { isIPv4, isIPv6 } = require('net');
 
 (async () => {
   console.time('Total Time - build-anti-bogus-domain');
@@ -15,7 +15,8 @@ const { isIP } = require('net');
 
       return null
     })
-    .filter(ip => typeof ip === 'string' && isIP(ip) !== 0);
+    .filter(ip => typeof ip === 'string');
+
   console.timeEnd('* Download bogus-nxdomain-list')
 
   const filePath = path.resolve(__dirname, '../Source/ip/reject.conf');
@@ -23,7 +24,15 @@ const { isIP } = require('net');
   const content = (await fs.promises.readFile(filePath, 'utf-8'))
     .replace(
       '# --- [Anti Bogus Domain Replace Me] ---',
-      res.map(ip => `IP-CIDR,${ip}/32,no-resolve`).join('\n')
+      res.map(ip => {
+        if (isIPv4(ip)) {
+          return `IP-CIDR,${ip}/32,no-resolve`
+        }
+        if (isIPv6(ip)) {
+          return `IP-CIDR6,${ip}/128,no-resolve`
+        }
+        return ''
+      }).join('\n')
     );
 
   await fs.promises.writeFile(resultPath, content, 'utf-8');

+ 4 - 1
Build/build-reject-domainset.js

@@ -45,7 +45,10 @@ const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
     if (Array.isArray(input) && input.length === 2) {
       return processFilterRules(input[0], input[1]);
     }
-  }))).forEach(({ white, black }) => {
+  }))).forEach(({ white, black, foundDebugDomain }) => {
+    if (foundDebugDomain) {
+      process.exit(1);
+    };
     white.forEach(i => filterRuleWhitelistDomainSets.add(i));
     black.forEach(i => domainSets.add(i));
   });

+ 18 - 2
Build/lib/is-domain-loose.js

@@ -1,3 +1,19 @@
-const rDomain = /^(((?!\-))(xn\-\-)?[a-z0-9\-_]{0,61}[a-z0-9]{1,1}\.)*(xn\-\-)?([a-z0-9\-]{1,61}|[a-z0-9\-]{1,30})\.[a-z]{2,}$/m;
+const { parse } = require('tldts');
 
-module.exports.isDomainLoose = (domain) => rDomain.test(domain);
+module.exports.isDomainLoose = (domain) => {
+  const { isIcann, isPrivate, isIp } = parse(domain, { allowPrivateDomains: true });
+  return !!(!isIp && (isIcann || isPrivate));
+};
+
+module.exports.normalizeDomain = (domain) => {
+  const { isIcann, isPrivate, hostname, isIp } = parse(domain, { allowPrivateDomains: true });
+  if (isIp) {
+    return null;
+  }
+
+  if (isIcann || isPrivate) {
+    return hostname;
+  };
+
+  return null;
+}

+ 32 - 19
Build/lib/parse-filter.js

@@ -1,8 +1,8 @@
-const { isIP } = require('net');
 const { fetchWithRetry } = require('./fetch-retry');
-const { isDomainLoose } = require('./is-domain-loose');
+const { normalizeDomain } = require('./is-domain-loose');
 
 const DEBUG_DOMAIN_TO_FIND = null; // example.com | null
+let foundDebugDomain = false;
 
 const warnOnceUrl = new Set();
 const warnOnce = (url, isWhite, ...message) => {
@@ -42,6 +42,7 @@ async function processDomainLists (domainListsUrl) {
 
     if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) {
       warnOnce(domainListsUrl.toString(), false, DEBUG_DOMAIN_TO_FIND);
+      foundDebugDomain = true;
     }
 
     domainSets.add(domainToAdd);
@@ -73,13 +74,15 @@ async function processHosts (hostsUrl, includeAllSubDomain = false) {
       return;
     }
     const [, ...domains] = line.split(' ');
-    const domain = domains.join(' ').trim();
+    const _domain = domains.join(' ').trim();
 
-    if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
+    if (DEBUG_DOMAIN_TO_FIND && _domain.includes(DEBUG_DOMAIN_TO_FIND)) {
       warnOnce(hostsUrl.toString(), false, DEBUG_DOMAIN_TO_FIND);
+      foundDebugDomain = true;
     }
 
-    if (isDomainLoose(domain)) {
+    const domain = normalizeDomain(_domain);
+    if (domain) {
       if (includeAllSubDomain) {
         domainSets.add(`.${domain}`);
       } else {
@@ -96,7 +99,7 @@ async function processHosts (hostsUrl, includeAllSubDomain = false) {
 /**
  * @param {string | URL} filterRulesUrl
  * @param {(string | URL)[] | undefined} fallbackUrls
- * @returns {Promise<{ white: Set<string>, black: Set<string> }>}
+ * @returns {Promise<{ white: Set<string>, black: Set<string>, foundDebugDomain: boolean }>}
  */
 async function processFilterRules (filterRulesUrl, fallbackUrls) {
   console.time(`   - processFilterRules: ${filterRulesUrl}`);
@@ -135,8 +138,6 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) {
       || line.includes('[')
       || line.includes('(')
       || line.includes('$') && !lineStartsWithDoubleVerticalBar
-      || line === ''
-      || isIP(line) !== 0
     ) {
       continue;
     }
@@ -145,15 +146,17 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) {
     const lineEndsWithCaretVerticalBar = line.endsWith('^|');
 
     if (lineStartsWithDoubleVerticalBar && line.endsWith('^$badfilter')) {
-      const domain = line.replace('||', '').replace('^$badfilter', '').trim();
-      if (isDomainLoose(domain)) {
+      const _domain = line.replace('||', '').replace('^$badfilter', '').trim();
+      const domain = normalizeDomain(_domain);
+      if (domain) {
         if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
           warnOnce(filterRulesUrl.toString(), true, DEBUG_DOMAIN_TO_FIND);
+          foundDebugDomain = true;
         }
 
         whitelistDomainSets.add(domain);
       } else {
-        console.warn('      * [parse-filter white] ' + domain + ' is not a valid domain');
+        console.warn('      * [parse-filter white] ' + _domain + ' is not a valid domain');
       }
     } else if (line.startsWith('@@||')
       && (
@@ -163,21 +166,25 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) {
         || line.endsWith('^$1p')
       )
     ) {
-      const domain = line
+      const _domain = line
         .replaceAll('@@||', '')
         .replaceAll('^$badfilter', '')
         .replaceAll('^$1p', '')
         .replaceAll('^|', '')
         .replaceAll('^', '')
         .trim();
-      if (isDomainLoose(domain)) {
+
+      const domain = normalizeDomain(_domain);
+
+      if (domain) {
         if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
           warnOnce(filterRulesUrl.toString(), true, DEBUG_DOMAIN_TO_FIND);
+          foundDebugDomain = true;
         }
 
         whitelistDomainSets.add(domain);
       } else {
-        console.warn('      * [parse-filter white] ' + domain + ' is not a valid domain');
+        console.warn('      * [parse-filter white] ' + _domain + ' is not a valid domain');
       }
     } else if (
       lineStartsWithDoubleVerticalBar
@@ -187,16 +194,19 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) {
         || line.endsWith('^$all')
       )
     ) {
-      const domain = line
+      const _domain = line
         .replaceAll('||', '')
         .replaceAll('^|', '')
         .replaceAll('^$all', '')
         .replaceAll('^', '')
         .trim();
 
-      if (isDomainLoose(domain)) {
+      const domain = normalizeDomain(_domain);
+
+      if (domain) {
         if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
           warnOnce(filterRulesUrl.toString(), false, DEBUG_DOMAIN_TO_FIND);
+          foundDebugDomain = true;
         }
 
         blacklistDomainSets.add(`.${domain}`);
@@ -208,10 +218,12 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) {
         || lineEndsWithCaretVerticalBar
       )
     ) {
-      const domain = `${line.replaceAll('://', '').replaceAll('^|', '').replaceAll('^', '')}`.trim();
-      if (isDomainLoose(domain)) {
+      const _domain = `${line.replaceAll('://', '').replaceAll('^|', '').replaceAll('^', '')}`.trim();
+      const domain = normalizeDomain(_domain);
+      if (domain) {
         if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
           warnOnce(filterRulesUrl.toString(), false, DEBUG_DOMAIN_TO_FIND);
+          foundDebugDomain = true;
         }
 
         blacklistDomainSets.add(domain);
@@ -223,7 +235,8 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) {
 
   return {
     white: whitelistDomainSets,
-    black: blacklistDomainSets
+    black: blacklistDomainSets,
+    foundDebugDomain
   };
 }