Browse Source

Perf: preload all reject data sources

SukkaW 1 year ago
parent
commit
5e0780af35
3 changed files with 117 additions and 32 deletions
  1. 20 17
      Build/build-reject-domainset.ts
  2. 1 15
      Build/lib/parse-filter.test.ts
  3. 96 0
      Build/lib/parse-filter/filters.ts

+ 20 - 17
Build/build-reject-domainset.ts

@@ -4,7 +4,7 @@ import process from 'node:process';
 
 
 import { processHostsWithPreload } from './lib/parse-filter/hosts';
 import { processHostsWithPreload } from './lib/parse-filter/hosts';
 import { processDomainListsWithPreload } from './lib/parse-filter/domainlists';
 import { processDomainListsWithPreload } from './lib/parse-filter/domainlists';
-import { processFilterRules } from './lib/parse-filter/filters';
+import { processFilterRulesWithPreload } from './lib/parse-filter/filters';
 
 
 import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_WHITELIST } from './constants/reject-data-source';
 import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_WHITELIST } from './constants/reject-data-source';
 import { compareAndWriteFile } from './lib/create-file';
 import { compareAndWriteFile } from './lib/create-file';
@@ -33,6 +33,9 @@ const hostsDownloads = HOSTS.map(entry => processHostsWithPreload(...entry));
 const hostsExtraDownloads = HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry));
 const hostsExtraDownloads = HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry));
 const domainListsDownloads = DOMAIN_LISTS.map(entry => processDomainListsWithPreload(...entry));
 const domainListsDownloads = DOMAIN_LISTS.map(entry => processDomainListsWithPreload(...entry));
 const domainListsExtraDownloads = DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry));
 const domainListsExtraDownloads = DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry));
+const adguardFiltersDownloads = ADGUARD_FILTERS.map(entry => processFilterRulesWithPreload(...entry));
+const adguardFiltersExtraDownloads = ADGUARD_FILTERS_EXTRA.map(entry => processFilterRulesWithPreload(...entry));
+const adguardFiltersWhitelistsDownloads = ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRulesWithPreload(...entry));
 
 
 export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => {
 export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => {
   const rejectBaseDescription = [
   const rejectBaseDescription = [
@@ -81,24 +84,24 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
       domainListsDownloads.map(task => task(childSpan).then(appendArrayToRejectOutput)),
       domainListsDownloads.map(task => task(childSpan).then(appendArrayToRejectOutput)),
       domainListsExtraDownloads.map(task => task(childSpan).then(appendArrayToRejectExtraOutput)),
       domainListsExtraDownloads.map(task => task(childSpan).then(appendArrayToRejectExtraOutput)),
 
 
-      ADGUARD_FILTERS.map(
-        entry => processFilterRules(childSpan, ...entry)
-          .then(({ white, black }) => {
-            addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
-            appendArrayToRejectOutput(black);
-          })
+      adguardFiltersDownloads.map(
+        task => task(childSpan).then(({ white, black }) => {
+          addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
+          appendArrayToRejectOutput(black);
+        })
       ),
       ),
-      ADGUARD_FILTERS_EXTRA.map(
-        entry => processFilterRules(childSpan, ...entry)
-          .then(({ white, black }) => {
-            addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
-            appendArrayToRejectExtraOutput(black);
-          })
+      adguardFiltersExtraDownloads.map(
+        task => task(childSpan).then(({ white, black }) => {
+          addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
+          appendArrayToRejectExtraOutput(black);
+        })
+      ),
+      adguardFiltersWhitelistsDownloads.map(
+        task => task(childSpan).then(({ white, black }) => {
+          addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
+          addArrayElementsToSet(filterRuleWhitelistDomainSets, black);
+        })
       ),
       ),
-      ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRules(childSpan, ...entry).then(({ white, black }) => {
-        addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
-        addArrayElementsToSet(filterRuleWhitelistDomainSets, black);
-      })),
       getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),
       getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),
       readLocalRejectDomainsetPromise.then(appendArrayToRejectOutput),
       readLocalRejectDomainsetPromise.then(appendArrayToRejectOutput),
       readLocalRejectDomainsetPromise.then(appendArrayToRejectExtraOutput),
       readLocalRejectDomainsetPromise.then(appendArrayToRejectExtraOutput),

+ 1 - 15
Build/lib/parse-filter.test.ts

@@ -1,11 +1,7 @@
 import { describe, it } from 'mocha';
 import { describe, it } from 'mocha';
 
 
-import { parse, processFilterRules } from './parse-filter/filters';
+import { parse } from './parse-filter/filters';
 import type { ParseType } from './parse-filter/filters';
 import type { ParseType } from './parse-filter/filters';
-import { createCacheKey } from './cache-filesystem';
-import { createSpan } from '../trace';
-
-const cacheKey = createCacheKey(__filename);
 
 
 describe('parse', () => {
 describe('parse', () => {
   const MUTABLE_PARSE_LINE_RESULT: [string, ParseType] = ['', 1000];
   const MUTABLE_PARSE_LINE_RESULT: [string, ParseType] = ['', 1000];
@@ -14,13 +10,3 @@ describe('parse', () => {
     console.log(parse('||top.mail.ru^$badfilter', MUTABLE_PARSE_LINE_RESULT, false));
     console.log(parse('||top.mail.ru^$badfilter', MUTABLE_PARSE_LINE_RESULT, false));
   });
   });
 });
 });
-
-describe.skip('processFilterRules', () => {
-  it('https://filters.adtidy.org/extension/ublock/filters/18_optimized.txt', () => {
-    console.log(processFilterRules(
-      createSpan('noop'),
-      cacheKey('https://filters.adtidy.org/extension/ublock/filters/18_optimized.txt'),
-      []
-    ));
-  });
-});

+ 96 - 0
Build/lib/parse-filter/filters.ts

@@ -20,6 +20,102 @@ const enum ParseType {
 
 
 export { type ParseType };
 export { type ParseType };
 
 
+export function processFilterRulesWithPreload( // starts the download immediately and returns a task that parses the list under a given span
+  filterRulesUrl: string, // passed to fetchAssets; also used in the trace name, the found-callbacks and the log output
+  fallbackUrls?: string[] | null, // passed to fetchAssets as its second argument
+  allowThirdParty = false // forwarded unchanged to parse() for every line
+) {
+  const downloadPromise = fetchAssets(filterRulesUrl, fallbackUrls); // kicked off here, before the returned task is ever invoked
+
+  return (span: Span) => span.traceChildAsync<{ white: string[], black: string[] }>(`process filter rules: ${filterRulesUrl}`, async (span) => {
+    const text = await span.traceChildPromise('download', downloadPromise); // waits on the fetch started above
+
+    const whitelistDomainSets = new Set<string>();
+    const blacklistDomainSets = new Set<string>();
+
+    const warningMessages: string[] = []; // collected during parsing, printed once parsing is done
+
+    const MUTABLE_PARSE_LINE_RESULT: [string, ParseType] = ['', ParseType.NotParsed]; // handed to parse() on every call; presumably reused as the result tuple — confirm in parse()
+    /**
+     * Handles one filter line: runs it through parse() and files the result
+     * into the whitelist set, the blacklist set, or the warning list.
+     *
+     * @param line a single line taken from the downloaded filter text
+     * @throws Error when parse() reports ParseType.NotParsed for the line
+     */
+    const lineCb = (line: string) => {
+      const result = parse(line, MUTABLE_PARSE_LINE_RESULT, allowThirdParty);
+      const flag = result[1];
+
+      if (flag === ParseType.NotParsed) {
+        throw new Error(`Didn't parse line: ${line}`);
+      }
+      if (flag === ParseType.Null) { // nothing to record for this line
+        return;
+      }
+
+      const hostname = result[0];
+
+      if (flag === ParseType.WhiteIncludeSubdomain || flag === ParseType.WhiteAbsolute) {
+        onWhiteFound(hostname, filterRulesUrl);
+      } else { // NOTE(review): this branch also covers ParseType.ErrorMessage, so onBlackFound receives the message text in that case — confirm this is intended
+        onBlackFound(hostname, filterRulesUrl);
+      }
+
+      switch (flag) {
+        case ParseType.WhiteIncludeSubdomain:
+          if (hostname[0] === '.') { // already dot-prefixed: store as-is
+            whitelistDomainSets.add(hostname);
+          } else { // otherwise store with a leading dot
+            whitelistDomainSets.add(`.${hostname}`);
+          }
+          break;
+        case ParseType.WhiteAbsolute:
+          whitelistDomainSets.add(hostname);
+          break;
+        case ParseType.BlackIncludeSubdomain:
+          if (hostname[0] === '.') { // already dot-prefixed: store as-is
+            blacklistDomainSets.add(hostname);
+          } else { // otherwise store with a leading dot
+            blacklistDomainSets.add(`.${hostname}`);
+          }
+          break;
+        case ParseType.BlackAbsolute:
+          blacklistDomainSets.add(hostname);
+          break;
+        case ParseType.ErrorMessage: // result[0] carries the message text here, not a hostname
+          warningMessages.push(hostname);
+          break;
+        default: // any other flag is ignored
+          break;
+      }
+    };
+
+    const filterRules = text.split('\n'); // NOTE(review): splits on '\n' only; with CRLF input each line would keep its '\r' — check whether parse() tolerates that
+
+    span.traceChild('parse adguard filter').traceSyncFn(() => {
+      for (let i = 0, len = filterRules.length; i < len; i++) {
+        lineCb(filterRules[i]);
+      }
+    });
+
+    for (let i = 0, len = warningMessages.length; i < len; i++) { // print each collected warning next to the source URL
+      console.warn(
+        picocolors.yellow(warningMessages[i]),
+        picocolors.gray(picocolors.underline(filterRulesUrl))
+      );
+    }
+
+    console.log( // one summary line per filter list with the size of each set
+      picocolors.gray('[process filter]'),
+      picocolors.gray(filterRulesUrl),
+      picocolors.gray(`white: ${whitelistDomainSets.size}`),
+      picocolors.gray(`black: ${blacklistDomainSets.size}`)
+    );
+
+    return { // sets converted to arrays, so each entry appears once
+      white: Array.from(whitelistDomainSets),
+      black: Array.from(blacklistDomainSets)
+    };
+  });
+}
+
 export async function processFilterRules(
 export async function processFilterRules(
   parentSpan: Span,
   parentSpan: Span,
   filterRulesUrl: string,
   filterRulesUrl: string,