瀏覽代碼

Perf: run filter TransformStream before parsing

SukkaW 9 月之前
父節點
當前提交
58aed6aa60
共有 2 個文件被更改,包括 101 次插入37 次删除
  1. 8 1
      Build/lib/fetch-assets.ts
  2. 93 36
      Build/lib/parse-filter/filters.ts

+ 8 - 1
Build/lib/fetch-assets.ts

@@ -4,6 +4,7 @@ import { waitWithAbort } from 'foxts/wait';
 import { nullthrow } from 'foxts/guard';
 import { TextLineStream } from 'foxts/text-line-stream';
 import { ProcessLineStream } from './process-line';
+import { AdGuardFilterIgnoreUnsupportedLinesStream } from './parse-filter/filters';
 
 // eslint-disable-next-line sukka/unicorn/custom-error-definition -- typescript is better
 class CustomAbortError extends Error {
@@ -13,7 +14,10 @@ class CustomAbortError extends Error {
 
 const reusedCustomAbortError = new CustomAbortError();
 
-export async function fetchAssets(url: string, fallbackUrls: null | undefined | string[] | readonly string[], processLine = false, allowEmpty = false) {
+export async function fetchAssets(
+  url: string, fallbackUrls: null | undefined | string[] | readonly string[],
+  processLine = false, allowEmpty = false, filterAdGuardUnsupportedLines = false
+) {
   const controller = new AbortController();
 
   const createFetchFallbackPromise = async (url: string, index: number) => {
@@ -36,6 +40,9 @@ export async function fetchAssets(url: string, fallbackUrls: null | undefined |
     if (processLine) {
       stream = stream.pipeThrough(new ProcessLineStream());
     }
+    if (filterAdGuardUnsupportedLines) {
+      stream = stream.pipeThrough(new AdGuardFilterIgnoreUnsupportedLinesStream());
+    }
     const arr = await Array.fromAsync(stream);
 
     if (arr.length < 1 && !allowEmpty) {

+ 93 - 36
Build/lib/parse-filter/filters.ts

@@ -29,7 +29,10 @@ export function processFilterRulesWithPreload(
   fallbackUrls?: string[] | null,
   includeThirdParty = false
 ) {
-  const downloadPromise = fetchAssets(filterRulesUrl, fallbackUrls);
+  const downloadPromise = fetchAssets(
+    filterRulesUrl, fallbackUrls,
+    true, false, true
+  );
 
   return (span: Span) => span.traceChildAsync<
     Record<
@@ -192,47 +195,101 @@ const kwfilter = createKeywordFilter([
   '^popup'
 ]);
 
-export function parse($line: string, result: [string, ParseType], includeThirdParty: boolean): [hostname: string, flag: ParseType] {
-  if (
-    // doesn't include
-    !$line.includes('.') // rule with out dot can not be a domain
-    // includes
-    || kwfilter($line)
-    // note that this can only excludes $redirect but not $4-,redirect, so we still need to parse it
-    // this is only an early bail out
-  ) {
-    result[1] = ParseType.Null;
-    return result;
-  }
+/**
+ * The idea is that a TransformStream works like a filter running on the response. If we
+ * can filter out lines before Array.fromAsync, we create a smaller array, which saves memory
+ * and could improve performance.
+ */
+export class AdGuardFilterIgnoreUnsupportedLinesStream extends TransformStream<string, string> {
+  // private __buf = '';
+  constructor() {
+    super({
+      transform(line, controller) {
+        if (
+          // doesn't include
+          !line.includes('.') // a rule without a dot cannot be a domain
+          // includes
+          || kwfilter(line)
+          // note that this can only exclude $redirect but not $3p,redirect, so we still need to parse it;
+          // this is only an early bail-out
+        ) {
+          return;
+        }
 
-  const line = $line.trim();
+        line = line.trim();
 
-  if (line.length === 0) {
-    result[1] = ParseType.Null;
-    return result;
-  }
+        if (line.length === 0) {
+          return;
+        }
 
-  const firstCharCode = line.charCodeAt(0);
-  const lastCharCode = line.charCodeAt(line.length - 1);
+        const firstCharCode = line.charCodeAt(0);
+        const lastCharCode = line.charCodeAt(line.length - 1);
+
+        if (
+          firstCharCode === 47 // 47 `/`
+          // ends with
+          // _160-600.
+          // -detect-adblock.
+          // _web-advert.
+          || lastCharCode === 46 // 46 `.`, line.endsWith('.')
+          || lastCharCode === 45 // 45 `-`, line.endsWith('-')
+          || lastCharCode === 95 // 95 `_`, line.endsWith('_')
+        ) {
+          return;
+        }
 
-  if (
-    firstCharCode === 47 // 47 `/`
-    // ends with
-    // _160-600.
-    // -detect-adblock.
-    // _web-advert.
-    || lastCharCode === 46 // 46 `.`, line.endsWith('.')
-    || lastCharCode === 45 // 45 `-`, line.endsWith('-')
-    || lastCharCode === 95 // 95 `_`, line.endsWith('_')
-  ) {
-    result[1] = ParseType.Null;
-    return result;
-  }
+        if ((line.includes('/') || line.includes(':')) && !line.includes('://')) {
+          return;
+        }
 
-  if ((line.includes('/') || line.includes(':')) && !line.includes('://')) {
-    result[1] = ParseType.Null;
-    return result;
+        controller.enqueue(line);
+      }
+    });
   }
+}
+
+export function parse(line: string, result: [string, ParseType], includeThirdParty: boolean): [hostname: string, flag: ParseType] {
+  // We have already done this in AdGuardFilterIgnoreUnsupportedLinesStream
+
+  // if (
+  //   // doesn't include
+  //   !$line.includes('.') // rule with out dot can not be a domain
+  //   // includes
+  //   || kwfilter($line)
+  //   // note that this can only excludes $redirect but not $3p,redirect, so we still need to parse it
+  //   // this is only an early bail out
+  // ) {
+  //   result[1] = ParseType.Null;
+  //   return result;
+  // }
+
+  // const line = $line.trim();
+
+  // if (line.length === 0) {
+  //   result[1] = ParseType.Null;
+  //   return result;
+  // }
+
+  const firstCharCode = line.charCodeAt(0);
+  // const lastCharCode = line.charCodeAt(line.length - 1);
+
+  // if (
+  //   firstCharCode === 47 // 47 `/`
+  //   // ends with
+  //   // _160-600.
+  //   // -detect-adblock.
+  //   // _web-advert.
+  //   || lastCharCode === 46 // 46 `.`, line.endsWith('.')
+  //   || lastCharCode === 45 // 45 `-`, line.endsWith('-')
+  //   || lastCharCode === 95 // 95 `_`, line.endsWith('_')
+  // ) {
+  //   result[1] = ParseType.Null;
+  //   return result;
+  // }
+
+  // if ((line.includes('/') || line.includes(':')) && !line.includes('://')) {
+  //   return;
+  // }
 
   const filter = NetworkFilter.parse(line, false);
   if (filter) {