Browse Source

Perf: faster `fetchAssets` (stream the response into lines instead of buffering the full text as a string and splitting it manually)

SukkaW 1 year ago
parent
commit
07419a7942

+ 5 - 9
Build/build-reject-ip-list.ts

@@ -4,7 +4,6 @@ import { createReadlineInterfaceFromResponse, readFileIntoProcessedArray } from
 import { task } from './trace';
 import { SHARED_DESCRIPTION } from './constants/description';
 import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip';
-import { processLine } from './lib/process-line';
 import { RulesetOutput } from './lib/create-file';
 import { SOURCE_DIR } from './constants/dir';
 import { $$fetch } from './lib/fetch-retry';
@@ -37,14 +36,11 @@ const BOTNET_FILTER_MIRROR_URL = [
   // https://curbengh.github.io/malware-filter/botnet-filter-dnscrypt-blocked-ips.txt
 ];
 
-const getBotNetFilterIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL).then(text => text.split('\n').reduce<[ipv4: string[], ipv6: string[]]>((acc, cur) => {
-  const ip = processLine(cur);
-  if (ip) {
-    if (isProbablyIpv4(ip)) {
-      acc[0].push(ip);
-    } else if (isProbablyIpv6(ip)) {
-      acc[1].push(ip);
-    }
+const getBotNetFilterIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL, true).then(arr => arr.reduce<[ipv4: string[], ipv6: string[]]>((acc, ip) => {
+  if (isProbablyIpv4(ip)) {
+    acc[0].push(ip);
+  } else if (isProbablyIpv6(ip)) {
+    acc[1].push(ip);
   }
   return acc;
 }, [[], []]));

+ 12 - 4
Build/lib/fetch-assets.ts

@@ -1,6 +1,9 @@
 import picocolors from 'picocolors';
 import { $$fetch, defaultRequestInit, ResponseError } from './fetch-retry';
 import { waitWithAbort } from 'foxts/wait';
+import { nullthrow } from 'foxts/guard';
+import { TextLineStream } from './text-line-transform-stream';
+import { ProcessLineStream } from './process-line';
 
 // eslint-disable-next-line sukka/unicorn/custom-error-definition -- typescript is better
 export class CustomAbortError extends Error {
@@ -26,7 +29,7 @@ export class CustomNoETagFallbackError extends Error {
   }
 }
 
-export async function fetchAssets(url: string, fallbackUrls: null | undefined | string[] | readonly string[]) {
+export async function fetchAssets(url: string, fallbackUrls: null | undefined | string[] | readonly string[], processLine = false) {
   const controller = new AbortController();
 
   const createFetchFallbackPromise = async (url: string, index: number) => {
@@ -44,14 +47,19 @@ export async function fetchAssets(url: string, fallbackUrls: null | undefined |
       throw new CustomAbortError();
     }
     const res = await $$fetch(url, { signal: controller.signal, ...defaultRequestInit });
-    const text = await res.text();
 
-    if (text.length < 2) {
+    let stream = nullthrow(res.body).pipeThrough(new TextDecoderStream()).pipeThrough(new TextLineStream());
+    if (processLine) {
+      stream = stream.pipeThrough(new ProcessLineStream());
+    }
+    const arr = await Array.fromAsync(stream);
+
+    if (arr.length < 1) {
       throw new ResponseError(res, url, 'empty response w/o 304');
     }
 
     controller.abort();
-    return text;
+    return arr;
   };
 
   if (!fallbackUrls || fallbackUrls.length === 0) {

+ 6 - 10
Build/lib/parse-filter/domainlists.ts

@@ -16,10 +16,7 @@ function domainListLineCb(l: string, set: string[], meta: string, normalizeDomai
   set.push(domain);
 }
 
-function domainListLineCbIncludeAllSubdomain(l: string, set: string[], meta: string, normalizeDomain = fastNormalizeDomain) {
-  const line = processLine(l);
-  if (!line) return;
-
+function domainListLineCbIncludeAllSubdomain(line: string, set: string[], meta: string, normalizeDomain = fastNormalizeDomain) {
   const domain = normalizeDomain(line);
   if (!domain) return;
 
@@ -36,12 +33,12 @@ export function processDomainLists(
   const lineCb = includeAllSubDomain ? domainListLineCbIncludeAllSubdomain : domainListLineCb;
 
   return span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
-    const text = await span.traceChildAsync('download', () => fetchAssets(
+    const filterRules = await span.traceChildAsync('download', () => fetchAssets(
       domainListsUrl,
-      mirrors
+      mirrors,
+      true
     ));
     const domainSets: string[] = [];
-    const filterRules = text.split('\n');
 
     span.traceChildSync('parse domain list', () => {
       for (let i = 0, len = filterRules.length; i < len; i++) {
@@ -59,13 +56,12 @@ export function processDomainListsWithPreload(
 ) {
   const domainNormalizer = wwwToApex ? fastNormalizeDomainIgnoreWww : fastNormalizeDomain;
 
-  const downloadPromise = fetchAssets(domainListsUrl, mirrors);
+  const downloadPromise = fetchAssets(domainListsUrl, mirrors, true);
   const lineCb = includeAllSubDomain ? domainListLineCbIncludeAllSubdomain : domainListLineCb;
 
   return (span: Span) => span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
-    const text = await span.traceChildPromise('download', downloadPromise);
+    const filterRules = await span.traceChildPromise('download', downloadPromise);
     const domainSets: string[] = [];
-    const filterRules = text.split('\n');
 
     span.traceChildSync('parse domain list', () => {
       for (let i = 0, len = filterRules.length; i < len; i++) {

+ 1 - 3
Build/lib/parse-filter/filters.ts

@@ -28,7 +28,7 @@ export function processFilterRulesWithPreload(
   const downloadPromise = fetchAssets(filterRulesUrl, fallbackUrls);
 
   return (span: Span) => span.traceChildAsync<Record<'whiteDomains' | 'whiteDomainSuffixes' | 'blackDomains' | 'blackDomainSuffixes', string[]>>(`process filter rules: ${filterRulesUrl}`, async (span) => {
-    const text = await span.traceChildPromise('download', downloadPromise);
+    const filterRules = await span.traceChildPromise('download', downloadPromise);
 
     const whiteDomains = new Set<string>();
     const whiteDomainSuffixes = new Set<string>();
@@ -82,8 +82,6 @@ export function processFilterRulesWithPreload(
       }
     };
 
-    const filterRules = text.split('\n');
-
     span.traceChild('parse adguard filter').traceSyncFn(() => {
       for (let i = 0, len = filterRules.length; i < len; i++) {
         lineCb(filterRules[i]);

+ 4 - 14
Build/lib/parse-filter/hosts.ts

@@ -1,15 +1,9 @@
 import type { Span } from '../../trace';
 import { fetchAssets } from '../fetch-assets';
 import { fastNormalizeDomain } from '../normalize-domain';
-import { processLine } from '../process-line';
 import { onBlackFound } from './shared';
 
-function hostsLineCb(l: string, set: string[], includeAllSubDomain: boolean, meta: string) {
-  const line = processLine(l);
-  if (!line) {
-    return;
-  }
-
+function hostsLineCb(line: string, set: string[], includeAllSubDomain: boolean, meta: string) {
   const _domain = line.split(/\s/)[1]?.trim();
   if (!_domain) {
     return;
@@ -29,12 +23,10 @@ export function processHosts(
   hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false
 ) {
   return span.traceChildAsync(`process hosts: ${hostsUrl}`, async (span) => {
-    const text = await span.traceChild('download').traceAsyncFn(() => fetchAssets(hostsUrl, mirrors));
+    const filterRules = await span.traceChild('download').traceAsyncFn(() => fetchAssets(hostsUrl, mirrors, true));
 
     const domainSets: string[] = [];
 
-    const filterRules = text.split('\n');
-
     span.traceChild('parse hosts').traceSyncFn(() => {
       for (let i = 0, len = filterRules.length; i < len; i++) {
         hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl);
@@ -46,15 +38,13 @@ export function processHosts(
 }
 
 export function processHostsWithPreload(hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false) {
-  const downloadPromise = fetchAssets(hostsUrl, mirrors);
+  const downloadPromise = fetchAssets(hostsUrl, mirrors, true);
 
   return (span: Span) => span.traceChildAsync(`process hosts: ${hostsUrl}`, async (span) => {
-    const text = await span.traceChild('download').tracePromise(downloadPromise);
+    const filterRules = await span.traceChild('download').tracePromise(downloadPromise);
 
     const domainSets: string[] = [];
 
-    const filterRules = text.split('\n');
-
     span.traceChild('parse hosts').traceSyncFn(() => {
       for (let i = 0, len = filterRules.length; i < len; i++) {
         hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl);