Browse Source

Perf: read most of files in one pass

SukkaW 2 years ago
parent
commit
a7fc13b355

+ 2 - 9
Build/build-anti-bogus-domain.ts

@@ -1,7 +1,7 @@
 // @ts-check
 // @ts-check
 import path from 'path';
 import path from 'path';
 import { createRuleset } from './lib/create-file';
 import { createRuleset } from './lib/create-file';
-import { fetchRemoteTextByLine, readFileByLine } from './lib/fetch-text-by-line';
+import { fetchRemoteTextByLine, readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
 import { processLine } from './lib/process-line';
 import { processLine } from './lib/process-line';
 import { task } from './trace';
 import { task } from './trace';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { SHARED_DESCRIPTION } from './lib/constants';
@@ -34,14 +34,7 @@ const getBogusNxDomainIPsPromise = fsCache.apply(
 );
 );
 
 
 export const buildAntiBogusDomain = task(import.meta.path, async (span) => {
 export const buildAntiBogusDomain = task(import.meta.path, async (span) => {
-  const result: string[] = [];
-  for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/ip/reject.conf'))) {
-    const l = processLine(line);
-    if (l) {
-      result.push(l);
-    }
-  }
-
+  const result: string[] = await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/ip/reject.conf'));
   result.push(...(await getBogusNxDomainIPsPromise));
   result.push(...(await getBogusNxDomainIPsPromise));
 
 
   const description = [
   const description = [

+ 1 - 1
Build/build-apple-cdn.ts

@@ -18,7 +18,7 @@ export const getAppleCdnDomainsPromise = createMemoizedPromise(() => fsCache.app
 ));
 ));
 
 
 export const buildAppleCdn = task(import.meta.path, async (span) => {
 export const buildAppleCdn = task(import.meta.path, async (span) => {
-  const res = await getAppleCdnDomainsPromise();
+  const res = await span.traceChild('get apple cdn domains').traceAsyncFn(getAppleCdnDomainsPromise);
 
 
   const description = [
   const description = [
     ...SHARED_DESCRIPTION,
     ...SHARED_DESCRIPTION,

+ 2 - 11
Build/build-cdn-conf.ts

@@ -1,9 +1,8 @@
 import path from 'path';
 import path from 'path';
 import { createRuleset } from './lib/create-file';
 import { createRuleset } from './lib/create-file';
-import { readFileByLine } from './lib/fetch-text-by-line';
+import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
 import { createTrie } from './lib/trie';
 import { createTrie } from './lib/trie';
 import { task } from './trace';
 import { task } from './trace';
-import { processLine } from './lib/process-line';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { getPublicSuffixListTextPromise } from './download-publicsuffixlist';
 import { getPublicSuffixListTextPromise } from './download-publicsuffixlist';
 
 
@@ -44,15 +43,7 @@ const getS3OSSDomainsPromise = (async (): Promise<Set<string>> => {
 
 
 const buildCdnConf = task(import.meta.path, async (span) => {
 const buildCdnConf = task(import.meta.path, async (span) => {
   /** @type {string[]} */
   /** @type {string[]} */
-  const cdnDomainsList: string[] = [];
-
-  for await (const l of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/cdn.conf'))) {
-    const line = processLine(l);
-    if (line) {
-      cdnDomainsList.push(line);
-    }
-  }
-
+  const cdnDomainsList: string[] = await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/cdn.conf'));
   (await getS3OSSDomainsPromise).forEach((domain: string) => { cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`); });
   (await getS3OSSDomainsPromise).forEach((domain: string) => { cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`); });
 
 
   const description: string[] = [
   const description: string[] = [

+ 2 - 3
Build/build-domestic-ruleset.ts

@@ -1,15 +1,14 @@
 // @ts-check
 // @ts-check
 import path from 'path';
 import path from 'path';
 import { DOMESTICS } from '../Source/non_ip/domestic';
 import { DOMESTICS } from '../Source/non_ip/domestic';
-import { readFileByLine } from './lib/fetch-text-by-line';
-import { processLineFromReadline } from './lib/process-line';
+import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
 import { compareAndWriteFile, createRuleset } from './lib/create-file';
 import { compareAndWriteFile, createRuleset } from './lib/create-file';
 import { task } from './trace';
 import { task } from './trace';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { createMemoizedPromise } from './lib/memo-promise';
 import { createMemoizedPromise } from './lib/memo-promise';
 
 
 export const getDomesticDomainsRulesetPromise = createMemoizedPromise(async () => {
 export const getDomesticDomainsRulesetPromise = createMemoizedPromise(async () => {
-  const results = await processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/domestic.conf')));
+  const results = await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/domestic.conf'));
 
 
   results.push(
   results.push(
     ...Object.entries(DOMESTICS).reduce<string[]>((acc, [key, { domains }]) => {
     ...Object.entries(DOMESTICS).reduce<string[]>((acc, [key, { domains }]) => {

+ 10 - 11
Build/build-sspanel-appprofile.ts

@@ -1,8 +1,7 @@
 import { getAppleCdnDomainsPromise } from './build-apple-cdn';
 import { getAppleCdnDomainsPromise } from './build-apple-cdn';
 import { getDomesticDomainsRulesetPromise } from './build-domestic-ruleset';
 import { getDomesticDomainsRulesetPromise } from './build-domestic-ruleset';
 import { surgeRulesetToClashClassicalTextRuleset } from './lib/clash';
 import { surgeRulesetToClashClassicalTextRuleset } from './lib/clash';
-import { readFileByLine } from './lib/fetch-text-by-line';
-import { processLineFromReadline } from './lib/process-line';
+import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
 import { task } from './trace';
 import { task } from './trace';
 import path from 'path';
 import path from 'path';
 
 
@@ -49,19 +48,19 @@ export const buildSSPanelUIMAppProfile = task(import.meta.path, async (span) =>
     getDomesticDomainsRulesetPromise().then(surgeRulesetToClashClassicalTextRuleset),
     getDomesticDomainsRulesetPromise().then(surgeRulesetToClashClassicalTextRuleset),
     getAppleCdnDomainsPromise().then(domains => domains.map(domain => `DOMAIN-SUFFIX,${domain}`)),
     getAppleCdnDomainsPromise().then(domains => domains.map(domain => `DOMAIN-SUFFIX,${domain}`)),
     getMicrosoftCdnRulesetPromise().then(surgeRulesetToClashClassicalTextRuleset),
     getMicrosoftCdnRulesetPromise().then(surgeRulesetToClashClassicalTextRuleset),
-    processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/apple_cn.conf'))),
-    processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/neteasemusic.conf'))).then(surgeRulesetToClashClassicalTextRuleset),
+    readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/apple_cn.conf')),
+    readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/neteasemusic.conf')).then(surgeRulesetToClashClassicalTextRuleset),
     // microsoft & apple - domains
     // microsoft & apple - domains
-    processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/microsoft.conf'))),
-    (processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/apple_services.conf')))).then(surgeRulesetToClashClassicalTextRuleset),
+    readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/microsoft.conf')),
+    readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/apple_services.conf')).then(surgeRulesetToClashClassicalTextRuleset),
     // stream - domains
     // stream - domains
     surgeRulesetToClashClassicalTextRuleset(AllStreamServices.flatMap((i) => i.rules)),
     surgeRulesetToClashClassicalTextRuleset(AllStreamServices.flatMap((i) => i.rules)),
     // global - domains
     // global - domains
-    processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/global.conf'))).then(surgeRulesetToClashClassicalTextRuleset),
-    processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/global_plus.conf'))).then(surgeRulesetToClashClassicalTextRuleset),
-    processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/telegram.conf'))).then(surgeRulesetToClashClassicalTextRuleset),
+    readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/global.conf')).then(surgeRulesetToClashClassicalTextRuleset),
+    readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/global_plus.conf')).then(surgeRulesetToClashClassicalTextRuleset),
+    readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/telegram.conf')).then(surgeRulesetToClashClassicalTextRuleset),
     // lan - domains
     // lan - domains
-    processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/lan.conf'))),
+    readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/lan.conf')),
     // domestic - ip cidr
     // domestic - ip cidr
     getChnCidrPromise().then(cidrs => cidrs.map(cidr => `IP-CIDR,${cidr}`)),
     getChnCidrPromise().then(cidrs => cidrs.map(cidr => `IP-CIDR,${cidr}`)),
     AllStreamServices.flatMap((i) => (
     AllStreamServices.flatMap((i) => (
@@ -75,7 +74,7 @@ export const buildSSPanelUIMAppProfile = task(import.meta.path, async (span) =>
     // global - ip cidr
     // global - ip cidr
     getTelegramCIDRPromise(),
     getTelegramCIDRPromise(),
     // lan - ip cidr
     // lan - ip cidr
-    processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/ip/lan.conf')))
+    readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/ip/lan.conf'))
   ] as const);
   ] as const);
 
 
   const telegramCidrs = rawTelegramCidrs.map(removeNoResolved);
   const telegramCidrs = rawTelegramCidrs.map(removeNoResolved);

+ 17 - 0
Build/lib/fetch-text-by-line.bench.ts

@@ -0,0 +1,17 @@
+import { bench, group, run } from 'mitata';
+import { processLine, processLineFromReadline } from './process-line';
+import { readFileByLine } from './fetch-text-by-line';
+import path from 'path';
+import fsp from 'fs/promises';
+
+const file = path.resolve(import.meta.dir, '../../Source/domainset/cdn.conf'); // single fixture file shared by every case below
+// Three ways of turning the fixture into a list of lines, registered under one mitata group.
+group('read file by line', () => {
+  bench('readline', () => processLineFromReadline(readFileByLine(file)));
+  // Read the whole file via fs/promises, split on '\n', keep the lines for which processLine returns a truthy value.
+  bench('fsp.readFile', () => fsp.readFile(file, 'utf-8').then((content) => content.split('\n').filter(processLine)));
+  // Same split-and-filter as above, but the content is read through Bun.file(...).text().
+  bench('Bun.file', () => Bun.file(file).text().then((content) => content.split('\n').filter(processLine)));
+});
+// NOTE(review): the two split/filter cases keep the raw lines, while 'readline' goes through processLineFromReadline — presumably not the exact same output; confirm the comparison is like-for-like.
+run();

+ 12 - 0
Build/lib/fetch-text-by-line.ts

@@ -3,6 +3,7 @@ import { fetchWithRetry, defaultRequestInit } from './fetch-retry';
 
 
 import { TextLineStream } from './text-line-transform-stream';
 import { TextLineStream } from './text-line-transform-stream';
 import { PolyfillTextDecoderStream } from './text-decoder-stream';
 import { PolyfillTextDecoderStream } from './text-decoder-stream';
+import { processLine } from './process-line';
 // function createTextLineStreamFromStreamSource(stream: ReadableStream<Uint8Array>) {
 // function createTextLineStreamFromStreamSource(stream: ReadableStream<Uint8Array>) {
 //   return stream
 //   return stream
 //     .pipeThrough(new PolyfillTextDecoderStream())
 //     .pipeThrough(new PolyfillTextDecoderStream())
@@ -54,3 +55,14 @@ export function createReadlineInterfaceFromResponse(this: void, resp: Response)
 export function fetchRemoteTextByLine(url: string | URL) {
 export function fetchRemoteTextByLine(url: string | URL) {
   return fetchWithRetry(url, defaultRequestInit).then(createReadlineInterfaceFromResponse);
   return fetchWithRetry(url, defaultRequestInit).then(createReadlineInterfaceFromResponse);
 }
 }
+
+/**
+ * Read a file in a single pass and return its lines after running each one
+ * through `processLine`.
+ *
+ * The whole content is loaded with `BunFile.text()` and split on `'\n'`.
+ * Each raw line is passed to `processLine`; lines for which it returns a
+ * falsy value are dropped, and for the remaining lines the value *returned
+ * by* `processLine` is collected (not the raw line). This matches the
+ * per-line loops this helper replaces, which pushed `processLine(line)`.
+ *
+ * @param file - A filesystem path, a `URL`, or an already opened `BunFile`.
+ * @returns The processed lines, in file order.
+ */
+export async function readFileIntoProcessedArray(file: string | URL | BunFile): Promise<string[]> {
+  // A value without a `writer` member is treated as a path/URL and opened here;
+  // anything else is taken to be a BunFile already.
+  if (typeof file === 'string' || !('writer' in file)) {
+    file = Bun.file(file);
+  }
+
+  const content = await file.text();
+
+  const results: string[] = [];
+  for (const rawLine of content.split('\n')) {
+    // Keep processLine's output rather than using it only as a filter
+    // predicate, so whatever normalisation it applies reaches the caller.
+    const processed = processLine(rawLine);
+    if (processed) {
+      results.push(processed);
+    }
+  }
+  return results;
+}