Browse Source

Perf: process publicsuffix in a worker thread

SukkaW 8 months ago
parent
commit
83e94403e8
1 changed files with 65 additions and 42 deletions
  1. 65 42
      Build/build-cdn-download-conf.ts

+ 65 - 42
Build/build-cdn-download-conf.ts

@@ -1,56 +1,76 @@
 import path from 'node:path';
 import path from 'node:path';
-import { fetchRemoteTextByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
-import { HostnameTrie } from './lib/trie';
+import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
 import { task } from './trace';
 import { task } from './trace';
 import { SHARED_DESCRIPTION } from './constants/description';
 import { SHARED_DESCRIPTION } from './constants/description';
 import { appendArrayInPlace } from 'foxts/append-array-in-place';
 import { appendArrayInPlace } from 'foxts/append-array-in-place';
 import { SOURCE_DIR } from './constants/dir';
 import { SOURCE_DIR } from './constants/dir';
 import { DomainsetOutput } from './lib/rules/domainset';
 import { DomainsetOutput } from './lib/rules/domainset';
 import { CRASHLYTICS_WHITELIST } from './constants/reject-data-source';
 import { CRASHLYTICS_WHITELIST } from './constants/reject-data-source';
-import { appendSetElementsToArray } from 'foxts/append-set-elements-to-array';
+import Worktank from 'worktank';
 
 
-const getS3OSSDomainsPromise = (async (): Promise<Set<string>> => {
-  const trie = new HostnameTrie();
+const cdnDomainsListPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/cdn.conf'));
+const downloadDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/download.conf'));
+const steamDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/game-download.conf'));
 
 
-  for await (const line of await fetchRemoteTextByLine('https://publicsuffix.org/list/public_suffix_list.dat', true)) {
-    trie.add(line);
-  }
+const pool = new Worktank({
+  pool: {
+    name: 'extract-s3-from-publicssuffix',
+    size: 1 // Maximum number of workers kept in the pool; additional workers are spawned on demand, up to this limit
+  },
+  worker: {
+    autoAbort: 10000,
+    autoTerminate: 20000, // Interval in milliseconds at which to check whether the pool can be automatically terminated to free up resources; workers are spawned again if needed
+    autoInstantiate: true,
+    methods: {
+      // eslint-disable-next-line object-shorthand -- worktank
+      getS3OSSDomains: async function (importMetaUrl: string): Promise<string[]> {
+        // TODO: createRequire is a temporary workaround for https://github.com/nodejs/node/issues/51956
+        const { default: module } = await import('node:module');
+        const __require = module.createRequire(importMetaUrl);
 
 
-  /**
-   * Extract OSS domain from publicsuffix list
-   */
-  const S3OSSDomains = new Set<string>();
+        const { HostnameTrie } = __require('./lib/trie') as typeof import('./lib/trie');
+        const { fetchRemoteTextByLine } = __require('./lib/fetch-text-by-line') as typeof import('./lib/fetch-text-by-line');
 
 
-  trie.find('.amazonaws.com').forEach((line: string) => {
-    if (
-      (line.startsWith('s3-') || line.startsWith('s3.'))
-      && !line.includes('cn-')
-    ) {
-      S3OSSDomains.add('.' + line);
-    }
-  });
-  trie.find('.scw.cloud').forEach((line: string) => {
-    if (
-      (line.startsWith('s3-') || line.startsWith('s3.'))
-      // && !line.includes('cn-')
-    ) {
-      S3OSSDomains.add('.' + line);
-    }
-  });
-  trie.find('sakurastorage.jp').forEach((line: string) => {
-    if (
-      (line.startsWith('s3-') || line.startsWith('s3.'))
-    ) {
-      S3OSSDomains.add('.' + line);
-    }
-  });
+        const trie = new HostnameTrie();
 
 
-  return S3OSSDomains;
-})();
+        for await (const line of await fetchRemoteTextByLine('https://publicsuffix.org/list/public_suffix_list.dat', true)) {
+          trie.add(line);
+        }
 
 
-const cdnDomainsListPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/cdn.conf'));
-const downloadDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/download.conf'));
-const steamDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/game-download.conf'));
+        /**
+         * Extract OSS domain from publicsuffix list
+         */
+        const S3OSSDomains: string[] = [];
+
+        trie.find('.amazonaws.com').forEach((line: string) => {
+          if (
+            (line.startsWith('s3-') || line.startsWith('s3.'))
+            && !line.includes('cn-')
+          ) {
+            S3OSSDomains.push('.' + line);
+          }
+        });
+        trie.find('.scw.cloud').forEach((line: string) => {
+          if (
+            (line.startsWith('s3-') || line.startsWith('s3.'))
+          // && !line.includes('cn-')
+          ) {
+            S3OSSDomains.push('.' + line);
+          }
+        });
+        trie.find('sakurastorage.jp').forEach((line: string) => {
+          if (
+            (line.startsWith('s3-') || line.startsWith('s3.'))
+          ) {
+            S3OSSDomains.push('.' + line);
+          }
+        });
+
+        return S3OSSDomains;
+      }
+    }
+  }
+});
 
 
 export const buildCdnDownloadConf = task(require.main === module, __filename)(async (span) => {
 export const buildCdnDownloadConf = task(require.main === module, __filename)(async (span) => {
   const [
   const [
@@ -59,14 +79,17 @@ export const buildCdnDownloadConf = task(require.main === module, __filename)(as
     downloadDomainSet,
     downloadDomainSet,
     steamDomainSet
     steamDomainSet
   ] = await Promise.all([
   ] = await Promise.all([
-    span.traceChildPromise('download public suffix list for s3', getS3OSSDomainsPromise),
+    span.traceChildAsync('download public suffix list for s3', () => pool.exec(
+      'getS3OSSDomains',
+      [import.meta.url]
+    ).finally(() => pool.terminate())),
     cdnDomainsListPromise,
     cdnDomainsListPromise,
     downloadDomainSetPromise,
     downloadDomainSetPromise,
     steamDomainSetPromise
     steamDomainSetPromise
   ]);
   ]);
 
 
   // Move S3 domains to download domain set, since S3 files may be large
   // Move S3 domains to download domain set, since S3 files may be large
-  appendSetElementsToArray(downloadDomainSet, S3OSSDomains);
+  appendArrayInPlace(downloadDomainSet, S3OSSDomains);
   appendArrayInPlace(downloadDomainSet, steamDomainSet);
   appendArrayInPlace(downloadDomainSet, steamDomainSet);
 
 
   // we have whitelisted the crashlytics domain, and we also want to put it in CDN policy
   // we have whitelisted the crashlytics domain, and we also want to put it in CDN policy