|
|
@@ -1,56 +1,76 @@
|
|
|
import path from 'node:path';
|
|
|
-import { fetchRemoteTextByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
|
|
|
-import { HostnameTrie } from './lib/trie';
|
|
|
+import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
|
|
|
import { task } from './trace';
|
|
|
import { SHARED_DESCRIPTION } from './constants/description';
|
|
|
import { appendArrayInPlace } from 'foxts/append-array-in-place';
|
|
|
import { SOURCE_DIR } from './constants/dir';
|
|
|
import { DomainsetOutput } from './lib/rules/domainset';
|
|
|
import { CRASHLYTICS_WHITELIST } from './constants/reject-data-source';
|
|
|
-import { appendSetElementsToArray } from 'foxts/append-set-elements-to-array';
|
|
|
+import Worktank from 'worktank';
|
|
|
|
|
|
-const getS3OSSDomainsPromise = (async (): Promise<Set<string>> => {
|
|
|
- const trie = new HostnameTrie();
|
|
|
+const cdnDomainsListPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/cdn.conf'));
|
|
|
+const downloadDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/download.conf'));
|
|
|
+const steamDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/game-download.conf'));
|
|
|
|
|
|
- for await (const line of await fetchRemoteTextByLine('https://publicsuffix.org/list/public_suffix_list.dat', true)) {
|
|
|
- trie.add(line);
|
|
|
- }
|
|
|
+const pool = new Worktank({
|
|
|
+ pool: {
|
|
|
+ name: 'extract-s3-from-publicssuffix',
|
|
|
+    size: 1 // The number of workers to keep in the pool; if more workers are needed, they will be spawned up to this limit
|
|
|
+ },
|
|
|
+ worker: {
|
|
|
+ autoAbort: 10000,
|
|
|
+    autoTerminate: 20000, // Interval in milliseconds at which to check whether the pool can be terminated automatically to free up resources; workers will be spawned again if needed
|
|
|
+ autoInstantiate: true,
|
|
|
+ methods: {
|
|
|
+      // eslint-disable-next-line object-shorthand -- worktank
|
|
|
+ getS3OSSDomains: async function (importMetaUrl: string): Promise<string[]> {
|
|
|
+ // TODO: createRequire is a temporary workaround for https://github.com/nodejs/node/issues/51956
|
|
|
+ const { default: module } = await import('node:module');
|
|
|
+ const __require = module.createRequire(importMetaUrl);
|
|
|
|
|
|
- /**
|
|
|
- * Extract OSS domain from publicsuffix list
|
|
|
- */
|
|
|
- const S3OSSDomains = new Set<string>();
|
|
|
+ const { HostnameTrie } = __require('./lib/trie') as typeof import('./lib/trie');
|
|
|
+ const { fetchRemoteTextByLine } = __require('./lib/fetch-text-by-line') as typeof import('./lib/fetch-text-by-line');
|
|
|
|
|
|
- trie.find('.amazonaws.com').forEach((line: string) => {
|
|
|
- if (
|
|
|
- (line.startsWith('s3-') || line.startsWith('s3.'))
|
|
|
- && !line.includes('cn-')
|
|
|
- ) {
|
|
|
- S3OSSDomains.add('.' + line);
|
|
|
- }
|
|
|
- });
|
|
|
- trie.find('.scw.cloud').forEach((line: string) => {
|
|
|
- if (
|
|
|
- (line.startsWith('s3-') || line.startsWith('s3.'))
|
|
|
- // && !line.includes('cn-')
|
|
|
- ) {
|
|
|
- S3OSSDomains.add('.' + line);
|
|
|
- }
|
|
|
- });
|
|
|
- trie.find('sakurastorage.jp').forEach((line: string) => {
|
|
|
- if (
|
|
|
- (line.startsWith('s3-') || line.startsWith('s3.'))
|
|
|
- ) {
|
|
|
- S3OSSDomains.add('.' + line);
|
|
|
- }
|
|
|
- });
|
|
|
+ const trie = new HostnameTrie();
|
|
|
|
|
|
- return S3OSSDomains;
|
|
|
-})();
|
|
|
+ for await (const line of await fetchRemoteTextByLine('https://publicsuffix.org/list/public_suffix_list.dat', true)) {
|
|
|
+ trie.add(line);
|
|
|
+ }
|
|
|
|
|
|
-const cdnDomainsListPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/cdn.conf'));
|
|
|
-const downloadDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/download.conf'));
|
|
|
-const steamDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/game-download.conf'));
|
|
|
+ /**
|
|
|
+ * Extract OSS domain from publicsuffix list
|
|
|
+ */
|
|
|
+ const S3OSSDomains: string[] = [];
|
|
|
+
|
|
|
+ trie.find('.amazonaws.com').forEach((line: string) => {
|
|
|
+ if (
|
|
|
+ (line.startsWith('s3-') || line.startsWith('s3.'))
|
|
|
+ && !line.includes('cn-')
|
|
|
+ ) {
|
|
|
+ S3OSSDomains.push('.' + line);
|
|
|
+ }
|
|
|
+ });
|
|
|
+ trie.find('.scw.cloud').forEach((line: string) => {
|
|
|
+ if (
|
|
|
+ (line.startsWith('s3-') || line.startsWith('s3.'))
|
|
|
+ // && !line.includes('cn-')
|
|
|
+ ) {
|
|
|
+ S3OSSDomains.push('.' + line);
|
|
|
+ }
|
|
|
+ });
|
|
|
+ trie.find('sakurastorage.jp').forEach((line: string) => {
|
|
|
+ if (
|
|
|
+ (line.startsWith('s3-') || line.startsWith('s3.'))
|
|
|
+ ) {
|
|
|
+ S3OSSDomains.push('.' + line);
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ return S3OSSDomains;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+});
|
|
|
|
|
|
export const buildCdnDownloadConf = task(require.main === module, __filename)(async (span) => {
|
|
|
const [
|
|
|
@@ -59,14 +79,17 @@ export const buildCdnDownloadConf = task(require.main === module, __filename)(as
|
|
|
downloadDomainSet,
|
|
|
steamDomainSet
|
|
|
] = await Promise.all([
|
|
|
- span.traceChildPromise('download public suffix list for s3', getS3OSSDomainsPromise),
|
|
|
+ span.traceChildAsync('download public suffix list for s3', () => pool.exec(
|
|
|
+ 'getS3OSSDomains',
|
|
|
+ [import.meta.url]
|
|
|
+ ).finally(() => pool.terminate())),
|
|
|
cdnDomainsListPromise,
|
|
|
downloadDomainSetPromise,
|
|
|
steamDomainSetPromise
|
|
|
]);
|
|
|
|
|
|
// Move S3 domains to download domain set, since S3 files may be large
|
|
|
- appendSetElementsToArray(downloadDomainSet, S3OSSDomains);
|
|
|
+ appendArrayInPlace(downloadDomainSet, S3OSSDomains);
|
|
|
appendArrayInPlace(downloadDomainSet, steamDomainSet);
|
|
|
|
|
|
// we have whitelisted the crashlytics domain, and we also want to put it in CDN policy
|