ソースを参照

Perf: add more cache

SukkaW 2 年 前
コミット
8c150e87c2

+ 1 - 4
Build/build-cdn-conf.ts

@@ -7,10 +7,7 @@ import { processLine } from './lib/process-line';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { getPublicSuffixListTextPromise } from './download-publicsuffixlist';
 import { getPublicSuffixListTextPromise } from './download-publicsuffixlist';
 const getS3OSSDomains = async (): Promise<Set<string>> => {
 const getS3OSSDomains = async (): Promise<Set<string>> => {
-  const trie = createTrie();
-  for await (const line of (await getPublicSuffixListTextPromise()).split('\n')) {
-    trie.add(line);
-  }
+  const trie = createTrie((await getPublicSuffixListTextPromise()).split('\n'));
 
 
   /**
   /**
    * Extract OSS domain from publicsuffix list
    * Extract OSS domain from publicsuffix list

+ 2 - 0
Build/lib/cache-filesystem.ts

@@ -136,7 +136,9 @@ const randomInt = (min: number, max: number) => Math.floor(Math.random() * (max
 
 
 // Add some randomness to the cache ttl to avoid thundering herd
 // Add some randomness to the cache ttl to avoid thundering herd
 export const TTL = {
 export const TTL = {
+  THREE_HOURS: () => randomInt(2, 4) * 60 * 60 * 1000,
   TWLVE_HOURS: () => randomInt(9, 14) * 60 * 60 * 1000,
   TWLVE_HOURS: () => randomInt(9, 14) * 60 * 60 * 1000,
+  ONE_DAY: () => randomInt(23, 25) * 60 * 60 * 1000,
   THREE_DAYS: () => randomInt(2, 4) * 24 * 60 * 60 * 1000,
   THREE_DAYS: () => randomInt(2, 4) * 24 * 60 * 60 * 1000,
   ONE_WEEK: () => randomInt(5, 8) * 24 * 60 * 60 * 1000,
   ONE_WEEK: () => randomInt(5, 8) * 24 * 60 * 60 * 1000,
   TWO_WEEKS: () => randomInt(12, 16) * 24 * 60 * 60 * 1000,
   TWO_WEEKS: () => randomInt(12, 16) * 24 * 60 * 60 * 1000,

+ 2 - 2
Build/lib/get-gorhill-publicsuffix.ts

@@ -3,9 +3,9 @@ import { traceAsync } from './trace-runner';
 import { createMemoizedPromise } from './memo-promise';
 import { createMemoizedPromise } from './memo-promise';
 import { getPublicSuffixListTextPromise } from '../download-publicsuffixlist';
 import { getPublicSuffixListTextPromise } from '../download-publicsuffixlist';
 
 
-export const getGorhillPublicSuffixPromise = createMemoizedPromise(() => traceAsync('create gorhill public suffix instance', async () => {
-  const customFetch = (url: string | URL) => Promise.resolve(Bun.file(url));
+const customFetch = (url: string | URL) => Promise.resolve(Bun.file(url));
 
 
+export const getGorhillPublicSuffixPromise = createMemoizedPromise(() => traceAsync('create gorhill public suffix instance', async () => {
   const [publicSuffixListDat, { default: gorhill }] = await Promise.all([
   const [publicSuffixListDat, { default: gorhill }] = await Promise.all([
     getPublicSuffixListTextPromise(),
     getPublicSuffixListTextPromise(),
     import('@gorhill/publicsuffixlist')
     import('@gorhill/publicsuffixlist')

+ 4 - 1
Build/lib/parse-filter.ts

@@ -173,6 +173,9 @@ export async function processFilterRules(
           lineCb(line);
           lineCb(line);
         }
         }
       } else {
       } else {
+        // Avoid event loop starvation, so we yield for a microtask tick before we start fetching.
+        await Promise.resolve();
+
         const filterRules = (await traceAsync(
         const filterRules = (await traceAsync(
           picocolors.gray(`- download ${filterRulesUrl}`),
           picocolors.gray(`- download ${filterRulesUrl}`),
           () => fetchAssets(filterRulesUrl, fallbackUrls),
           () => fetchAssets(filterRulesUrl, fallbackUrls),
@@ -191,7 +194,7 @@ export async function processFilterRules(
         Array.from(whitelistDomainSets),
         Array.from(whitelistDomainSets),
         Array.from(blacklistDomainSets),
         Array.from(blacklistDomainSets),
         warningMessages
         warningMessages
-      ];
+      ] as const;
     },
     },
     {
     {
       ttl,
       ttl,

+ 29 - 23
Build/lib/reject-data-source.ts

@@ -1,8 +1,8 @@
 import { TTL } from './cache-filesystem';
 import { TTL } from './cache-filesystem';
 
 
 export const HOSTS = [
 export const HOSTS = [
-  ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true],
-  ['https://someonewhocares.org/hosts/hosts', true],
+  ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, false, TTL.THREE_HOURS()],
+  ['https://someonewhocares.org/hosts/hosts', true, false, TTL.THREE_HOURS()],
   // no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl
   // no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl
   ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false, false, TTL.THREE_DAYS()],
   ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false, false, TTL.THREE_DAYS()],
   // have not been updated for more than a year, so we set a 14 days cache ttl
   // have not been updated for more than a year, so we set a 14 days cache ttl
@@ -11,14 +11,11 @@ export const HOSTS = [
   ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', false, false, TTL.THREE_DAYS()],
   ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', false, false, TTL.THREE_DAYS()],
   // ad-wars is not actively maintained, so we set a 7 days cache ttl
   // ad-wars is not actively maintained, so we set a 7 days cache ttl
   ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, false, TTL.ONE_WEEK()],
   ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, false, TTL.ONE_WEEK()],
-  ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true],
-  // CoinBlockerList
-  // Although the hosts file is still actively maintained, the hosts_browser file is not updated since 2021-07, so we set a 14 days cache ttl
-  ['https://zerodot1.gitlab.io/CoinBlockerLists/hosts_browser', true, true, TTL.TWO_WEEKS()],
+  ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true, false, TTL.THREE_HOURS()],
   // Curben's UrlHaus Malicious URL Blocklist
   // Curben's UrlHaus Malicious URL Blocklist
   // 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
   // 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
   // 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
   // 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
-  ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true],
+  ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true, TTL.THREE_HOURS()],
   // Curben's Phishing URL Blocklist
   // Curben's Phishing URL Blocklist
   // Covered by lib/get-phishing-domains.ts
   // Covered by lib/get-phishing-domains.ts
   // 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt'
   // 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt'
@@ -32,12 +29,15 @@ export const HOSTS = [
 ] as const;
 ] as const;
 
 
 export const DOMAIN_LISTS = [
 export const DOMAIN_LISTS = [
+  // CoinBlockerList
+  // Although the hosts file is still actively maintained, the hosts_browser file has not been updated since 2021-07, so we set a 14 days cache ttl
+  ['https://zerodot1.gitlab.io/CoinBlockerLists/list_browser.txt', true, TTL.TWO_WEEKS()],
   // BarbBlock
   // BarbBlock
   // The barbblock list has never been updated since 2019-05, so we set a 14 days cache ttl
   // The barbblock list has never been updated since 2019-05, so we set a 14 days cache ttl
   ['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', true, TTL.TWO_WEEKS()],
   ['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', true, TTL.TWO_WEEKS()],
   // DigitalSide Threat-Intel - OSINT Hub
   // DigitalSide Threat-Intel - OSINT Hub
   // Update once per day
   // Update once per day
-  ['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, 24 * 60 * 60 * 1000],
+  ['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, TTL.ONE_DAY()],
   // AdGuard CNAME Filter Combined
   // AdGuard CNAME Filter Combined
   // Update on a 7 days basis, so we add a 3 days cache ttl
   // Update on a 7 days basis, so we add a 3 days cache ttl
   ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, TTL.THREE_DAYS()],
   ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, TTL.THREE_DAYS()],
@@ -52,11 +52,11 @@ export const ADGUARD_FILTERS = [
   [
   [
     'https://easylist.to/easylist/easylist.txt',
     'https://easylist.to/easylist/easylist.txt',
     [
     [
+      'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist.txt',
+      'https://ublockorigin.pages.dev/thirdparties/easylist.txt',
       'https://easylist-downloads.adblockplus.org/easylist.txt',
       'https://easylist-downloads.adblockplus.org/easylist.txt',
       'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easylist.txt',
       'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easylist.txt',
-      'https://secure.fanboy.co.nz/easylist.txt',
-      'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist.txt',
-      'https://ublockorigin.pages.dev/thirdparties/easylist.txt'
+      'https://secure.fanboy.co.nz/easylist.txt'
     ],
     ],
     TTL.TWLVE_HOURS()
     TTL.TWLVE_HOURS()
   ],
   ],
@@ -86,21 +86,24 @@ export const ADGUARD_FILTERS = [
     'https://ublockorigin.github.io/uAssetsCDN/filters/filters.min.txt',
     'https://ublockorigin.github.io/uAssetsCDN/filters/filters.min.txt',
     [
     [
       'https://ublockorigin.pages.dev/filters/filters.min.txt'
       'https://ublockorigin.pages.dev/filters/filters.min.txt'
-    ]
+    ],
+    TTL.THREE_HOURS()
   ],
   ],
   // uBlock Origin Badware Risk List
   // uBlock Origin Badware Risk List
   [
   [
     'https://ublockorigin.github.io/uAssetsCDN/filters/badware.min.txt',
     'https://ublockorigin.github.io/uAssetsCDN/filters/badware.min.txt',
     [
     [
       'https://ublockorigin.pages.dev/filters/badware.min.txt'
       'https://ublockorigin.pages.dev/filters/badware.min.txt'
-    ]
+    ],
+    TTL.THREE_HOURS()
   ],
   ],
   // uBlock Origin Privacy List
   // uBlock Origin Privacy List
   [
   [
     'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.min.txt',
     'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.min.txt',
     [
     [
       'https://ublockorigin.pages.dev/filters/privacy.min.txt'
       'https://ublockorigin.pages.dev/filters/privacy.min.txt'
-    ]
+    ],
+    TTL.THREE_HOURS()
   ],
   ],
   // uBlock Origin Resource Abuse: merged in uBlock Origin Privacy List
   // uBlock Origin Resource Abuse: merged in uBlock Origin Privacy List
   // [
   // [
@@ -114,29 +117,32 @@ export const ADGUARD_FILTERS = [
     'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.min.txt',
     'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.min.txt',
     [
     [
       'https://ublockorigin.pages.dev/filters/unbreak.min.txt'
       'https://ublockorigin.pages.dev/filters/unbreak.min.txt'
-    ]
+    ],
+    TTL.THREE_HOURS()
   ],
   ],
   // AdGuard Base Filter
   // AdGuard Base Filter
-  'https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt',
+  ['https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt', null, TTL.THREE_HOURS()],
   // AdGuard Mobile AD
   // AdGuard Mobile AD
-  'https://filters.adtidy.org/extension/ublock/filters/11_optimized.txt',
+  ['https://filters.adtidy.org/extension/ublock/filters/11_optimized.txt', null, TTL.THREE_HOURS()],
   // AdGuard Tracking Protection
   // AdGuard Tracking Protection
-  'https://filters.adtidy.org/extension/ublock/filters/3_optimized.txt',
+  ['https://filters.adtidy.org/extension/ublock/filters/3_optimized.txt', null, TTL.THREE_HOURS()],
   // AdGuard Japanese filter
   // AdGuard Japanese filter
-  'https://filters.adtidy.org/extension/ublock/filters/7_optimized.txt',
+  ['https://filters.adtidy.org/extension/ublock/filters/7_optimized.txt', null, TTL.THREE_HOURS()],
   // AdGuard Chinese filter (EasyList China + AdGuard Chinese filter)
   // AdGuard Chinese filter (EasyList China + AdGuard Chinese filter)
-  'https://filters.adtidy.org/extension/ublock/filters/224_optimized.txt',
+  ['https://filters.adtidy.org/extension/ublock/filters/224_optimized.txt', null, TTL.THREE_HOURS()],
   // AdGuard Annoyances filter
   // AdGuard Annoyances filter
-  'https://filters.adtidy.org/android/filters/14_optimized.txt',
+  ['https://filters.adtidy.org/android/filters/14_optimized.txt', null, TTL.THREE_HOURS()],
   // EasyList Germany filter
   // EasyList Germany filter
   [
   [
     'https://easylist.to/easylistgermany/easylistgermany.txt',
     'https://easylist.to/easylistgermany/easylistgermany.txt',
     [
     [
       'https://easylist-downloads.adblockplus.org/easylistgermany.txt'
       'https://easylist-downloads.adblockplus.org/easylistgermany.txt'
-    ]
+    ],
+    TTL.TWLVE_HOURS()
   ],
   ],
   // GameConsoleAdblockList
   // GameConsoleAdblockList
-  'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt',
+  // Update almost once per 1 to 3 months, let's set a 10 days cache ttl
+  ['https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt', null, TTL.TEN_DAYS()],
   // PiHoleBlocklist
   // PiHoleBlocklist
   // Update almost once per 3 months, let's set a 10 days cache ttl
   // Update almost once per 3 months, let's set a 10 days cache ttl
   [
   [