ソースを参照

Perf: prefer hosts over AdBlock syntax

SukkaW 2 年前
コミット
4cda4df451
3 ファイル変更、50 行追加、41 行削除
  1. +11 -10
      Build/build-phishing-domainset.ts
  2. +2 -2
      Build/lib/parse-filter.ts
  3. +37 -29
      Build/lib/reject-data-source.ts

+ 11 - 10
Build/build-phishing-domainset.ts

@@ -1,4 +1,4 @@
-import { processFilterRules } from './lib/parse-filter';
+import { processFilterRules, processHosts } from './lib/parse-filter';
 import path from 'path';
 import { createRuleset } from './lib/create-file';
 import { processLine } from './lib/process-line';
@@ -65,15 +65,16 @@ const BLACK_TLD = new Set([
 ]);
 
 export const buildPhishingDomainSet = task(import.meta.path, async () => {
-  const [{ black: domainSet }, gorhill] = await Promise.all([
-    processFilterRules(
-      'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
-      [
-        'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
-        // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
-        // 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
-      ]
-    ),
+  const [domainSet, gorhill] = await Promise.all([
+    processHosts('https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true),
+    // processFilterRules(
+    //   'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
+    //   [
+    //     'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
+    //     // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
+    //     // 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
+    //   ]
+    // ),
     getGorhillPublicSuffixPromise()
   ]);
 

+ 2 - 2
Build/lib/parse-filter.ts

@@ -62,7 +62,7 @@ export async function processDomainLists(domainListsUrl: string | URL) {
   return domainSets;
 }
 
-export async function processHosts(hostsUrl: string | URL, includeAllSubDomain = false) {
+export async function processHosts(hostsUrl: string | URL, includeAllSubDomain = false, skipDomainCheck = false) {
   console.time(`- processHosts: ${hostsUrl}`);
 
   if (typeof hostsUrl === 'string') {
@@ -85,7 +85,7 @@ export async function processHosts(hostsUrl: string | URL, includeAllSubDomain =
       foundDebugDomain = true;
     }
 
-    const domain = normalizeDomain(_domain);
+    const domain = skipDomainCheck ? _domain : normalizeDomain(_domain);
     if (domain) {
       if (includeAllSubDomain) {
         domainSets.add(`.${domain}`);

+ 37 - 29
Build/lib/reject-data-source.ts

@@ -1,11 +1,19 @@
-export const HOSTS: [string, boolean][] = [
+export const HOSTS = [
   // ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', false],
   ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false],
   ['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', false],
   ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false],
   ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false],
-  ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', false]
-];
+  ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', false],
+  // Curben's UrlHaus Malicious URL Blocklist
+  ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true],
+  // Curben's Phishing URL Blocklist
+  ['https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true],
+  // Curben's PUP Domains Blocklist
+  ['https://curbengh.github.io/pup-filter/pup-filter-hosts.txt', true, true],
+  // BarbBlock
+  ['https://paulgb.github.io/BarbBlock/blacklists/hosts-file.txt', true, true]
+] as const;
 
 export const ADGUARD_FILTERS = [
   // EasyList
@@ -103,33 +111,33 @@ export const ADGUARD_FILTERS = [
     ]
   ],
   // Curben's UrlHaus Malicious URL Blocklist
-  [
-    'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
-    [
-      'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
-      // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
-      // 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
-      'https://ublockorigin.github.io/uAssets/thirdparties/urlhaus-filter/urlhaus-filter-online.txt',
-    ]
-  ],
+  // [
+  //   'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
+  //   [
+  //     'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
+  //     // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
+  //     // 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
+  //     'https://ublockorigin.github.io/uAssets/thirdparties/urlhaus-filter/urlhaus-filter-online.txt',
+  //   ]
+  // ],
   // Curben's Phishing URL Blocklist
-  [
-    'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
-    [
-      'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
-      // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
-      // 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
-    ]
-  ],
+  // [
+  //   'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
+  //   [
+  //     'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
+  //     // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
+  //     // 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
+  //   ]
+  // ],
   // Curben's PUP Domains Blocklist
-  [
-    'https://curbengh.github.io/pup-filter/pup-filter-agh.txt',
-    [
-      'https://pup-filter.pages.dev/pup-filter-agh.txt'
-      // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
-      // 'https://malware-filter.gitlab.io/malware-filter/pup-filter-agh.txt'
-    ]
-  ],
+  // [
+  //   'https://curbengh.github.io/pup-filter/pup-filter-agh.txt',
+  //   [
+  //     'https://pup-filter.pages.dev/pup-filter-agh.txt'
+  //     // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
+  //     // 'https://malware-filter.gitlab.io/malware-filter/pup-filter-agh.txt'
+  //   ]
+  // ],
   // GameConsoleAdblockList
   'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt',
   // PiHoleBlocklist
@@ -142,7 +150,7 @@ export const ADGUARD_FILTERS = [
   // Spam404
   'https://raw.githubusercontent.com/Spam404/lists/master/adblock-list.txt',
   // BarbBlock
-  'https://paulgb.github.io/BarbBlock/blacklists/ublock-origin.txt',
+  // 'https://paulgb.github.io/BarbBlock/blacklists/ublock-origin.txt',
   // Brave First Party & First Party CNAME
   'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty.txt'
 ] as const;