瀏覽代碼

Add new phishing feed / speed up domains sort

SukkaW 2 年之前
父節點
當前提交
e970006445

+ 4 - 4
Build/build-internal-cdn-rules.ts

@@ -3,7 +3,7 @@ import path from 'path';
 import * as tldts from 'tldts';
 import * as tldts from 'tldts';
 import { processLine } from './lib/process-line';
 import { processLine } from './lib/process-line';
 import { readFileByLine } from './lib/fetch-text-by-line';
 import { readFileByLine } from './lib/fetch-text-by-line';
-import { createDomainSorter } from './lib/stable-sort-domain';
+import { sortDomains } from './lib/stable-sort-domain';
 import { task } from './lib/trace-runner';
 import { task } from './lib/trace-runner';
 import { compareAndWriteFile } from './lib/create-file';
 import { compareAndWriteFile } from './lib/create-file';
 import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
 import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
@@ -58,8 +58,8 @@ export const buildInternalCDNDomains = task(import.meta.path, async () => {
     }
     }
   };
   };
 
 
-  const [domainSorter] = await Promise.all([
-    getGorhillPublicSuffixPromise().then(createDomainSorter),
+  const [gorhill] = await Promise.all([
+    getGorhillPublicSuffixPromise(),
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf')),
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf')),
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf')),
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf')),
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global_plus.conf')),
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global_plus.conf')),
@@ -74,7 +74,7 @@ export const buildInternalCDNDomains = task(import.meta.path, async () => {
 
 
   return compareAndWriteFile(
   return compareAndWriteFile(
     [
     [
-      ...Array.from(set).sort(domainSorter).map(i => `SUFFIX,${i}`),
+      ...sortDomains(Array.from(set), gorhill).map(i => `SUFFIX,${i}`),
       ...Array.from(keywords).sort().map(i => `REGEX,${i}`)
       ...Array.from(keywords).sort().map(i => `REGEX,${i}`)
     ],
     ],
     path.resolve(import.meta.dir, '../List/internal/cdn.txt')
     path.resolve(import.meta.dir, '../List/internal/cdn.txt')

+ 12 - 8
Build/build-reject-domainset.ts

@@ -1,17 +1,16 @@
 // @ts-check
 // @ts-check
-import fsp from 'fs/promises';
 import path from 'path';
 import path from 'path';
 
 
-import { processHosts, processFilterRules } from './lib/parse-filter';
+import { processHosts, processFilterRules, processDomainLists } from './lib/parse-filter';
 import { createTrie } from './lib/trie';
 import { createTrie } from './lib/trie';
 
 
-import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } from './lib/reject-data-source';
+import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST, DOMAIN_LISTS } from './lib/reject-data-source';
 import { createRuleset, compareAndWriteFile } from './lib/create-file';
 import { createRuleset, compareAndWriteFile } from './lib/create-file';
 import { processLine } from './lib/process-line';
 import { processLine } from './lib/process-line';
 import { domainDeduper } from './lib/domain-deduper';
 import { domainDeduper } from './lib/domain-deduper';
 import createKeywordFilter from './lib/aho-corasick';
 import createKeywordFilter from './lib/aho-corasick';
 import { readFileByLine } from './lib/fetch-text-by-line';
 import { readFileByLine } from './lib/fetch-text-by-line';
-import { createDomainSorter } from './lib/stable-sort-domain';
+import { sortDomains } from './lib/stable-sort-domain';
 import { traceSync, task, traceAsync } from './lib/trace-runner';
 import { traceSync, task, traceAsync } from './lib/trace-runner';
 import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
 import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
 import * as tldts from 'tldts';
 import * as tldts from 'tldts';
@@ -38,6 +37,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
           domainSets.add(host);
           domainSets.add(host);
         });
         });
       })),
       })),
+      ...DOMAIN_LISTS.map(entry => processDomainLists(entry[0], entry[1])),
       ...ADGUARD_FILTERS.map(input => {
       ...ADGUARD_FILTERS.map(input => {
         const promise = typeof input === 'string'
         const promise = typeof input === 'string'
           ? processFilterRules(input)
           ? processFilterRules(input)
@@ -144,14 +144,15 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
 
 
   // Dedupe domainSets
   // Dedupe domainSets
   const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
   const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
-  console.log(`Deduped ${previousSize - dudupedDominArray.length} rules!`);
+  console.log(`Deduped ${previousSize - dudupedDominArray.length} rules from covered subdomain!`);
+  console.log(`Final size ${dudupedDominArray.length}`);
 
 
   // Create reject stats
   // Create reject stats
   const rejectDomainsStats: Array<[string, number]> = traceSync(
   const rejectDomainsStats: Array<[string, number]> = traceSync(
     '* Collect reject domain stats',
     '* Collect reject domain stats',
     () => Object.entries(
     () => Object.entries(
       dudupedDominArray.reduce<Record<string, number>>((acc, cur) => {
       dudupedDominArray.reduce<Record<string, number>>((acc, cur) => {
-        const suffix = tldts.getDomain(cur, { allowPrivateDomains: false, detectIp: false });
+        const suffix = tldts.getDomain(cur, { allowPrivateDomains: false, detectIp: false, validateHostname: false });
         if (suffix) {
         if (suffix) {
           acc[suffix] = (acc[suffix] ?? 0) + 1;
           acc[suffix] = (acc[suffix] ?? 0) + 1;
         }
         }
@@ -174,7 +175,10 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
     '',
     '',
     'Build from:',
     'Build from:',
     ...HOSTS.map(host => ` - ${host[0]}`),
     ...HOSTS.map(host => ` - ${host[0]}`),
-    ...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
+    ...DOMAIN_LISTS.map(domainList => ` - ${domainList[0]}`),
+    ...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`),
+    ' - https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt',
+    ' - https://phishing.army/download/phishing_army_blocklist.txt'
   ];
   ];
 
 
   return Promise.all([
   return Promise.all([
@@ -182,7 +186,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
       'Sukka\'s Ruleset - Reject Base',
       'Sukka\'s Ruleset - Reject Base',
       description,
       description,
       new Date(),
       new Date(),
-      traceSync('* Sort reject domainset', () => dudupedDominArray.sort(createDomainSorter(gorhill))),
+      traceSync('* Sort reject domainset', () => sortDomains(dudupedDominArray, gorhill)),
       'domainset',
       'domainset',
       path.resolve(import.meta.dir, '../List/domainset/reject.conf'),
       path.resolve(import.meta.dir, '../List/domainset/reject.conf'),
       path.resolve(import.meta.dir, '../Clash/domainset/reject.txt')
       path.resolve(import.meta.dir, '../Clash/domainset/reject.txt')

+ 5 - 2
Build/build-speedtest-domainset.ts

@@ -1,13 +1,14 @@
 import { domainDeduper } from './lib/domain-deduper';
 import { domainDeduper } from './lib/domain-deduper';
 import path from 'path';
 import path from 'path';
 import { createRuleset } from './lib/create-file';
 import { createRuleset } from './lib/create-file';
-import domainSorter from './lib/stable-sort-domain';
+import { sortDomains } from './lib/stable-sort-domain';
 
 
 import { Sema } from 'async-sema';
 import { Sema } from 'async-sema';
 import * as tldts from 'tldts';
 import * as tldts from 'tldts';
 import { task } from './lib/trace-runner';
 import { task } from './lib/trace-runner';
 import { fetchWithRetry } from './lib/fetch-retry';
 import { fetchWithRetry } from './lib/fetch-retry';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { SHARED_DESCRIPTION } from './lib/constants';
+import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
 
 
 const s = new Sema(3);
 const s = new Sema(3);
 
 
@@ -140,7 +141,9 @@ export const buildSpeedtestDomainSet = task(import.meta.path, async () => {
     }
     }
   }
   }
 
 
-  const deduped = domainDeduper(Array.from(domains)).sort(domainSorter);
+  const gorhill = await getGorhillPublicSuffixPromise();
+  const deduped = sortDomains(domainDeduper(Array.from(domains)), gorhill);
+
   const description = [
   const description = [
     ...SHARED_DESCRIPTION,
     ...SHARED_DESCRIPTION,
     '',
     '',

+ 10 - 11
Build/lib/get-phishing-domains.ts

@@ -1,7 +1,7 @@
 import fsp from 'fs/promises';
 import fsp from 'fs/promises';
 import path from 'path';
 import path from 'path';
 import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
 import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
-import { processHosts } from './parse-filter';
+import { processDomainLists, processHosts } from './parse-filter';
 import { traceAsync, traceSync } from './trace-runner';
 import { traceAsync, traceSync } from './trace-runner';
 import * as tldts from 'tldts';
 import * as tldts from 'tldts';
 import { createTrie } from './trie';
 import { createTrie } from './trie';
@@ -33,7 +33,12 @@ const BLACK_TLD = new Set([
   'club',
   'club',
   'cn',
   'cn',
   'codes',
   'codes',
+  'co.uk',
+  'co.in',
+  'com.br',
   'com.cn',
   'com.cn',
+  'com.pl',
+  'com.vn',
   'cool',
   'cool',
   'cyou',
   'cyou',
   'fit',
   'fit',
@@ -53,6 +58,7 @@ const BLACK_TLD = new Set([
   'ltd',
   'ltd',
   'ml',
   'ml',
   'mobi',
   'mobi',
+  'net.pl',
   'one',
   'one',
   'online',
   'online',
   'pro',
   'pro',
@@ -79,19 +85,12 @@ const BLACK_TLD = new Set([
 ]);
 ]);
 
 
 export const getPhishingDomains = () => traceAsync('get phishing domains', async () => {
 export const getPhishingDomains = () => traceAsync('get phishing domains', async () => {
-  const [domainSet, gorhill] = await Promise.all([
+  const [domainSet, domainSet2, gorhill] = await Promise.all([
     processHosts('https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true),
     processHosts('https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true),
-    // processDomainLists('https://phishing.army/download/phishing_army_blocklist.txt', true),
-    // processFilterRules(
-    //   'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
-    //   [
-    //     'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
-    //     // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
-    //     // 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
-    //   ]
-    // ),
+    processDomainLists('https://phishing.army/download/phishing_army_blocklist.txt', true),
     getGorhillPublicSuffixPromise()
     getGorhillPublicSuffixPromise()
   ]);
   ]);
+  domainSet2.forEach((domain) => domainSet.add(domain));
 
 
   traceSync.skip('* whitelisting phishing domains', () => {
   traceSync.skip('* whitelisting phishing domains', () => {
     const trieForRemovingWhiteListed = createTrie(domainSet);
     const trieForRemovingWhiteListed = createTrie(domainSet);

+ 15 - 15
Build/lib/parse-filter.ts

@@ -37,27 +37,27 @@ const normalizeDomain = (domain: string) => {
   return h[0] === '.' ? h.slice(1) : h;
   return h[0] === '.' ? h.slice(1) : h;
 };
 };
 
 
-export async function processDomainLists(domainListsUrl: string, includeAllSubDomain = false) {
-  const domainSets = new Set<string>();
+export function processDomainLists(domainListsUrl: string, includeAllSubDomain = false) {
+  return traceAsync(`- processDomainLists: ${domainListsUrl}`, async () => {
+    const domainSets = new Set<string>();
 
 
-  for await (const line of await fetchRemoteTextAndReadByLine(domainListsUrl)) {
-    const domainToAdd = processLine(line);
-    if (!domainToAdd) {
-      continue;
-    }
+    for await (const line of await fetchRemoteTextAndReadByLine(domainListsUrl)) {
+      const domainToAdd = processLine(line);
+      if (!domainToAdd) continue;
 
 
-    if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) {
-      warnOnce(domainListsUrl, false, DEBUG_DOMAIN_TO_FIND);
-      foundDebugDomain = true;
-    }
+      if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) {
+        warnOnce(domainListsUrl, false, DEBUG_DOMAIN_TO_FIND);
+        foundDebugDomain = true;
+      }
 
 
-    domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
-  }
+      domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
+    }
 
 
-  return domainSets;
+    return domainSets;
+  });
 }
 }
 
 
-export async function processHosts(hostsUrl: string, includeAllSubDomain = false, skipDomainCheck = false) {
+export function processHosts(hostsUrl: string, includeAllSubDomain = false, skipDomainCheck = false) {
   return traceAsync(`- processHosts: ${hostsUrl}`, async () => {
   return traceAsync(`- processHosts: ${hostsUrl}`, async () => {
     const domainSets = new Set<string>();
     const domainSets = new Set<string>();
 
 

+ 5 - 0
Build/lib/reject-data-source.ts

@@ -26,6 +26,11 @@ export const HOSTS = [
   ['https://paulgb.github.io/BarbBlock/blacklists/hosts-file.txt', true, true]
   ['https://paulgb.github.io/BarbBlock/blacklists/hosts-file.txt', true, true]
 ] as const;
 ] as const;
 
 
+export const DOMAIN_LISTS = [
+  // DigitalSide Threat-Intel - OSINT Hub
+  ['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true]
+] as const;
+
 export const ADGUARD_FILTERS = [
 export const ADGUARD_FILTERS = [
   // EasyList
   // EasyList
   [
   [

+ 0 - 13
Build/lib/stable-sort-domain.test.ts

@@ -1,13 +0,0 @@
-import domainSorter from './stable-sort-domain';
-// eslint-disable-next-line import/no-unresolved -- fuck eslint-import
-import { describe, it, expect } from 'bun:test';
-
-describe('stable-sort-domain', () => {
-  it('.ks.cn, .tag.unclaimedproperty.ks.gov', () => {
-    expect(domainSorter('.ks.cn', '.tag.unclaimedproperty.ks.gov')).toBe(-1);
-  });
-
-  it('.fgnzdb.xyz, .hub.fghtem.com', () => {
-    expect(domainSorter('.fgnzdb.xyz', '.hub.fghtem.com')).toBe(1);
-  });
-});

+ 20 - 35
Build/lib/stable-sort-domain.ts

@@ -10,18 +10,16 @@ const compare = (a: string | null, b: string | null) => {
     return -1;
     return -1;
   }
   }
 
 
-  if (a.length !== b.length) {
-    const r = a.length - b.length;
-    if (r > 0) {
-      return 1;
-    }
-    if (r < 0) {
-      return -1;
-    }
-    return 0;
+  const aLen = a.length;
+  const r = aLen - b.length;
+  if (r > 0) {
+    return 1;
+  }
+  if (r < 0) {
+    return -1;
   }
   }
 
 
-  for (let i = 0; i < a.length; i++) {
+  for (let i = 0; i < aLen; i++) {
     if (b[i] == null) {
     if (b[i] == null) {
       return 1;
       return 1;
     }
     }
@@ -35,34 +33,21 @@ const compare = (a: string | null, b: string | null) => {
   return 0;
   return 0;
 };
 };
 
 
-const createDomainSorter = (gorhill: PublicSuffixList | null = null) => {
-  if (gorhill) {
-    const getDomain = createCachedGorhillGetDomain(gorhill);
+export const sortDomains = (inputs: string[], gorhill: PublicSuffixList) => {
+  const getDomain = createCachedGorhillGetDomain(gorhill);
+  const domains = inputs.reduce<Record<string, string>>((acc, cur) => {
+    acc[cur] ||= getDomain(cur);
+    return acc;
+  }, {});
 
 
-    return (a: string, b: string) => {
-      if (a === b) return 0;
-
-      const aDomain = getDomain(a);
-      const bDomain = getDomain(b);
-
-      const resultDomain = compare(aDomain, bDomain);
-      return resultDomain !== 0 ? resultDomain : compare(a, b);
-    };
-  }
-
-  // eslint-disable-next-line @typescript-eslint/no-var-requires -- fuck
-  const tldts = require('./cached-tld-parse');
-
-  return (a: string, b: string) => {
+  const sorter = (a: string, b: string) => {
     if (a === b) return 0;
     if (a === b) return 0;
 
 
-    const aDomain = tldts.parse(a).domain;
-    const bDomain = tldts.parse(b).domain;
+    const aDomain = domains[a];
+    const bDomain = domains[b];
 
 
-    const resultDomain = compare(aDomain, bDomain);
-    return resultDomain !== 0 ? resultDomain : compare(a, b);
+    return compare(aDomain, bDomain) || compare(a, b);
   };
   };
-};
 
 
-export default createDomainSorter();
-export { createDomainSorter };
+  return inputs.sort(sorter);
+};