瀏覽代碼

Update CDN & Phishing & AI Hosts

SukkaW 2 年之前
父節點
當前提交
7e13ae7a24
共有 4 個文件被更改,包括 82 次插入和 39 次刪除
  1. 54 35
      Build/build-phishing-domainset.ts
  2. 6 2
      Build/lib/parse-filter.ts
  3. 10 2
      Source/domainset/cdn.conf
  4. 12 0
      Source/non_ip/ai.conf

+ 54 - 35
Build/build-phishing-domainset.ts

@@ -1,4 +1,4 @@
-import { processHosts } from './lib/parse-filter';
+import { processDomainLists, processHosts } from './lib/parse-filter';
 import path from 'path';
 import { createRuleset } from './lib/create-file';
 import { processLine } from './lib/process-line';
@@ -21,52 +21,69 @@ const WHITELIST_DOMAIN = new Set([
   'notion.site'
 ]);
 const BLACK_TLD = new Set([
-  'xyz',
-  'top',
-  'win',
-  'vip',
-  'site',
-  'space',
-  'online',
-  'icu',
-  'fun',
-  'shop',
-  'cool',
-  'cyou',
-  'id',
-  'pro',
-  'za.com',
-  'sa.com',
-  'ltd',
-  'group',
-  'rest',
-  'tech',
-  'link',
-  'ink',
+  'autos',
   'bar',
-  'tokyo',
-  'tk',
-  'cf',
-  'gq',
-  'ga',
-  'ml',
+  'biz',
+  'bond',
+  'business',
+  'buzz',
   'cc',
-  'cn',
-  'codes',
+  'cf',
+  'cfd',
+  'click',
   'cloud',
   'club',
-  'click',
-  'cfd',
+  'cn',
+  'codes',
+  'com.cn',
+  'cool',
+  'cyou',
   'fit',
+  'fun',
+  'ga',
+  'gd',
+  'gq',
+  'group',
+  'host',
+  'icu',
+  'id',
+  'info',
+  'ink',
+  'life',
+  'live',
+  'link',
+  'ltd',
+  'ml',
   'mobi',
-  'buzz',
   'one',
-  'com.cn'
+  'online',
+  'pro',
+  'pl',
+  'pw',
+  'rest',
+  'rf.gd',
+  'sa.com',
+  'sbs',
+  'shop',
+  'site',
+  'space',
+  'store',
+  'tech',
+  'tk',
+  'tokyo',
+  'top',
+  'vip',
+  'vn',
+  'website',
+  'win',
+  'xyz',
+  'za.com'
 ]);
 
 export const buildPhishingDomainSet = task(import.meta.path, async () => {
   const [domainSet, gorhill] = await Promise.all([
     processHosts('https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true),
+    // processDomainLists('https://phishing.army/download/phishing_army_blocklist.txt', true),
     // processFilterRules(
     //   'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
     //   [
@@ -78,6 +95,8 @@ export const buildPhishingDomainSet = task(import.meta.path, async () => {
     getGorhillPublicSuffixPromise()
   ]);
 
+  // _domainSet2.forEach(i => domainSet.add(i));
+
   traceSync('* whitelist', () => {
     const trieForRemovingWhiteListed = createTrie(domainSet);
     WHITELIST_DOMAIN.forEach(white => {

+ 6 - 2
Build/lib/parse-filter.ts

@@ -37,7 +37,7 @@ const normalizeDomain = (domain: string) => {
   return null;
 };
 
-export async function processDomainLists(domainListsUrl: string | URL) {
+export async function processDomainLists(domainListsUrl: string | URL, includeAllSubDomain = false) {
   if (typeof domainListsUrl === 'string') {
     domainListsUrl = new URL(domainListsUrl);
   }
@@ -55,7 +55,11 @@ export async function processDomainLists(domainListsUrl: string | URL) {
       foundDebugDomain = true;
     }
 
-    domainSets.add(domainToAdd);
+    if (includeAllSubDomain) {
+      domainSets.add(`.${domainToAdd}`);
+    } else {
+      domainSets.add(domainToAdd);
+    }
   }
 
   return domainSets;

+ 10 - 2
Source/domainset/cdn.conf

@@ -169,8 +169,7 @@ packages-cf.termux.dev
 # Homebrew
 formulae.brew.sh
 # crates.io
-crates.io
-static.crates.io
+.crates.io
 # PHP
 .getcomposer.org
 .packagist.org
@@ -182,6 +181,7 @@ cdn.sheetjs.com
 repo.nextdns.io
 oss-binaries.phusionpassenger.com
 release.runcloud.io
+curl.se
 
 # >> WordPress CDN
 s0.wp.com
@@ -995,6 +995,10 @@ static.maxmind.com
 download.maxmind.com
 # roblox
 .rbxcdn.com
+# EA.com (Origin)
+cdn.mcr.ea.com
+pl.ea.com
+media.contentapi.ea.com
 # LottieFiles
 assets0.lottiefiles.com
 assets1.lottiefiles.com
@@ -1042,6 +1046,8 @@ static.tumblr.com
 .media.tumblr.com
 # SoundCloud
 .sndcdn.com
+# SoundCloud Widget
+w.soundcloud.com
 # Vercel
 assets.vercel.com
 image.ship.vercel.com
@@ -1211,6 +1217,8 @@ static.grammarly.com
 # OpenAI
 .oaistatic.com
 .oaiusercontent.com
+# Claude AI
+s-cdn.anthropic.com
 # Manhuagui
 i.hamreus.com
 us.hamreus.com

+ 12 - 0
Source/non_ip/ai.conf

@@ -0,0 +1,12 @@
+# $ meta_title Sukka's Ruleset - AIGC Domains
+# $ meta_description This file contains domains of AIGC services, including OpenAI, Anthropic (Claude), xAI and Perplexity.
+
+DOMAIN-SUFFIX,openai.com
+DOMAIN-SUFFIX,oaistatic.com
+DOMAIN-SUFFIX,oaiusercontent.com
+DOMAIN-SUFFIX,ai.com
+DOMAIN-SUFFIX,x.ai
+DOMAIN-SUFFIX,openaiapi-site.azureedge.net
+DOMAIN-SUFFIX,perplexity.ai
+DOMAIN-SUFFIX,anthropic.com
+DOMAIN-SUFFIX,claude.ai