浏览代码

Update CDN & Reject Hosts

SukkaW 1 年之前
父节点
当前提交
2d4c07d62f
共有 4 个文件被更改,包括 61 次插入 和 36 次删除
  1. 10 7
      Build/constants/reject-data-source.ts
  2. 34 28
      Build/lib/get-phishing-domains.ts
  3. 7 1
      Source/domainset/cdn.conf
  4. 10 0
      Source/domainset/reject_sukka.conf

+ 10 - 7
Build/constants/reject-data-source.ts

@@ -22,12 +22,7 @@ export const HOSTS_EXTRA: HostsSource[] = [
   // Dan Pollock's hosts file, 0.0.0.0 version is 30 KiB smaller
   ['https://someonewhocares.org/hosts/zero/hosts', null, true, TTL.THREE_HOURS()],
   // ad-wars is not actively maintained, so we set a two-week cache ttl
-  ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.TWO_WEEKS()],
-  [
-    'https://raw.githubusercontent.com/durablenapkin/scamblocklist/master/hosts.txt',
-    [],
-    true, TTL.TWLVE_HOURS()
-  ]
+  ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.TWO_WEEKS()]
 ];
 
 export const DOMAIN_LISTS: HostsSource[] = [
@@ -84,6 +79,10 @@ export const DOMAIN_LISTS_EXTRA: HostsSource[] = [
   ['https://raw.githubusercontent.com/Spam404/lists/master/main-blacklist.txt', [], true, TTL.TEN_DAYS()]
 ];
 
+export const PHISHING_HOSTS_EXTRA: HostsSource[] = [
+  ['https://raw.githubusercontent.com/durablenapkin/scamblocklist/master/hosts.txt', [], true, TTL.THREE_DAYS()]
+];
+
 export const PHISHING_DOMAIN_LISTS_EXTRA: HostsSource[] = [
   [
     'https://phishing-filter.pages.dev/phishing-filter-domains.txt',
@@ -396,5 +395,9 @@ export const PREDEFINED_WHITELIST = [
   'ab.chatgpt.com', // EasyPrivacy blocks this
   'jnn-pa.googleapis.com', // ad-wars
   'imasdk.googleapis.com', // ad-wars
-  '.l.qq.com' // ad-wars
+  '.l.qq.com', // ad-wars
+  '.clients.your-server.de', // rDNS .static.183.213.201.138.clients.your-server.de
+  '.bc.googleusercontent.com', // rDNS 218.178.172.34.bc.googleusercontent.com
+  '.host.secureserver.net', // rDNS .64.149.167.72.host.secureserver.net
+  '.ip.linodeusercontent.com' // rDNS 45-79-169-153.ip.linodeusercontent.com
 ];

+ 34 - 28
Build/lib/get-phishing-domains.ts

@@ -1,10 +1,10 @@
-import { processDomainLists } from './parse-filter';
+import { processDomainLists, processHosts } from './parse-filter';
 import * as tldts from 'tldts-experimental';
 
 import { dummySpan } from '../trace';
 import type { Span } from '../trace';
 import { appendArrayInPlaceCurried } from './append-array-in-place';
-import { PHISHING_DOMAIN_LISTS_EXTRA } from '../constants/reject-data-source';
+import { PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA } from '../constants/reject-data-source';
 import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt';
 import picocolors from 'picocolors';
 import createKeywordFilter from './aho-corasick';
@@ -22,7 +22,7 @@ const BLACK_TLD = new Set([
   'ga', 'gd', 'gives', 'gq', 'group', 'host',
   'icu', 'id', 'info', 'ink',
   'lat', 'life', 'live', 'link', 'loan', 'lol', 'ltd',
-  'me', 'men', 'ml', 'mobi', 'mom',
+  'me', 'men', 'ml', 'mobi', 'mom', 'monster',
   'net.pl',
   'one', 'online',
   'party', 'pro', 'pl', 'pw',
@@ -48,6 +48,12 @@ const WHITELIST_MAIN_DOMAINS = new Set([
   'zendesk.com'
 ]);
 
+const leathalKeywords = createKeywordFilter([
+  'vinted-',
+  'inpost-pl',
+  'vlnted-'
+]);
+
 const sensitiveKeywords = createKeywordFilter([
   '.amazon-',
   '-amazon',
@@ -65,14 +71,15 @@ const sensitiveKeywords = createKeywordFilter([
   'booking-com',
   'booking.com-',
   'booking-eu',
-  'vinted-cz',
+  'vinted-',
   'inpost-pl',
   'login.microsoft',
   'login-microsoft',
   'microsoftonline',
   'google.com-',
   'minecraft',
-  'staemco'
+  'staemco',
+  'oferta'
 ]);
 const lowKeywords = createKeywordFilter([
   'transactions-',
@@ -96,7 +103,8 @@ const lowKeywords = createKeywordFilter([
   'microsof',
   'passwordreset',
   '.google-',
-  'recover'
+  'recover',
+  'banking'
 ]);
 
 const cacheKey = createCacheKey(__filename);
@@ -154,20 +162,15 @@ const processPhihsingDomains = cache(function processPhihsingDomains(domainArr:
     if (
       // !WHITELIST_MAIN_DOMAINS.has(apexDomain)
       (domainScoreMap[apexDomain] >= 24)
-      || (domainScoreMap[apexDomain] >= 16 && domainCountMap[apexDomain] >= 4)
-      || (domainScoreMap[apexDomain] >= 13 && domainCountMap[apexDomain] >= 7)
-      || (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 10)
-      || (domainScoreMap[apexDomain] >= 3 && domainCountMap[apexDomain] >= 16)
+      || (domainScoreMap[apexDomain] >= 16 && domainCountMap[apexDomain] >= 7)
+      || (domainScoreMap[apexDomain] >= 13 && domainCountMap[apexDomain] >= 11)
+      || (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 14)
+      || (domainScoreMap[apexDomain] >= 3 && domainCountMap[apexDomain] >= 20)
     ) {
       domainArr.push('.' + apexDomain);
     }
   }
 
-  // console.log(
-  //   domainScoreMap['wordpress.com'],
-  //   domainCountMap['wordpress.com']
-  // );
-
   return Promise.resolve(domainArr);
 }, {
   serializer: serializeArray,
@@ -179,8 +182,10 @@ export function getPhishingDomains(parentSpan: Span) {
     const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
       const domainArr: string[] = [];
 
-      (await Promise.all(PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry, cacheKey))))
-        .forEach(appendArrayInPlaceCurried(domainArr));
+      await Promise.all([
+        ...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry, cacheKey)),
+        ...PHISHING_HOSTS_EXTRA.map(entry => processHosts(curSpan, ...entry, cacheKey))
+      ]).then(domainGroups => domainGroups.forEach(appendArrayInPlaceCurried(domainArr)));
 
       return domainArr;
     });
@@ -193,6 +198,10 @@ export function getPhishingDomains(parentSpan: Span) {
 }
 
 export function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) {
+  if (leathalKeywords(fullDomain)) {
+    return 100;
+  }
+
   let weight = 0;
 
   const hitLowKeywords = lowKeywords(fullDomain);
@@ -209,17 +218,14 @@ export function calcDomainAbuseScore(subdomain: string, fullDomain: string = sub
 
   const subdomainLength = subdomain.length;
 
-  if (subdomainLength > 6) {
-    weight += 0.25;
-    if (subdomainLength > 11) {
-      weight += 0.6;
-      if (subdomainLength > 20) {
-        weight += 1;
-        if (subdomainLength > 30) {
-          weight += 2;
-          if (subdomainLength > 40) {
-            weight += 4;
-          }
+  if (subdomainLength > 13) {
+    weight += 0.2;
+    if (subdomainLength > 20) {
+      weight += 1;
+      if (subdomainLength > 30) {
+        weight += 5;
+        if (subdomainLength > 40) {
+          weight += 10;
         }
       }
     }

+ 7 - 1
Source/domainset/cdn.conf

@@ -1045,9 +1045,12 @@ cdn.consentmanager.net
 widget.usersnap.com
 cdn.playwire.com
 widget.usepylon.com
-
+app.groove.cm
+app.groovefunnels.com
 loader.mantis-intelligence.com
 mantisloader.mantis-awx.com
+conversiongorilla.com
+.patientpop.com
 
 cdn.transcend.io
 .transcend-cdn.com
@@ -3094,3 +3097,6 @@ assets.raspberrypi.com
 assets.brevo.com
 corp-backend.brevo.com
 designsystem.brevo.com
+assets.grooveapps.com
+assets.clickfunnels.com
+.wixstudio.io

+ 10 - 0
Source/domainset/reject_sukka.conf

@@ -319,6 +319,7 @@ inst.360safe.com
 .dkonto.pl
 .googleplay.pro
 .printondemandmerchandise.com
+.thebitmeister.com
 # Phishing domain like `www-facebook.to-kr.com`
 .to-kr.com
 # Poland Hosting dhosting.pl's free subdomain
@@ -718,6 +719,10 @@ ads-shopping.shouqianba.com
 ad.maoyan.com
 e.jparking.cn
 adapi.izuiyou.com
+.sponsor.printondemandagency.com
+.whatisaweekend.com
+.mob.com
+.duomeng.cn
 
 adimg.daumcdn.net
 live.tvpot.daum.net
@@ -892,6 +897,10 @@ metrics.brevo.com
 .adfunlink.com
 .ubixioe.com
 
+# CNAME: dualstack.beaconserver-ce-vpc0-1537565064.eu-west-1.elb.amazonaws.com
+# note "beaconserver"
+.internal.dradis.netflix.com
+
 .adjust.io
 .airbrake.io
 .apsalar.com
@@ -1968,6 +1977,7 @@ xadx.file.market.xiaomi.com
 .pandora.xiaomi.com
 mi-stat.gslb.mi-idc.com
 mlog.search.xiaomi.net
+# verify.sec.xiaomi.com # 验证码 captcha
 # 小米云扫描组件
 .avlyun.sec.intl.miui.com
 .avlyun.sec.miui.com