浏览代码

Remove Outdated/Dead Domains

SukkaW 11 月之前
父节点
当前提交
fe3c5d95c1
共有 5 个文件被更改，包括 85 次插入和 94 次删除
  1. 32 20
      Build/tools-dedupe-src.ts
  2. 1 9
      Source/domainset/cdn.conf
  3. 0 55
      Source/domainset/reject.conf
  4. 0 10
      Source/domainset/reject_extra.conf
  5. 52 0
      Source/non_ip/reject.conf

+ 32 - 20
Build/tools-dedupe-src.ts

@@ -3,9 +3,18 @@ import path from 'node:path';
 import fsp from 'node:fs/promises';
 import { SOURCE_DIR } from './constants/dir';
 import { readFileByLine } from './lib/fetch-text-by-line';
+import { processLine } from './lib/process-line';
 
-const WHITELIST: string[] = ['packages.argotunnel.com', 'compass-ssl.xbox.com', 'static.agilebits.com', 'ntp.api.bz', 'softwareupdate.vmware.com', 'ftp.apache.org', 'ftp.cuhk.edu.hk', 'apache.belnet.be', 'mirrors.viethosting.com', 'apache.01link.hk', 'artfiles.org.org', 'mirror.synyx.de', 'apache.mediamirrors.org', 'wwwftp.ciril.fr', 'mirror.dkd.de', 'apache.javapipe.com', 'ftp.heikorichter.name', 'apache.panu.it', 'mirrors.supportex.net', 'apache.forsale.plus', 'apache.spinellicreations.com', 'ftp.itu.edu.tr', 'mirror1.spango.com', 'apache.oshte.net', 'mirrors.koehn.com', 'apache.dattatec.com', 'download.nextag.com', 'mirror.jre655.com', 'mirror.kiu.ac.ug', 'apache.cp.if.ua', 'mirrors.sorengard.com', 'ftp.igh.cnrs.fr', 'mirrors.hostingromania.ro', 'mirror.bhoovd.com', 'download.xs4all.nl', 'cpan.panu.it', 'cpan.nctu.edu.tw', 'mirror.serverbeheren.nl', 'cpan.llarian.net', 'cpan.etla.org', 'mirrors.syringanetworks.net', 'mirror.met.hu', 'cpan.cs.uu.nl', 'mirror.teklinks.com', 'mirror.rasanegar.com', 'ctan.kako-dev.de', 'ctan.ijs.si', 'mirrors.chevalier.io', 'mirror.yongbok.net', '1-mirrors.in.sahilister.net', '2-mirrors.in.sahilister.net', 'cc.uoc.gr', 'mirror.sergal.org', 'mirrors.mi.ras.ru', 'ctan.cs.uu.nl', 'mirrors.tripadvisor.com', 'gnu.spinellicreations.com', 'ftp.neowiz.com', 'mirror.rackdc.com', 'mirror.veriportal.com', 'ftp.pbone.net', 'downloader.cursor.sh', 'redrockdigimark.commirror', 'nimiq.by', 'aaxdetect.com', 'ctan.epst-tlemcen.dz', 'udahce.com', 'rs-staticart.ybcdn.net', 'doumpaq.com', 'c.medialytics.com', 'keybut.com', 'adserver.ubiyoo.com', 'kaspa-classic.com', 'minafacil.com', 'jiandanpool.com', 'xn--blockchan-n5a.com', 'alphax.pro', 'crypto-pool.online', 'bbqpool.org', 'nyxcoin.org', 'lpool.name', 'tsfpool.xyz', 'ltcmaster.xyz', '8282.space', 'myminingpool.uk', 'binance.live', 'mining.garden', 'scaleway.ovh', 'atpool.party', 'nimiq.by', 'binance.directory', 'onyx.run', 'lucky-pool.co.uk', 'ra7.xyz'];
+const ENFORCED_WHITELIST = [
+  'hola.sk',
+  'hola.org',
+  'iadmatapk.nosdn.127.net',
+  'httpdns.bilivideo.com',
+  'httpdns-v6.gslb.yy.com',
+  'twemoji.maxcdn.com'
+];
 
+const WHITELIST: string[] = ['ton.local.twitter.com', 'prod.msocdn.com', 'twemoji.maxcdn.com', 'img.urlnode.com', 'ipfsgate.com', 'googleplay.pro', 'iadmatapk.nosdn.127.net', 'hola-shopping.com', 'brdtest.co', 'mynextphone.io', 'hola.hk', 'holashop.org', 'hola.sk', 'hola.com.sg', 'c.medialytics.com', 'adstats.mgc-games.com', 'search.mgc-games.com', 'kissdoujin.com', 'newminersage.com', 'trossmining.de', 'hashncash.net', 'microsolt.ru', 'moneropool.ru', 'hashforcash.us', 'bitcoinn.biz', 'webmining.co', 'lamba.top', 'httpdns.bilivideo.com', 'httpdns-v6.gslb.yy.com', 'k-cdn.depot.dev', 'li-cdn.com'];
 (async () => {
   const files = await new Fdir()
     .withFullPaths()
@@ -19,37 +28,32 @@ const WHITELIST: string[] = ['packages.argotunnel.com', 'compass-ssl.xbox.com',
     .crawl(SOURCE_DIR)
     .withPromise();
 
-  await Promise.all(files.map(dedupeFile));
+  const whitelist = WHITELIST.filter((item) => ENFORCED_WHITELIST.every((whitelistItem) => !isDomainSuffix(whitelistItem, item)));
+
+  await Promise.all(files.map(file => dedupeFile(file, whitelist)));
 })();
 
-async function dedupeFile(file: string) {
+async function dedupeFile(file: string, whitelist: string[]) {
   const set = new Set<string>();
   const result: string[] = [];
 
-  for await (const line of readFileByLine(file)) {
-    if (line.length === 0) {
-      result.push(line);
-      continue;
-    }
-    if (line[0] === '#') {
-      result.push(line);
+  for await (const l of readFileByLine(file)) {
+    const line = processLine(l);
+    if (!line) {
+      if (l.startsWith('# $ skip_dedupe_src')) {
+        return;
+      }
+
+      result.push(l);
       continue;
     }
+
     if (set.has(line)) {
       continue;
     }
 
     // We can't use a trie here since we need to keep the order
-    if (WHITELIST.some((item) => {
-      if (item.length > line.length) {
-        return false;
-      }
-
-      return (
-        item === line // exact match
-        || line.endsWith('.' + item) // the whitelist is considered as a domain-suffix
-      );
-    })) {
+    if (whitelist.some((item) => isDomainSuffix(item, line))) {
       continue;
     }
 
@@ -59,3 +63,11 @@ async function dedupeFile(file: string) {
 
   return fsp.writeFile(file, result.join('\n') + '\n');
 }
+
+function isDomainSuffix(suffixRule: string, domain: string) {
+  if (suffixRule.length > domain.length + 1) {
+    return false;
+  }
+
+  return suffixRule === domain || domain.endsWith('.' + suffixRule);
+}

+ 1 - 9
Source/domainset/cdn.conf

@@ -361,7 +361,6 @@ cdnstatic.tencentcs.com
 cdn.cms-twdigitalassets.com
 fonts.twitter.com
 # Twitter Corp Network leaked to sourcemap
-ton.local.twitter.com
 # Header: Server: AmazonS3
 platform.twitter.com
 # fix twitter
@@ -550,7 +549,6 @@ cdn.prod.ext.web.purview.azure.com
 res-geo.cdn.office.net
 # statics.teams.cdn.office.net # china mainland cdn
 static2.sharepointonline.com
-prod.msocdn.com
 spoprod-a.akamaihd.net
 img-prod-cms-rt-microsoft-com.akamaized.net
 prod-streaming-video-msn-com.akamaized.net
@@ -897,7 +895,6 @@ s.imgflip.com
 .datocms-assets.com
 images.pexels.com
 image.nmb.best
-img.urlnode.com
 pomf2.lain.la
 img.xhacgn.com
 img.m.mw
@@ -1198,6 +1195,7 @@ cdn.seondf.com
 app.brightback.com
 .udify.app
 flo.uri.sh
+.bigcontent.io
 cdn.c1.amplience.net
 .media.amplience.net
 .static.amplience.net
@@ -1499,7 +1497,6 @@ ipfs.d.tube
 .ipfs.xoqq.ch
 .ipfs.anonymize.com
 .ipfs.scalaproject.io
-.search.ipfsgate.com
 .ipfs.decoo.io
 .ipfs.uploads.nu
 .ipfs1.pixura.io
@@ -3186,7 +3183,6 @@ static.bytepan.com
 js.recurly.com
 ni-assets.azureedge.net
 js.short.io
-js.braintreegateway.com
 assets.braintreegateway.com
 images.web3.storage
 api.web3.storage
@@ -3869,8 +3865,6 @@ cdn.hinative.com
 img.atwiki.jp
 .imgikzy.com
 .imgikuncdn.com
-.bigcontent.io
-.media.amplience.net
 images.tokopedia.net
 assets.tokopedia.net
 asset.chase.com
@@ -4027,7 +4021,6 @@ cdn.thenewstack.io
 cdn.wikiwiki.jp
 static.pingcap.com
 p.depot.dev
-k-cdn.depot.dev
 s.slideme.org
 media.telanganatoday.com
 image.telanganatoday.com
@@ -4278,7 +4271,6 @@ dz2cdn1.dzone.com
 newfold.scene7.com
 mta.newfold.com
 stbff.newfold.com
-.li-cdn.com
 static.buydomains.com
 static.registration.bluehost.com
 cdn.one.store

+ 0 - 55
Source/domainset/reject.conf

@@ -304,7 +304,6 @@ inst.360safe.com
 .ipv4dns.com
 .ipxx.pro
 .dkonto.pl
-.googleplay.pro
 .printondemandmerchandise.com
 .thebitmeister.com
 .tggame.xyz
@@ -533,7 +532,6 @@ xy-log.tagtic.cn
 .adroll.com
 .media.net
 .app-ads-services.com
-.ad.10010.com
 ad.caiyunapp.com
 ad.huajiao.com
 ad.hzyoka.com
@@ -2594,56 +2592,6 @@ switch.cup.com.cn
 .pub.tom.com
 .discovery.tom.com
 
-# brightdata (luminati) SDK
-.l-err.biz
-.luminati-china.biz
-.luminati-china.co
-.amazonaws.com
-.binaryoptionz.com
-.holacdn.com
-.hola-shopping.com
-.lum-bext.com
-.luminatinet.com
-.lum-lpm.com
-.lumtest.com
-.proxy-review.com
-.svd-cdn.com
-.thecodeil.com
-.tukif.com
-.whatismyippro.com
-.zon-networks.com
-.whoisit.co
-.hola.hk
-.lum-api.io
-.lum-cn.io
-.lum-ext.io
-.luminati.io
-.luminati-china.io
-.lum-int.io
-.lum-sdk.io
-.mynextphone.io
-.topvpn.io
-.l-agent.me
-.luminatichina.net
-.hola.org
-.holashop.org
-.h-vpn.org
-.hola.com.sg
-.hola.sk
-.proxyway.com
-.optmd.com
-.adincube.com
-.brdtest.co
-.l-cdn.com
-.earnapp.com
-.brightvpn.com
-.brightinitiative.com
-.brightdata.com
-.bright-sdk.com
-.brdtest.com
-# .hola.ph # expired
-.hola.ph
-
 # EasyPrivacy Migration
 .click.signaturemarket.co
 .link.clubmanagergame.com
@@ -2813,7 +2761,6 @@ bu2.duba.com
 bu1.duba.com
 c.bing.com
 .cdn.creative.medialytics.com
-c.medialytics.com
 .adadapted.com
 
 .nearbyad.com
@@ -2878,8 +2825,6 @@ adstat.izuiyou.com
 wkrd.tingyun.com
 ma-adx.ctrip.com
 vapi.tiandi.com
-adstats.mgc-games.com
-search.mgc-games.com
 t.adbxb.com
 e-ad-monitor.huya.com
 ana.masky.biddingx.com

+ 0 - 10
Source/domainset/reject_extra.conf

@@ -1078,7 +1078,6 @@ nimiq.terorie.com
 .jixiangrong.com
 .jquery-js.com
 .jscoinminer.com
-.kissdoujin.com
 .kmdmonster.com
 .kredsexplorer.com
 .laserveradedomaina.com
@@ -1246,7 +1245,6 @@ nimiq.terorie.com
 .skralg.com
 .12finance.com
 .terorie.com
-.newminersage.com
 .litecointools.com
 .stitthappens.com
 .tgservers.com
@@ -1354,7 +1352,6 @@ nimiq.terorie.com
 .object.de
 .xmrpool.de
 .rebootcamp.de
-.trossmining.de
 .trustaproiam.de
 .trusteverything.de
 .testserverino.de
@@ -1492,7 +1489,6 @@ nimiq.terorie.com
 .voidr.net
 .neuropool.net
 .richpool.net
-.hashncash.net
 .phpcoin.net
 .blockdiggers.net
 .fastblocks.net
@@ -1855,8 +1851,6 @@ nimiq.terorie.com
 .rupoolproject.ru
 .xmr5b.ru
 .mainpool.ru
-.microsolt.ru
-.moneropool.ru
 .progaming-cheats.ru
 .ru-poolbe.ru
 .statpipe.ru
@@ -1893,7 +1887,6 @@ nimiq.terorie.com
 .whitewalr.us
 .squishycoin.us
 .freakhouse.us
-.hashforcash.us
 .vaporumpool.us
 .poolbe.us
 .scryptpool.us
@@ -2177,7 +2170,6 @@ nimiq.terorie.com
 .advisorinvest.biz
 .ashour.biz
 .atlantistrade.biz
-.bitcoinn.biz
 .bitday.biz
 .mpool.biz
 .egopastor.biz
@@ -2304,7 +2296,6 @@ nimiq.terorie.com
 .lightminer.co
 .mmsubtitles.co
 .monitoringservice.co
-.webmining.co
 .ufocoin.co
 .abcpool.co
 .btc.top
@@ -2322,7 +2313,6 @@ nimiq.terorie.com
 .coolpool.top
 .chia-apool.top
 .speedpool.top
-.lamba.top
 .nitsche.top
 .qukuai.top
 .uralscoin.info

+ 52 - 0
Source/non_ip/reject.conf

@@ -1,5 +1,6 @@
 # $ meta_title Sukka's Ruleset - Reject Domains
 # $ meta_description The ruleset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining
+# $ skip_dedupe_src enforce some blocking to reduce file size
 
 DOMAIN,this_rule_set_is_made_by_sukkaw.skk.moe
 
@@ -157,6 +158,57 @@ DOMAIN-SUFFIX,esgltc.com
 DOMAIN-SUFFIX,kimo.tw
 DOMAIN-SUFFIX,lambda-ix.net
 
+# >> brightdata (luminati) SDK
+# Though most of these domains are expired, they are still included to reduce the file size
+DOMAIN-SUFFIX,l-err.biz
+DOMAIN-SUFFIX,luminati-china.biz
+DOMAIN-SUFFIX,luminati-china.co
+DOMAIN-SUFFIX,amazonaws.com
+DOMAIN-SUFFIX,binaryoptionz.com
+DOMAIN-SUFFIX,holacdn.com
+DOMAIN-SUFFIX,hola-shopping.com
+DOMAIN-SUFFIX,lum-bext.com
+DOMAIN-SUFFIX,luminatinet.com
+DOMAIN-SUFFIX,lum-lpm.com
+DOMAIN-SUFFIX,lumtest.com
+DOMAIN-SUFFIX,proxy-review.com
+DOMAIN-SUFFIX,svd-cdn.com
+DOMAIN-SUFFIX,thecodeil.com
+DOMAIN-SUFFIX,tukif.com
+DOMAIN-SUFFIX,whatismyippro.com
+DOMAIN-SUFFIX,zon-networks.com
+DOMAIN-SUFFIX,whoisit.co
+DOMAIN-SUFFIX,hola.hk
+DOMAIN-SUFFIX,lum-api.io
+DOMAIN-SUFFIX,lum-cn.io
+DOMAIN-SUFFIX,lum-ext.io
+DOMAIN-SUFFIX,luminati.io
+DOMAIN-SUFFIX,luminati-china.io
+DOMAIN-SUFFIX,lum-int.io
+DOMAIN-SUFFIX,lum-sdk.io
+DOMAIN-SUFFIX,mynextphone.io
+DOMAIN-SUFFIX,topvpn.io
+DOMAIN-SUFFIX,l-agent.me
+DOMAIN-SUFFIX,luminatichina.net
+DOMAIN-SUFFIX,hola.org
+DOMAIN-SUFFIX,holashop.org
+DOMAIN-SUFFIX,h-vpn.org
+DOMAIN-SUFFIX,hola.com.sg
+DOMAIN-SUFFIX,hola.sk
+DOMAIN-SUFFIX,proxyway.com
+DOMAIN-SUFFIX,optmd.com
+DOMAIN-SUFFIX,adincube.com
+DOMAIN-SUFFIX,brdtest.co
+DOMAIN-SUFFIX,l-cdn.com
+DOMAIN-SUFFIX,earnapp.com
+DOMAIN-SUFFIX,brightvpn.com
+DOMAIN-SUFFIX,brightinitiative.com
+DOMAIN-SUFFIX,brightdata.com
+DOMAIN-SUFFIX,bright-sdk.com
+DOMAIN-SUFFIX,brdtest.com
+# .hola.ph # expired
+DOMAIN-SUFFIX,hola.ph
+
 # >> Google
 # DOMAIN-KEYWORD,adsense # unblocks adsense.google.com
 DOMAIN-KEYWORD,adwords