瀏覽代碼

Remove dead/outdated/duplicated entries

SukkaW 7 月之前
父節點
當前提交
ff60e86a77

+ 35 - 31
Build/tools-dedupe-src.ts

@@ -4,7 +4,8 @@ import fsp from 'node:fs/promises';
 import { SOURCE_DIR } from './constants/dir';
 import { readFileByLine } from './lib/fetch-text-by-line';
 import { processLine } from './lib/process-line';
-import { HostnameSmolTrie } from './lib/trie';
+import { HostnameSmolTrie, HostnameTrie } from './lib/trie';
+import { task } from './trace';
 
 const ENFORCED_WHITELIST = [
   'hola.sk',
@@ -20,10 +21,10 @@ const ENFORCED_WHITELIST = [
   'samsungqbe.com'
 ];
 
-const WHITELIST: string[] = ['.lightspeedmining.com', 'samsungqbe.com', '.zbeos.com', '.holashop.org', '.jdie.pl', '.sponsor.printondemandagency.com', '.bmcm.pw', '.vplay.life', '.hola.hk', '.peopleland.net', '.120bit.com', '.tekyboycrypto.xyz', '.rocketpool.pro', '.cryptoloot.pro', '.weminerpool.site', '.timg135.top', '.binance.associates', '.lafermedumineur.fr', '.goldencoin.online', '.hola.sk', '.hola.com.sg', '.acashtech.com', '.bitoreum.org', '.mixpools.org', '.decapool.net', '.taichicoin.org', '.luxxeeu.com'];
+const WHITELIST: string[] = ['.dxdhd.com', '.tokto-motion.net', '.hola-shopping.com', '.luxxeeu.com', '.newzgames.com', '.hola.com.sg', 'pengtu.cc', '.cdn-js-query.com', 'samsungcloudsolution.net', 'samsungcloudsolution.com', 'static.estebull.com', '.drawservant.com', '.enjoy7plains.xyz', '.zmfindyourhalf.top', '.mineblocks.eu', '.cointaft.com', '.chain-pool.com', '.lamby-crypto.com', '.grftpool.com', '.onebtcplace.com', '.pepecore.com', '.punchsub.net', '.imzlabs.net', '.datapaw.net', '.smpool.net', '.yetimining.net', '.igrid.org', '.50centfreedom.us', '.cyg2016.xyz', '.easypool.xyz', '.arhash.xyz', '.enviromint.xyz', '.pool.space', '.anomp.cc', '.bitconnectpool.co', '.cryptopool.space', '.automatix.to', '.coolmine.to', '.coolpool.to', '.dpool.to', '.template-download.to', '.aurum7.to', '.sunpool.to', '.speedpool.to', '.cfcnet.to', '.pool.do', '.pool.bit34.com', '.eos.zhizhu.to', '.mubicdn.com', 'cdn.fastmediaing.com', '.webinfcdn.com', '.aosikaimage.com'];
 
-(async () => {
-  const files = await new Fdir()
+task(require.main === module, __filename)(async (span) => {
+  const files = await span.traceChildAsync('crawl thru all files', () => new Fdir()
     .withFullPaths()
     .filter((filepath, isDirectory) => {
       if (isDirectory) return true;
@@ -33,55 +34,58 @@ const WHITELIST: string[] = ['.lightspeedmining.com', 'samsungqbe.com', '.zbeos.
       return extname !== '.js' && extname !== '.ts';
     })
     .crawl(SOURCE_DIR)
-    .withPromise();
+    .withPromise());
 
-  const whiteTrie = new HostnameSmolTrie(WHITELIST);
-  ENFORCED_WHITELIST.forEach((item) => whiteTrie.whitelist(item));
-  const whitelist = whiteTrie.dump();
+  const whiteTrie = span.traceChildSync('build whitelist trie', () => {
+    const trie = new HostnameSmolTrie(WHITELIST);
+    ENFORCED_WHITELIST.forEach((item) => trie.whitelist(item));
+    return trie;
+  });
 
-  await Promise.all(files.map(file => dedupeFile(file, whitelist)));
-})();
+  await Promise.all(files.map(file => span.traceChildAsync('dedupe ' + file, () => dedupeFile(file, whiteTrie))));
+});
 
-async function dedupeFile(file: string, whitelist: string[]) {
-  const set = new Set<string>();
+async function dedupeFile(file: string, whitelist: HostnameSmolTrie) {
   const result: string[] = [];
 
+  const trie = new HostnameTrie();
+
   for await (const l of readFileByLine(file)) {
     const line = processLine(l);
+
     if (!line) {
       if (l.startsWith('# $ skip_dedupe_src')) {
         return;
       }
 
-      result.push(l);
+      result.push(l); // keep all comments and blank lines
       continue;
     }
 
-    if (set.has(line)) {
-      continue;
+    if (trie.has(line)) {
+      continue; // drop duplicate
     }
 
-    // We can't use a trie here since we need to keep the order
-    if (whitelist.some((whiteItem) => isDomainSuffix(whiteItem, line))) {
-      continue;
+    if (whitelist.has(line)) {
+      continue; // drop whitelisted items
     }
 
-    set.add(line);
+    trie.add(line);
     result.push(line);
   }
 
   return fsp.writeFile(file, result.join('\n') + '\n');
 }
 
-function isDomainSuffix(whiteItem: string, incomingItem: string) {
-  const whiteIncludeDomain = whiteItem[0] === '.';
-  whiteItem = whiteItem[0] === '.' ? whiteItem.slice(1) : whiteItem;
-
-  if (whiteItem === incomingItem) {
-    return true; // as long as exact match, we don't care if subdomain is included or not
-  }
-  if (whiteIncludeDomain) {
-    return incomingItem.endsWith('.' + whiteItem);
-  }
-  return false;
-}
+// function isDomainSuffix(whiteItem: string, incomingItem: string) {
+//   const whiteIncludeDomain = whiteItem[0] === '.';
+//   whiteItem = whiteItem[0] === '.' ? whiteItem.slice(1) : whiteItem;
+
+//   if (whiteItem === incomingItem) {
+//     return true; // as long as exact match, we don't care if subdomain is included or not
+//   }
+//   if (whiteIncludeDomain) {
+//     return incomingItem.endsWith('.' + whiteItem);
+//   }
+//   return false;
+// }

+ 0 - 2
Source/domainset/cdn.conf

@@ -4290,7 +4290,6 @@ cdn1.techbang.com
 cdn2.techbang.com
 cdn0-i.techbang.com
 s3.sitepoint.com
-assets.getpocket.com
 hips.hearstapps.com
 media.newyorker.com
 .pocket-image-cache.com
@@ -4470,7 +4469,6 @@ cdn-img3.iporntv.net
 .mxmcdn.net
 .myspacecdn.com
 media.assettype.com
-gadgets360cdn.com
 amp.akamaized.net
 d.newsweek.com
 g.newsweek.com

+ 0 - 2
Source/domainset/download.conf

@@ -758,7 +758,6 @@ mirror.netcologne.de
 cpan.noris.de
 ftp.hosteurope.de
 mirror.bibleonline.ru
-mirrors.dotsrc.org
 ftp.rediris.es
 osl.ugr.es
 cpan.ip-connect.vn.ua
@@ -906,7 +905,6 @@ mirrors.iu13.net
 mirror.leitecastro.com
 mirrors.xmissions.com
 kr.freebsd.org
-nic.funet.fi
 mirror.ossplanet.net
 repository.su
 lysator.liu.se

+ 0 - 10
Source/domainset/reject.conf

@@ -112,7 +112,6 @@ bad.third-party.site
 .oastify.com
 # Looks like public cdn, in fact location.href hijacker
 .js-query.com
-.cdn-js-query.com
 # Network Util Tools Scam
 .clashforwindows.net
 .clashforwindows.org
@@ -1433,8 +1432,6 @@ ceuswatcab01.blob.core.windows.net
 ac3.msn.com
 ads.eu.msn.com
 ads.msn.com
-ads1.msn.com
-ads2.msn.com
 adsyndication.msn.com
 analytics.msn.com
 c.msn.com
@@ -1920,7 +1917,6 @@ show.look.360.cn
 mclean.lato.cloud.360safe.com
 mvconf.lato.cloud.360safe.com
 mclean.cloud.360safe.com
-mvconf.cloud.360safe.com
 mclean.uk.cloud.360safe.com
 mvconf.uk.cloud.360safe.com
 aicleaner.shouji.360.cn
@@ -2084,7 +2080,6 @@ union.gdtimg.com
 # v2.gdt.qq.com
 # win.gdt.qq.com
 pgdt.gtimg.cn
-pgdt.ugdtimg.com
 pingma.qq.com
 sngmta.qq.com
 splashqqlive.gtimg.com
@@ -2108,7 +2103,6 @@ wxsnsdythumb.wxs.qq.com
 adsmind.gdtimg.com
 adsmind.ugdtimg.com
 qzs.gdtimg.com
-qzs.qq.com
 rmonitor.qq.com
 sdk.e.qq.com
 sdkconfig.video.qq.com
@@ -2660,7 +2654,6 @@ biz.live.xunlei.com
 ct.niu.xunlei.com
 mou.niu.xunlei.com
 scene.vip.xunlei.com
-advertpay.vip.xunlei.com
 static.m.sjzhushou.com
 etl.xlmc.sandai.net
 
@@ -2707,7 +2700,6 @@ mqtt.zhihu.com
 .monsetting.toutiao.com
 .mon.zijieapi.com
 .ad.zijieapi.com
-log.snssdk.com
 toblog.ctobsnssdk.com
 frontier-aweme-hl.snssdk.com
 
@@ -2784,7 +2776,6 @@ adm.10jqka.com.cn
 stat.10jqka.com.cn
 
 # >> UC
-applog.uc.cn
 applog-perf.uc.cn
 applog.ucdns.uc.cn
 gjapplog.uc.cn
@@ -3285,7 +3276,6 @@ ad.where.com
 ftpcontent.worldnow.com
 ads.saymedia.com
 adcontent.saymedia.com
-static.estebull.com
 go.vrvm.com
 c.vrvm.com
 .phluant.com

+ 0 - 37
Source/domainset/reject_extra.conf

@@ -163,7 +163,6 @@ nimiq.terorie.com
 .cosmosjackson.com
 .decoroustitle.com
 .decoycreation.com
-.drawservant.com
 .energeticexample.com
 .evanescentedge.com
 .farethief.com
@@ -321,7 +320,6 @@ nimiq.terorie.com
 .crydconnect.com
 .danceview.ru
 .easycucina.net
-.enjoy7plains.xyz
 .exploreshops.net
 .findallgainssurvey.top
 .findallgainsurvey.top
@@ -355,7 +353,6 @@ nimiq.terorie.com
 .uservalidate.xyz
 .wiki-review.net
 .worldsguide.net
-.zmfindyourhalf.top
 .zmprofitsurvey.top
 
 # >> Migrate from CoinBlockerLists
@@ -410,7 +407,6 @@ nimiq.terorie.com
 .guugll.eu
 .hashgate.eu
 .poolbe.eu
-.mineblocks.eu
 .minergalaxy.eu
 .xmrpool.eu
 .multicoin.eu
@@ -912,7 +908,6 @@ nimiq.terorie.com
 .swinemine.com
 .apple-bitcoin.com
 .coinminex.com
-.cointaft.com
 .aprilcoin.com
 .virdpool.com
 .arbitracoin.com
@@ -1119,7 +1114,6 @@ nimiq.terorie.com
 .statdynamic.com
 .poolflare.com
 .maxeter.com
-.chain-pool.com
 .coleganet.com
 .iqmining.com
 .chekazpools.com
@@ -1256,9 +1250,7 @@ nimiq.terorie.com
 .fsocietychain.com
 .gainprox.com
 .kattcoin.com
-.lamby-crypto.com
 .nikitonium.com
-.pepecore.com
 .pinchpool.com
 .redblockcoin.com
 .sevabit.com
@@ -1295,7 +1287,6 @@ nimiq.terorie.com
 .monxpool.com
 .gpugold.com
 .greenchiapool.com
-.grftpool.com
 .grosscrypto.com
 .haopool.com
 .raspi-ninja.com
@@ -1308,7 +1299,6 @@ nimiq.terorie.com
 .upxpool.com
 .xmrminerpro.com
 .stakeunited.com
-.onebtcplace.com
 .papoto.com
 .infamylists.com
 .inhive.com
@@ -1749,7 +1739,6 @@ nimiq.terorie.com
 .cpu-pool.net
 .cryptotab.net
 .notallmine.net
-.datapaw.net
 .minershq.net
 .deltapool.net
 .deepbit.net
@@ -1772,7 +1761,6 @@ nimiq.terorie.com
 .gay-hotvideo.net
 .hashhorse.net
 .nourpool.net
-.imzlabs.net
 .kinohabr.net
 .kisshentai.net
 .lyncoin.net
@@ -1781,13 +1769,10 @@ nimiq.terorie.com
 .monero-miner.net
 .mycoinwallet.net
 .nimiqtest.net
-.punchsub.net
 .roastedvolt.net
-.smpool.net
 .sweetbook.net
 .vcrypt.net
 .vidfile.net
-.yetimining.net
 .yobit.net
 .serverpower.net
 .haqo.net
@@ -1979,7 +1964,6 @@ nimiq.terorie.com
 .globalpool.org
 .oddpools.org
 .luckpool.org
-.igrid.org
 .keepool.org
 .kosmoplovci.org
 .kroma.org
@@ -2076,7 +2060,6 @@ nimiq.terorie.com
 .1ds.us
 .p2poolmining.us
 .multipool.us
-.50centfreedom.us
 .binance.us
 .acmining.us
 .gridcoin.us
@@ -2094,11 +2077,9 @@ nimiq.terorie.com
 .fungibly.xyz
 .cryptopool.xyz
 .ionize.xyz
-.easypool.xyz
 .blockcrushers.xyz
 .deipool.xyz
 .cojin.xyz
-.arhash.xyz
 .p2p-spb.xyz
 .pecadol.xyz
 .303365.xyz
@@ -2118,7 +2099,6 @@ nimiq.terorie.com
 .etcoin.xyz
 .avero.xyz
 .bitcoin-rebooted.xyz
-.enviromint.xyz
 .blockify.xyz
 .newpool.xyz
 .capung.xyz
@@ -2127,7 +2107,6 @@ nimiq.terorie.com
 .crpool.xyz
 .cryptominers.xyz
 .cryptopine.xyz
-.cyg2016.xyz
 .xazab.xyz
 .elphyrecoin.xyz
 .flyhash.xyz
@@ -2322,7 +2301,6 @@ nimiq.terorie.com
 .suprnova.cc
 .trustpool.cc
 .reactor.cc
-.anomp.cc
 .dpool.cc
 .minero.cc
 .smartcash.cc
@@ -2372,7 +2350,6 @@ nimiq.terorie.com
 .quickpool.tech
 .sia.tech
 .freecontent.date
-.cryptopool.space
 .coinminer.space
 .hashing.space
 .goodzen.space
@@ -2385,7 +2362,6 @@ nimiq.terorie.com
 .infinium.space
 .mineradnow.space
 .ukkey3.space
-.pool.space
 .blocx.space
 .cryptomorons.space
 .blockhunters.space
@@ -2459,7 +2435,6 @@ nimiq.terorie.com
 .vectorium.co
 .azakus.co
 .bitconnect.co
-.bitconnectpool.co
 .onepool.co
 .hashcoin.co
 .okcash.co
@@ -2725,12 +2700,7 @@ nimiq.terorie.com
 .hash.green
 .btc.to
 .hashrate.to
-.aurum7.to
-.automatix.to
 .xmr.to
-.coolmine.to
-.coolpool.to
-.dpool.to
 .estream.to
 .foxx.to
 .planet.to
@@ -2739,12 +2709,8 @@ nimiq.terorie.com
 .miningpool.to
 .piratebay.to
 .rig.to
-.speedpool.to
 .streamplay.to
-.sunpool.to
-.template-download.to
 .more.to
-.cfcnet.to
 .darkco.in
 .10xbitco.in
 .freico.in
@@ -2885,7 +2851,6 @@ nimiq.terorie.com
 .zona.pl
 .worldcoin.global
 .fireants.global
-.pool.do
 .datasecu.download
 .jqwww.download
 .mine.bz
@@ -3132,7 +3097,6 @@ kingsminer.ddnsking.com
 .pepperscorecoin.wixsite.com
 .plugin.brfiles.com
 .pool.4i7i.com
-.pool.bit34.com
 .pool.groupfabric.com
 .pool.paprikaex.com
 .pool.stalwartbucks.com
@@ -3435,7 +3399,6 @@ d3iz6lralvg77g.cloudfront.net
 .evolution-project.go.ro
 .max.csrss.website
 .monero.us.to
-.eos.zhizhu.to
 .a45.bulehero.in
 .a46.bulehero.in
 .a88.bulehero.in