ソースを参照

Remove outdated/dead domains

SukkaW 10 ヶ月 前
コミット
fbd15df6ed
3 ファイル変更、17 行追加、36 行削除
  1. 17 8
      Build/tools-dedupe-src.ts
  2. 0 3
      Source/domainset/reject.conf
  3. 0 25
      Source/domainset/reject_extra.conf

+ 17 - 8
Build/tools-dedupe-src.ts

@@ -4,6 +4,7 @@ import fsp from 'node:fs/promises';
 import { SOURCE_DIR } from './constants/dir';
 import { readFileByLine } from './lib/fetch-text-by-line';
 import { processLine } from './lib/process-line';
+import { HostnameSmolTrie } from './lib/trie';
 
 const ENFORCED_WHITELIST = [
   'hola.sk',
@@ -17,7 +18,8 @@ const ENFORCED_WHITELIST = [
   'samsungqbe.com'
 ];
 
-const WHITELIST: string[] = ['ton.local.twitter.com', 'prod.msocdn.com', 'twemoji.maxcdn.com', 'img.urlnode.com', 'ipfsgate.com', 'googleplay.pro', 'iadmatapk.nosdn.127.net', 'hola-shopping.com', 'brdtest.co', 'mynextphone.io', 'hola.hk', 'holashop.org', 'hola.sk', 'hola.com.sg', 'c.medialytics.com', 'adstats.mgc-games.com', 'search.mgc-games.com', 'kissdoujin.com', 'newminersage.com', 'trossmining.de', 'hashncash.net', 'microsolt.ru', 'moneropool.ru', 'hashforcash.us', 'bitcoinn.biz', 'webmining.co', 'lamba.top', 'httpdns.bilivideo.com', 'httpdns-v6.gslb.yy.com', 'k-cdn.depot.dev', 'li-cdn.com'];
+const WHITELIST: string[] = ['.us-api.samsungyosemite.com', '.api-hub.samsungyosemite.com', '.holashop.org', '.solopools.net', '.hola-shopping.com', '.100xbtc.com', '.brdtest.co', '.yelts.net', '.axepool.com', '.luxxeeu.com', '.etc-pool.com', '.alph2mine.com', 'samsungcloudsolution.com', 'samsungcloudsolution.net', 'samsungqbe.com', 'lgtvsdp.com', '.apextop.cc', '.p2p-south.xyz', '.r-pool.net', '.celcoin.io', '.gameforxe.eu', '.kipcoin.org', '.cryptonote.club', '.12level.com', '.piratenbucht.eu', '.minersmine.com', '.blockhunter.info', '.hola.com.sg', '.down.my0115.ru', '.js.my0115.ru', '.wmi.my0115.ru', '.statistic.date', '.pawpools.space', '.nimbocoin.com', '.hola.hk', '.hola.sk', '.solopools.org', '.minereasy.com', '.mynextphone.io', '.newzgames.com'];
+
 (async () => {
   const files = await new Fdir()
     .withFullPaths()
@@ -31,7 +33,9 @@ const WHITELIST: string[] = ['ton.local.twitter.com', 'prod.msocdn.com', 'twemoj
     .crawl(SOURCE_DIR)
     .withPromise();
 
-  const whitelist = WHITELIST.filter((item) => ENFORCED_WHITELIST.every((whitelistItem) => !isDomainSuffix(whitelistItem, item)));
+  const whiteTrie = new HostnameSmolTrie(WHITELIST);
+  ENFORCED_WHITELIST.forEach((item) => whiteTrie.whitelist(item));
+  const whitelist = whiteTrie.dump();
 
   await Promise.all(files.map(file => dedupeFile(file, whitelist)));
 })();
@@ -56,7 +60,7 @@ async function dedupeFile(file: string, whitelist: string[]) {
     }
 
     // We can't use a trie here since we need to keep the order
-    if (whitelist.some((item) => isDomainSuffix(item, line))) {
+    if (whitelist.some((whiteItem) => isDomainSuffix(whiteItem, line))) {
       continue;
     }
 
@@ -67,10 +71,15 @@ async function dedupeFile(file: string, whitelist: string[]) {
   return fsp.writeFile(file, result.join('\n') + '\n');
 }
 
-function isDomainSuffix(suffixRule: string, domain: string) {
-  if (suffixRule.length > domain.length + 1) {
-    return false;
-  }
+function isDomainSuffix(whiteItem: string, incomingItem: string) {
+  const whiteIncludeDomain = whiteItem[0] === '.';
+  whiteItem = whiteItem[0] === '.' ? whiteItem.slice(1) : whiteItem;
 
-  return suffixRule === domain || domain.endsWith('.' + suffixRule);
+  if (whiteItem === incomingItem) {
+    return true; // as long as exact match, we don't care if subdomain is included or not
+  }
+  if (whiteIncludeDomain) {
+    return incomingItem.endsWith('.' + whiteItem);
+  }
+  return false;
 }

+ 0 - 3
Source/domainset/reject.conf

@@ -3090,8 +3090,6 @@ samsungqbe.com
 .vd.emp.prd.s3.amazonaws.com
 .syncplusconfig.s3.amazonaws.com
 .coordinator-production-28516768.us-east-1.elb.amazonaws.com
-.us-api.samsungyosemite.com
-.api-hub.samsungyosemite.com
 .samsungads.com
 samsungcloudsolution.com
 .gamespromotion.samsungcloudsolution.com
@@ -3110,7 +3108,6 @@ samsungcloudsolution.com
 .legacyportal.nettvservices.com
 .aic-ngfts.lge.com
 .lgtvcommon.com
-lgtvsdp.com
 .yumenetworks.com
 .ibs.lgappstv.com
 .ibis.lgappstv.com

+ 0 - 25
Source/domainset/reject_extra.conf

@@ -120,7 +120,6 @@ nimiq.terorie.com
 .kippo.eu
 .miningpool.eu
 .ellaism.eu
-.gameforxe.eu
 .cloudhash.eu
 .cryptocable.eu
 .coinfaucet.eu
@@ -139,7 +138,6 @@ nimiq.terorie.com
 .xmrpool.eu
 .multicoin.eu
 .ryopool.eu
-.piratenbucht.eu
 .urbanproxy.eu
 .eossweden.eu
 .easymining.eu
@@ -334,7 +332,6 @@ nimiq.terorie.com
 .10minutebitcoin.com
 .10xbitcoin.com
 .12inv.com
-.12level.com
 .13xbtc.com
 .14daily.com
 .15coin.com
@@ -393,7 +390,6 @@ nimiq.terorie.com
 .100-btc.com
 .100btc.com
 .ikpool.com
-.100xbtc.com
 .poolchia.com
 .120bit.com
 .120bitcoins.com
@@ -477,7 +473,6 @@ nimiq.terorie.com
 .woolypooly.com
 .hash2mine.com
 .minecraftpool.com
-.minereasy.com
 .minerrocks.com
 .semipool.com
 .xminingpool.com
@@ -528,7 +523,6 @@ nimiq.terorie.com
 .majanetwork.com
 .mine2gether.com
 .huopool.com
-.alph2mine.com
 .e4pool.com
 .minepool.com
 .nimiqpool.com
@@ -746,7 +740,6 @@ nimiq.terorie.com
 .awesomeminers.com
 .awmlite.com
 .awsmining.com
-.axepool.com
 .axerc.com
 .axletrade.com
 .axtrades.com
@@ -986,7 +979,6 @@ nimiq.terorie.com
 .erstweal.com
 .ethashpool.com
 .pearpool.com
-.etc-pool.com
 .etcturk.com
 .ethteam.com
 .xzrm.com
@@ -1009,7 +1001,6 @@ nimiq.terorie.com
 .kattcoin.com
 .lamby-crypto.com
 .nikitonium.com
-.nimbocoin.com
 .pepecore.com
 .pinchpool.com
 .redblockcoin.com
@@ -1062,7 +1053,6 @@ nimiq.terorie.com
 .stringclass.com
 .upxpool.com
 .xmrminerpro.com
-.minersmine.com
 .stakeunited.com
 .onebtcplace.com
 .papoto.com
@@ -1304,7 +1294,6 @@ nimiq.terorie.com
 .wemine.pro
 .mecrypto.club
 .poolofd32th.club
-.cryptonote.club
 .572133.club
 .cpufan.club
 .cryptopowered.club
@@ -1549,14 +1538,12 @@ nimiq.terorie.com
 .imzlabs.net
 .kinohabr.net
 .kisshentai.net
-.r-pool.net
 .lyncoin.net
 .minecointo.net
 .ypool.net
 .monero-miner.net
 .mycoinwallet.net
 .nimiqtest.net
-.solopools.net
 .punchsub.net
 .roastedvolt.net
 .smpool.net
@@ -1565,7 +1552,6 @@ nimiq.terorie.com
 .vidfile.net
 .yetimining.net
 .yobit.net
-.yelts.net
 .serverpower.net
 .haqo.net
 .3322.net
@@ -1574,7 +1560,6 @@ nimiq.terorie.com
 .eosweb.net
 .eossf.net
 .thedimi.net
-.kipcoin.org
 .nanopool.org
 .cryptopool.org
 .fairhash.org
@@ -1786,7 +1771,6 @@ nimiq.terorie.com
 .notmining.org
 .phloxcoin.org
 .pietpiraat.org
-.solopools.org
 .pr0gram.org
 .seedpool.org
 .tpbproxyone.org
@@ -1929,7 +1913,6 @@ nimiq.terorie.com
 .elphyrecoin.xyz
 .flyhash.xyz
 .p2p-ekb.xyz
-.p2p-south.xyz
 .p2p-usa.xyz
 .premiumstats.xyz
 .scpool.xyz
@@ -2034,7 +2017,6 @@ nimiq.terorie.com
 .bitminer.io
 .bitnex.io
 .bitnodes.io
-.celcoin.io
 .hellar.io
 .btg.io
 .quarkchain.io
@@ -2125,7 +2107,6 @@ nimiq.terorie.com
 .trustpool.cc
 .reactor.cc
 .anomp.cc
-.apextop.cc
 .dpool.cc
 .minero.cc
 .smartcash.cc
@@ -2177,7 +2158,6 @@ nimiq.terorie.com
 .quickpool.tech
 .sia.tech
 .freecontent.date
-.statistic.date
 .cryptopool.space
 .coinminer.space
 .hashing.space
@@ -2189,7 +2169,6 @@ nimiq.terorie.com
 .alphaserpentis.space
 .electroneum.space
 .infinium.space
-.pawpools.space
 .mineradnow.space
 .ukkey3.space
 .pool.space
@@ -2305,7 +2284,6 @@ nimiq.terorie.com
 .nitsche.top
 .qukuai.top
 .uralscoin.info
-.blockhunter.info
 .vericoin.info
 .0on.info
 .bchain.info
@@ -3153,9 +3131,6 @@ vbk-reb0rn.ddns.net
 .chia.net.ru
 .coffeandcigarets.asyx.ru
 .daxuxaboqy.7ci.ru
-.down.my0115.ru
-.js.my0115.ru
-.wmi.my0115.ru
 .eosnode.b1.ru
 .f0261452.xsph.ru
 .ftp.ruisgood.ru