Browse Source

Update Rules & Tools

SukkaW 4 years ago
parent
commit
936adaf2c6

+ 52 - 61
Build/build-reject-domainset.js

@@ -3,11 +3,13 @@ const { promises: fsPromises } = require('fs');
 const { resolve: pathResolve } = require('path');
 
 let cliProgress;
+let Piscina;
 try {
+  Piscina = require('piscina');
   cliProgress = require('cli-progress');
 } catch (e) {
   console.log('Dependencies not found');
-  console.log('"npm i cli-progress" then try again!');
+  console.log('"npm i cli-progress piscina" then try again!');
 
   console.error(e);
   process.exit(1);
@@ -54,7 +56,11 @@ async function processFilterRules(filterRulesUrl) {
   }
 
   /** @type Set<string> */
-  const whitelistDomainSets = new Set(['localhost', 'analytics.google.com']);
+  const whitelistDomainSets = new Set([
+    'localhost',
+    'analytics.google.com',
+    'msa.cdn.mediaset.net' // Added manually using DOMAIN-KEYWORDS
+  ]);
   /** @type Set<string> */
   const blacklistDomainSets = new Set();
 
@@ -75,10 +81,28 @@ async function processFilterRules(filterRulesUrl) {
       return;
     }
 
-    if (line.startsWith('@@||') && line.endsWith('^')) {
-      whitelistDomainSets.add(`${line.replaceAll('@@||', '').replaceAll('^', '')}`.trim());
-    } else if (line.startsWith('||') && line.endsWith('^')) {
-      blacklistDomainSets.add(`${line.replaceAll('||', '').replaceAll('^', '')}`.trim());
+    if (line.startsWith('@@||')
+      && (
+        line.endsWith('^')
+        || line.endsWith('^|')
+      )
+    ) {
+      whitelistDomainSets.add(`${line.replaceAll('@@||', '').replaceAll('^|', '').replaceAll('^', '')}`.trim());
+    } else if (
+      line.startsWith('||')
+      && (
+        line.endsWith('^')
+        || line.endsWith('^|')
+      )
+    ) {
+      blacklistDomainSets.add(`.${line.replaceAll('||', '').replaceAll('^|', '').replaceAll('^', '')}`.trim());
+    } else if (line.startsWith('://')
+      && (
+        line.endsWith('^')
+        || line.endsWith('^|')
+      )
+    ) {
+      blacklistDomainSets.add(`${line.replaceAll('://', '').replaceAll('^|', '').replaceAll('^', '')}`.trim());
     }
   });
 
@@ -132,8 +156,6 @@ async function processFilterRules(filterRulesUrl) {
   // Parse from AdGuard Filters
   /** @type Set<string> */
   const filterRuleWhitelistDomainSets = new Set();
-  /** @type Set<string> */
-  const filterRuleBlacklistDomainSets = new Set();
   (await Promise.all([
     processFilterRules('https://easylist.to/easylist/easylist.txt'),
     processFilterRules('https://adguardteam.github.io/AdGuardSDNSFilter/Filters/filter.txt'),
@@ -143,15 +165,10 @@ async function processFilterRules(filterRulesUrl) {
     processFilterRules('https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_224_Chinese/filter.txt')
   ])).forEach(({ white, black }) => {
     white.forEach(i => filterRuleWhitelistDomainSets.add(i));
-    black.forEach(i => filterRuleBlacklistDomainSets.add(i));
+    black.forEach(i => domainSets.add(i));
   });
 
-  for (const black of filterRuleBlacklistDomainSets) {
-    domainSets.add(`.${black}`);
-  }
-
-  console.log(`Import ${filterRuleBlacklistDomainSets.size} black rules from adguard filters!`);
-  console.log(`Import ${filterRuleWhitelistDomainSets.size} white rules from adguard filters!`);
+  console.log(`Import rules from adguard filters!`);
 
   // Read DOMAIN Keyword
   const domainKeywordsSet = new Set();
@@ -168,55 +185,29 @@ async function processFilterRules(filterRulesUrl) {
 
   // Dedupe domainSets
   console.log(`Start deduping!`);
-  const bar2 = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
-
-  bar2.start(domainSets.size, 0);
-
-  for (const domain of domainSets) {
-    bar2.increment();
-
-    let shouldContinue = false;
-
-    for (const white of filterRuleWhitelistDomainSets) {
-      if (domain.includes(white) || white.includes(domain)) {
-        domainSets.delete(domain);
-        shouldContinue = true;
-        break;
-      }
-    }
-
-    if (shouldContinue) {
-      continue;
-    }
 
-    for (const keyword of domainKeywordsSet) {
-      if (domain.includes(keyword) || keyword.includes(domain)) {
-        domainSets.delete(domain);
-        shouldContinue = true;
-        break;
-      }
-    }
-
-    if (shouldContinue) {
-      continue;
-    }
+  const piscina = new Piscina({
+    filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js')
+  });
 
-    for (const domain2 of domainSets) {
-      if (
-        domain2.startsWith('.')
-        && domain2 !== domain
-        && (
-          domain.endsWith(domain2)
-          || `.${domain}` === domain2
-        )
-      ) {
-        domainSets.delete(domain);
-        break;
-      }
-    }
-  }
+  const res = await Promise.all([
+    piscina.run({ keywords: domainKeywordsSet, input: domainSets }, { name: 'dedupeKeywords' }),
+    piscina.run({ whiteList: filterRuleWhitelistDomainSets, input: domainSets }, { name: 'whitelisted' }),
+    ...sliceIntoChunks(Array.from(domainSets), 5000).map(chunk => piscina.run({ input: chunk, fullSet: domainSets }, { name: 'dedupe' }))
+  ]);
 
-  bar2.stop();
+  res.forEach(set => {
+    set.forEach(i => domainSets.delete(i));
+  });
 
   return fsPromises.writeFile(pathResolve(__dirname, '../List/domainset/reject.conf'), `${[...domainSets].join('\n')}\n`);
 })();
+
+// Split `arr` into consecutive slices of at most `chunkSize` items; the last slice may be shorter.
+function sliceIntoChunks(arr, chunkSize) {
+  // Zero/negative steps never reach the end of `arr`; NaN would emit a single bogus empty chunk.
+  if (!(chunkSize > 0)) throw new RangeError(`chunkSize must be > 0, got ${chunkSize}`);
+  const res = [];
+  for (let i = 0; i < arr.length; i += chunkSize) res.push(arr.slice(i, i + chunkSize));
+  return res;
+}

+ 51 - 0
Build/worker/build-reject-domainset-worker.js

@@ -0,0 +1,51 @@
+/**
+ * Collect the entries of `input` that are already covered by a dot-prefixed entry of `fullSet`.
+ * An entry is covered when `.${entry}` or a proper suffix of it starting with '.' is in `fullSet`.
+ */
+exports.dedupe = ({ fullSet, input }) => {
+  const rules = fullSet instanceof Set ? fullSet : new Set(fullSet);
+  const output = new Set();
+
+  for (const domain of input) {
+    let covered = rules.has(`.${domain}`);
+    // Probe each proper suffix that starts at a dot: a few lookups instead of a scan of fullSet.
+    for (let i = domain.indexOf('.', 1); !covered && i !== -1; i = domain.indexOf('.', i + 1)) {
+      covered = rules.has(domain.slice(i));
+    }
+    if (covered) {
+      output.add(domain);
+    }
+  }
+
+  return output;
+};
+
+/**
+ * Find the entries of `input` that overlap the whitelist: an entry matches when it contains
+ * a whitelist item as a substring, or is itself a substring of one.
+ * @returns {Set} matching entries, in `input` iteration order
+ */
+exports.whitelisted = ({ whiteList, input }) => {
+  const overlapsWhitelist = domain => {
+    for (const allowed of whiteList) {
+      if (domain.includes(allowed) || allowed.includes(domain)) return true;
+    }
+    return false;
+  };
+  return new Set([...input].filter(overlapsWhitelist));
+};
+
+/**
+ * Find the entries of `input` that overlap a keyword: an entry matches when it contains
+ * a keyword as a substring, or is itself a substring of one.
+ * @returns {Set} matching entries, in `input` iteration order
+ */
+exports.dedupeKeywords = ({ keywords, input }) => {
+  const matchesKeyword = domain => {
+    for (const word of keywords) {
+      if (domain.includes(word) || word.includes(domain)) return true;
+    }
+    return false;
+  };
+  return new Set([...input].filter(matchesKeyword));
+};

+ 18 - 62
List/domainset/reject.conf

@@ -12,7 +12,6 @@
 .24log.de
 .24pm-affiliation.com
 .2mdn.net
-.2o7.net
 .2znp09oa.com
 .30ads.com
 .3337723.com
@@ -262,7 +261,6 @@
 .adjug.com
 .adjuggler.com
 .adjuggler.yourdictionary.com
-.adjust.com
 .adjustnetwork.com
 .adk2.co
 .adk2.com
@@ -889,7 +887,6 @@
 .ambiguousquilt.com
 .ambitiousagreement.com
 .americash.com
-.amplitude.com
 .amung.us
 .analytics-production.hapyak.com
 .analytics.adpost.org
@@ -935,7 +932,6 @@
 .apolloprogram.io
 .app-analytics.snapchat.com
 .appboycdn.com
-.appsflyer.com
 .aps.hearstnp.com
 .apsalar.com
 .apture.com
@@ -981,7 +977,6 @@
 .awakebird.com
 .awempire.com
 .awesomeagreement.com
-.awin1.com
 .awstrack.me
 .awzbijw.com
 .axiomaticalley.com
@@ -1397,7 +1392,6 @@
 .crsspxl.com
 .crta.dailymail.co.uk
 .crtv.mate1.com
-.crwdcntrl.net
 .crypto-loot.org
 .ctnetwork.hu
 .cubics.com
@@ -1490,7 +1484,6 @@
 .dotmetrics.net
 .doubleclick.com
 .doubleclick.de
-.doubleclick.net
 .doublepimp.com
 .doubleverify.com
 .doubtfulrainstorm.com
@@ -1786,7 +1779,6 @@
 .harrenmedia.com
 .harrenmedianetwork.com
 .havamedia.net
-.hb.afl.rakuten.co.jp
 .hbb.afl.rakuten.co.jp
 .hdscout.com
 .heap.com
@@ -1959,7 +1951,6 @@
 .jaizouji.com
 .jauchuwa.net
 .jcount.com
-.jdoqocy.com
 .jinkads.de
 .joetec.net
 .js-agent.newrelic.com
@@ -2209,7 +2200,6 @@
 .mlsend.com
 .mltrk.io
 .mmismm.com
-.mmstat.com
 .mmtro.com
 .moartraffic.com
 .moat.com
@@ -2363,7 +2353,6 @@
 .onenetworkdirect.com
 .onestat.com
 .onestatfree.com
-.online-metrix.net
 .online.miarroba.com
 .onlinecash.com
 .onlinecashmethod.com
@@ -2382,7 +2371,6 @@
 .openx.angelsgroup.org.uk
 .openx.cairo360.com
 .openx.kgmedia.eu
-.openx.net
 .openx.skinet.cz
 .openx.smcaen.fr
 .openx2.kytary.cz
@@ -2933,7 +2921,6 @@
 .stat.zenon.net
 .stat24.com
 .stat24.meta.ua
-.statcounter.com
 .statdynamic.com
 .staticads.btopenworld.com
 .statistik-gallup.net
@@ -3041,7 +3028,6 @@
 .tealeaf.com
 .tealium.cbsnews.com
 .tealium.com
-.tealiumiq.com
 .tedioustooth.com
 .teenrevenue.com
 .teenyvolcano.com
@@ -3170,7 +3156,6 @@
 .trackmysales.com
 .trackuhub.com
 .tradeadexchange.com
-.tradedoubler.com
 .trading-rtbg.com
 .traffic-exchange.com
 .traffic-redirecting.com
@@ -3998,7 +3983,6 @@ cache.soloth.com
 c-adash.m.taobao.com
 cal.meizu.com
 calopenupdate.comm.miui.com
-cas.pxl.ace.advertising.com
 catalog.video.msn.com
 c.baidu.com
 cb.alimama.cn
@@ -4037,7 +4021,6 @@ clicklog.moviebox.baofeng.net
 click.mz.simba.taobao.com
 click.qianqian.com
 clicks.beap.bc.yahoo.com
-click.simba.taobao.com
 click.tianyaui.com
 click.tz.simba.taobao.com
 click.uve.mobile.sina.cn
@@ -4619,7 +4602,6 @@ mazu.3g.qq.com
 m.bailingjiankang.com
 m.breezily168.com
 mb.yidianzixun.com
-mclick.simba.taobao.com
 m.clkservice.youdao.com
 m.clubske.com
 mcore.vcgame.cn
@@ -4836,7 +4818,6 @@ pindao.huoban.taobao.com
 ping.acc.sogou.com
 pinghot.qq.com
 ping.pinyin.sogou.com
-pixel.advertising.com
 pix.impdesk.com
 pixiu.shangshufang.ksosoft.com
 plat.videojj.com
@@ -4863,7 +4844,6 @@ psfq.gou.sogou.com
 ps.haohaowan8.com
 p.store.qq.com
 publish-pic-cpu.baidu.com
-pub.pxl.ace.advertising.com
 pub.se.360.cn
 puds.test.uae.uc.cn
 pull.push.sogou.com
@@ -4948,7 +4928,6 @@ rs.haohaowan8.com
 rtas.videocc.net
 rt.funshion.net
 rtlog.vidown.cn
-rub.pxl.ace.advertising.com
 s.051352.com
 s.0594529.com
 s1.cmfu.com
@@ -5638,18 +5617,6 @@ iotapi.adups.com
 ph5t.adups.com
 push5.adups.com
 f.aduwant.com
-pxl.ace.advertising.com
-ace-tag.advertising.com
-cmp.advertising.com
-cdn.cmp.advertising.com
-o.cmp.advertising.com
-sp.advertising.com
-tag.sp.advertising.com
-ssp.advertising.com
-prod-m-node-1111.ssp.advertising.com
-prod-m-node-1211.ssp.advertising.com
-prod-m-node-2111.ssp.advertising.com
-prod-m-node-3111.ssp.advertising.com
 adview.pl
 d.adx.io
 s.adx.io
@@ -6720,7 +6687,6 @@ cdn.madkudu.com
 track.mads.ph
 acc.magixite.com
 api.magmamobile.com
-ad.mail.ru
 assets.mantisadnetwork.com
 ecs.mantisadnetwork.com
 resize.mantisadnetwork.com
@@ -14744,7 +14710,6 @@ file.apicvn.com
 .imaarracon.co
 .imageadvantage.net
 .imagineposition.com
-.imasdk.googleapis.com
 .imbetan.info
 .imcpqxmtlapagng.com
 .imediarevenue.com
@@ -22925,7 +22890,6 @@ file.apicvn.com
 .ad.outsidehub.com
 .ad.reklamport.com
 .ad.smartmediarep.com
-.adaptv.advertising.com
 .ads-sg.tiktok.com
 .ads.audio.thisisdax.com
 .ads.sportradar.com
@@ -23405,6 +23369,10 @@ file.apicvn.com
 .x0r.urlgalleries.net
 .yotta.scrolller.com
 .ytre9jk.txxx.com
+.arkfacialdaybreak.com
+.ligninenchant.com
+.ouphoarg.com
+.hearthatdistant.top
 .aggingleag.one
 .oweqas.xyz
 .gyrivehmic.com
@@ -26370,7 +26338,6 @@ file.apicvn.com
 .adready.com
 .adreadytractions.com
 .adrecord.com
-.adrelayer.com
 .adresellers.com
 .adrise.de
 .adro.co
@@ -26680,7 +26647,6 @@ file.apicvn.com
 .cuelinks.com
 .currentlyobsessed.me
 .cybmas.com
-.dable.io
 .datawrkz.com
 .dating-service.net
 .datinggold.com
@@ -26699,7 +26665,6 @@ file.apicvn.com
 .dochase.com
 .dollarsponsor.com
 .domainadvertising.com
-.dotomi.com
 .dreamaquarium.com
 .dt00.net
 .dt07.net
@@ -27226,6 +27191,7 @@ file.apicvn.com
 .sndkorea.co.kr
 .sni.ps
 .snigelweb.com
+.snowads.net
 .so-excited.com
 .soagitet.net
 .soalouve.com
@@ -27305,7 +27271,6 @@ file.apicvn.com
 .tapnative.com
 .tardangro.com
 .targeterra.info
-.targetspot.com
 .tattomedia.com
 .tbaffiliate.com
 .tcadops.ca
@@ -27519,7 +27484,6 @@ file.apicvn.com
 .api.ads.watchabc.go.com
 .ad.prismamediadigital.com
 .socdm.com
-.appodeal.com
 .a.320981a9244924ef86ebdbb9eb877e9f21ce83f1e3cc89b2c5e7c3ff.com
 .a.76674bdad304297eda3d325f449f6f49.com
 .geoloc.yospace.com
@@ -27600,6 +27564,7 @@ file.apicvn.com
 .restartad.com
 .appgiftwall.oss-us-west-1.aliyuncs.com
 .appgift.sinaapp.com
+.ad.mail.ru
 .servedby.o2.co.uk
 .amillionads.com
 .mcoreads.com
@@ -28015,6 +27980,7 @@ file.apicvn.com
 .n152adserv.com
 .rg.yottos.com
 .weboohost.com
+.mon.snssdk.com
 .log-hl.snssdk.com
 .ib.snssdk.com
 .xlog.snssdk.com
@@ -28118,7 +28084,6 @@ file.apicvn.com
 .kissmetrics.io
 .counter.kikibobo.top
 .c1.politexpert.ru
-.logrocket.com
 .ipmeta.io
 .datasign.co
 .analytics.daasrv.net
@@ -28702,7 +28667,6 @@ file.apicvn.com
 .pfx.sma-clsystem.info
 .tangankanan.net
 .topiz.ru
-.hit.interia.pl
 .socialhoney.co
 .trk.olnl.net
 .ustat.pro
@@ -28744,7 +28708,6 @@ file.apicvn.com
 .plugin.management
 .back.marketing
 .nsaudience.pl
-.rutarget.ru
 .adx.com.ru
 .visitor-analytics.io
 .giraff.io
@@ -29511,7 +29474,6 @@ file.apicvn.com
 .gallupnet.fi
 .gbotvisit.com
 .geistm.com
-.gemius.pl
 .gemtrackers.com
 .generaltracking.de
 .genieesspv.jp
@@ -29811,7 +29773,6 @@ file.apicvn.com
 .logaholic.com
 .logcounter.com
 .logdy.com
-.logentries.com
 .loger.ru
 .lognormal.net
 .logxp.ru
@@ -30073,8 +30034,6 @@ file.apicvn.com
 .pclicks.com
 .pcspeedup.com
 .peakcounter.dk
-.peerius.com
-.perfdrive.com
 .performax.cz
 .performtracking.com
 .perion.com
@@ -30329,7 +30288,6 @@ file.apicvn.com
 .shrinktheweb.com
 .siftscience.com
 .signup-way.com
-.silverpop.com
 .silverpush.co
 .simplereach.com
 .simplycast.us
@@ -31435,6 +31393,8 @@ file.apicvn.com
 .um-public-panel-prod.s3.amazonaws.com
 .collection-endpoint-prod.herokuapp.com
 .collection-endpoint-staging.herokuapp.com
+.log.appdriver.jp
+.gtm.udemy.com
 .t.clic2buy.com
 .blue.fox8.com
 .astat.nikkei.co.jp
@@ -32623,7 +32583,6 @@ file.apicvn.com
 .analytics.archive.org
 .analytics.artirix.com
 .analytics.aweber.com
-.analytics.edgekey.net
 .analytics.edgesuite.net
 .analytics.electro-com.ru
 .analytics.fairfax.com.au
@@ -33013,6 +32972,14 @@ file.apicvn.com
 .int.vlancaa.site
 .int.vlancaa.fun
 .tok.vaicore.xyz
+.aff-report.lbesecapi.com
+.aff-policy.lbesecapi.com
+.psi.lbesecapi.com
+.report-ps.meettech.net
+.f-log-mobile.grammarly.io
+.f-log-mobile-ios.grammarly.io
+.ada-tr.staff-start.com
+.analytics.liveact-vault.com
 .api.statsig.com
 .logs-01.loggly.com
 .analytics.300624.com
@@ -33255,7 +33222,6 @@ file.apicvn.com
 .api.crittercism.com
 .api.gimbal.com
 .api.sec.miui.com
-.appmetrica.yandex.com
 .beacon.carfax.com
 .beacon.shazam.com
 .beha.ksmobile.com
@@ -33602,7 +33568,6 @@ file.apicvn.com
 .geotargetly-1a441.appspot.com
 .geotargetly.co
 .getambassador.com
-.getblueshift.com
 .getpublica.com
 .getviously.com
 .gglscr.online
@@ -33752,7 +33717,6 @@ file.apicvn.com
 .mynsystems.com
 .mytrack.pro
 .n74s9.com
-.nakanohito.jp
 .natero.com
 .ndf81.com
 .nejmqianyan.cn
@@ -33886,7 +33850,6 @@ file.apicvn.com
 .segmetrics.io
 .selectmedia.asia
 .sellebrity.com
-.sellpoint.net
 .semantic-finder.com
 .seon.io
 .seondnsresolve.com
@@ -33973,7 +33936,6 @@ file.apicvn.com
 .trackonomics.net
 .traffer.net
 .trafficfuel.com
-.trafficguard.ai
 .trafficroots.com
 .transactionale.com
 .traq.li
@@ -34770,7 +34732,6 @@ file.apicvn.com
 .events.split.io
 .events.yourcx.io
 .eventtracker.videostrip.com
-.exacttarget.com
 .fcmatch.google.com
 .fcmatch.youtube.com
 .filament-stats.herokuapp.com
@@ -35383,7 +35344,6 @@ file.apicvn.com
 .adplan7.com
 .adpon.jp
 .af-z.jp
-.afi-b.com
 .aid-ad.jp
 .amoad.com
 .analyticsip.net
@@ -35394,7 +35354,6 @@ file.apicvn.com
 .d2-apps.net
 .deteql.net
 .e-click.jp
-.ebis.ne.jp
 .ec-concier.com
 .ec-optimizer.com
 .eltex.co.jp
@@ -35405,7 +35364,6 @@ file.apicvn.com
 .gmossp-sp.jp
 .gsspcln.jp
 .gyro-n.com
-.i2i.jp
 .iid-network.jp
 .iogous.com
 .kaizenplatform.net
@@ -36011,7 +35969,6 @@ file.apicvn.com
 .hzucji.com
 .0iqiehoa35.ru
 .inforuss.site
-.ouphoarg.com
 .dfffpyu8fhawcnd.ru
 .dfnetwork.link
 .fodorw.com
@@ -37187,7 +37144,6 @@ file.apicvn.com
 .adward.ru
 .adwidgets.ru
 .adwile.net
-.adwolf.ru
 .adxgeeks.com
 .adylalahb.ru
 .aerontre.com
@@ -40527,7 +40483,6 @@ file.apicvn.com
 .aff.jskyservices.com
 .a-i-ad.com
 .managedweb.net
-.thench.net
 .bannerfarm.aolp.jp
 .koukoku.red
 .amaprop.net
@@ -42356,6 +42311,7 @@ file.apicvn.com
 .wfgo5vjs6cp0cdjx7bms3cn9lof2p01xho0pxgm1fd2fah3j5d8qd7x1d6eu.me
 .govguangxi.cn
 .w8sg6.xyz
+.98765432100.site
 .00-gov.cn
 .000123456789.site
 .0024aaaa.com

+ 2 - 0
List/non_ip/reject.conf

@@ -34,6 +34,8 @@ DOMAIN-KEYWORD,adsyndication
 DOMAIN-KEYWORD,bahoom,REJECT
 DOMAIN,daisydiskapp.com,REJECT
 
+AND, ((DOMAIN-SUFFIX,msa.cdn.mediaset.net), (DOMAIN-KEYWORD,adv0))
+
 # >> Tencent Lemon
 
 PROCESS-NAME,Tencent Lemon,REJECT