Browse Source

Refactor: new MITM hostname sgmodule codegen

SukkaW 1 year ago
parent
commit
5577f3b14b

+ 10 - 2
Build/build-common.ts

@@ -14,6 +14,7 @@ import { DomainsetOutput, RulesetOutput } from './lib/create-file';
 const MAGIC_COMMAND_SKIP = '# $ custom_build_script';
 const MAGIC_COMMAND_SKIP = '# $ custom_build_script';
 const MAGIC_COMMAND_TITLE = '# $ meta_title ';
 const MAGIC_COMMAND_TITLE = '# $ meta_title ';
 const MAGIC_COMMAND_DESCRIPTION = '# $ meta_description ';
 const MAGIC_COMMAND_DESCRIPTION = '# $ meta_description ';
+const MAGIC_COMMAND_SGMODULE_MITM_HOSTNAMES = '# $ sgmodule_mitm_hostnames ';
 
 
 const domainsetSrcFolder = 'domainset' + path.sep;
 const domainsetSrcFolder = 'domainset' + path.sep;
 
 
@@ -73,6 +74,7 @@ const processFile = (span: Span, sourcePath: string) => {
 
 
     let title = '';
     let title = '';
     const descriptions: string[] = [];
     const descriptions: string[] = [];
+    let sgmodulePathname: string | null = null;
 
 
     try {
     try {
       for await (const line of readFileByLine(sourcePath)) {
       for await (const line of readFileByLine(sourcePath)) {
@@ -90,6 +92,11 @@ const processFile = (span: Span, sourcePath: string) => {
           continue;
           continue;
         }
         }
 
 
+        if (line.startsWith(MAGIC_COMMAND_SGMODULE_MITM_HOSTNAMES)) {
+          sgmodulePathname = line.slice(MAGIC_COMMAND_SGMODULE_MITM_HOSTNAMES.length).trim();
+          continue;
+        }
+
         const l = processLine(line);
         const l = processLine(line);
         if (l) {
         if (l) {
           lines.push(l);
           lines.push(l);
@@ -100,7 +107,7 @@ const processFile = (span: Span, sourcePath: string) => {
       console.trace(e);
       console.trace(e);
     }
     }
 
 
-    return [title, descriptions, lines] as const;
+    return [title, descriptions, lines, sgmodulePathname] as const;
   });
   });
 };
 };
 
 
@@ -148,7 +155,7 @@ async function transformRuleset(parentSpan: Span, sourcePath: string, relativePa
         throw new TypeError(`Invalid type: ${type}`);
         throw new TypeError(`Invalid type: ${type}`);
       }
       }
 
 
-      const [title, descriptions, lines] = res;
+      const [title, descriptions, lines, sgmodulePathname] = res;
 
 
       let description: string[];
       let description: string[];
       if (descriptions.length) {
       if (descriptions.length) {
@@ -162,6 +169,7 @@ async function transformRuleset(parentSpan: Span, sourcePath: string, relativePa
       return new RulesetOutput(span, id, type)
       return new RulesetOutput(span, id, type)
         .withTitle(title)
         .withTitle(title)
         .withDescription(description)
         .withDescription(description)
+        .withMitmSgmodulePath(sgmodulePathname)
         .addFromRuleset(lines)
         .addFromRuleset(lines)
         .write();
         .write();
     });
     });

+ 0 - 24
Build/build-mitm-hostname.ts

@@ -7,30 +7,6 @@ import { getHostname } from 'tldts';
 import { OUTPUT_SURGE_DIR } from './constants/dir';
 import { OUTPUT_SURGE_DIR } from './constants/dir';
 
 
 const PRESET_MITM_HOSTNAMES = [
 const PRESET_MITM_HOSTNAMES = [
-  // '*baidu.com',
-  '*.ydstatic.com',
-  // '*snssdk.com',
-  // '*musical.com',
-  // '*musical.ly',
-  // '*snssdk.ly',
-  'api.zhihu.com',
-  'www.zhihu.com',
-  'api.chelaile.net.cn',
-  'atrace.chelaile.net.cn',
-  '*.meituan.net',
-  'ctrl.playcvn.com',
-  'ctrl.playcvn.net',
-  'ctrl.zmzapi.com',
-  'ctrl.zmzapi.net',
-  'api.zhuishushenqi.com',
-  'b.zhuishushenqi.com',
-  'ggic.cmvideo.cn',
-  'ggic2.cmvideo.cn',
-  'mrobot.pcauto.com.cn',
-  'mrobot.pconline.com.cn',
-  'home.umetrip.com',
-  'discardrp.umetrip.com',
-  'startup.umetrip.com',
   'dsp-x.jd.com',
   'dsp-x.jd.com',
   'bdsp-x.jd.com'
   'bdsp-x.jd.com'
 ];
 ];

+ 28 - 3
Build/lib/rules/base.ts

@@ -1,4 +1,4 @@
-import { OUTPUT_CLASH_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../../constants/dir';
+import { OUTPUT_CLASH_DIR, OUTPUT_MODULES_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../../constants/dir';
 import type { Span } from '../../trace';
 import type { Span } from '../../trace';
 import { createTrie } from '../trie';
 import { createTrie } from '../trie';
 import stringify from 'json-stringify-pretty-compact';
 import stringify from 'json-stringify-pretty-compact';
@@ -256,7 +256,7 @@ export abstract class RuleOutput<TPreprocessed = unknown> {
     invariant(this.title, 'Missing title');
     invariant(this.title, 'Missing title');
     invariant(this.description, 'Missing description');
     invariant(this.description, 'Missing description');
 
 
-    await Promise.all([
+    const promises = [
       compareAndWriteFile(
       compareAndWriteFile(
         this.span,
         this.span,
         withBannerArray(
         withBannerArray(
@@ -282,12 +282,37 @@ export abstract class RuleOutput<TPreprocessed = unknown> {
         this.singbox(),
         this.singbox(),
         path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json')
         path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json')
       )
       )
-    ]);
+    ];
+
+    if (this.mitmSgmodule) {
+      const sgmodule = this.mitmSgmodule();
+      const sgMOdulePath = this.mitmSgmodulePath ?? path.join(this.type, this.id + '.sgmodule');
+      if (sgmodule) {
+        promises.push(
+          compareAndWriteFile(
+            this.span,
+            sgmodule,
+            path.join(OUTPUT_MODULES_DIR, sgMOdulePath)
+          )
+        );
+      }
+    }
+
+    await Promise.all(promises);
   }
   }
 
 
   abstract surge(): string[];
   abstract surge(): string[];
   abstract clash(): string[];
   abstract clash(): string[];
   abstract singbox(): string[];
   abstract singbox(): string[];
+
+  protected mitmSgmodulePath: string | null = null;
+  withMitmSgmodulePath(path: string | null) {
+    if (path) {
+      this.mitmSgmodulePath = path;
+    }
+    return this;
+  }
+  abstract mitmSgmodule?(): string[] | null;
 }
 }
 
 
 export const fileEqual = async (linesA: string[], source: AsyncIterable<string>): Promise<boolean> => {
 export const fileEqual = async (linesA: string[], source: AsyncIterable<string>): Promise<boolean> => {

+ 96 - 0
Build/lib/rules/ruleset.ts

@@ -6,6 +6,9 @@ import { appendArrayFromSet } from '../misc';
 import type { SingboxSourceFormat } from '../singbox';
 import type { SingboxSourceFormat } from '../singbox';
 import { sortDomains } from '../stable-sort-domain';
 import { sortDomains } from '../stable-sort-domain';
 import { RuleOutput } from './base';
 import { RuleOutput } from './base';
+import picocolors from 'picocolors';
+import { normalizeDomain } from '../normalize-domain';
+import { isProbablyIpv4 } from '../is-fast-ip';
 
 
 type Preprocessed = [domain: string[], domainSuffix: string[], sortedDomainRules: string[]];
 type Preprocessed = [domain: string[], domainSuffix: string[], sortedDomainRules: string[]];
 
 
@@ -131,4 +134,97 @@ export class RulesetOutput extends RuleOutput<Preprocessed> {
 
 
     return RuleOutput.jsonToLines(singbox);
     return RuleOutput.jsonToLines(singbox);
   }
   }
+
+  mitmSgmodule(): string[] | null {
+    if (this.urlRegex.size === 0 || this.mitmSgmodulePath === null) {
+      return null;
+    }
+
+    const urlRegexResults: Array<{ origin: string, processed: string[] }> = [];
+
+    const parsedFailures: Array<[original: string, processed: string]> = [];
+    const parsed: Array<[original: string, domain: string]> = [];
+
+    for (let urlRegex of this.urlRegex) {
+      if (
+        urlRegex.startsWith('http://')
+        || urlRegex.startsWith('^http://')
+      ) {
+        continue;
+      }
+      if (urlRegex.startsWith('^https?://')) {
+        urlRegex = urlRegex.slice(10);
+      }
+      if (urlRegex.startsWith('^https://')) {
+        urlRegex = urlRegex.slice(9);
+      }
+
+      const potentialHostname = urlRegex.split('/')[0]
+        // pre process regex
+        .replaceAll(String.raw`\.`, '.')
+        .replaceAll('.+', '*')
+        .replaceAll(/([a-z])\?/g, '($1|)')
+        // convert regex to surge hostlist syntax
+        .replaceAll('([a-z])', '?')
+        .replaceAll(String.raw`\d`, '?')
+        .replaceAll(/\*+/g, '*');
+
+      let processed: string[] = [potentialHostname];
+
+      const matches = [...potentialHostname.matchAll(/\((?:([^()|]+)\|)+([^()|]*)\)/g)];
+
+      if (matches.length > 0) {
+        const replaceVariant = (combinations: string[], fullMatch: string, options: string[]): string[] => {
+          const newCombinations: string[] = [];
+
+          combinations.forEach(combination => {
+            options.forEach(option => {
+              newCombinations.push(combination.replace(fullMatch, option));
+            });
+          });
+
+          return newCombinations;
+        };
+
+        for (let i = 0; i < matches.length; i++) {
+          const match = matches[i];
+          const [_, ...options] = match;
+
+          processed = replaceVariant(processed, _, options);
+        }
+      }
+
+      urlRegexResults.push({
+        origin: potentialHostname,
+        processed
+      });
+    }
+
+    for (const i of urlRegexResults) {
+      for (const processed of i.processed) {
+        if (normalizeDomain(
+          processed
+            .replaceAll('*', 'a')
+            .replaceAll('?', 'b')
+        )) {
+          parsed.push([i.origin, processed]);
+        } else if (!isProbablyIpv4(processed)) {
+          parsedFailures.push([i.origin, processed]);
+        }
+      }
+    }
+
+    console.error(picocolors.bold('Parsed Failed'));
+    if (parsedFailures.length > 0) {
+      console.table(parsedFailures);
+    }
+
+    return [
+      '#!name=[Sukka] Surge Reject MITM',
+      '#!desc=为 URL Regex 规则组启用 MITM',
+      '',
+      '[MITM]',
+      'hostname = %APPEND% ' + Array.from(new Set(parsed.map(i => i[1]))).join(', ')
+    ];
+  }
 }
 }

File diff suppressed because it is too large
+ 0 - 6
Modules/sukka_mitm_hostnames.sgmodule


+ 0 - 21
Modules/sukka_url_rewrite.sgmodule

@@ -1,21 +0,0 @@
-#!name=[Sukka] URL Rewrite
-#!desc=Enable this module to use Sukka URL Rewrite rules
-
-[MITM]
-
-hostname = %APPEND% api.abema.io, union.click.jd.com, nomo.dafork.com
-
-[URL Rewrite]
-# AbeamTV Unlock
-^https?://api\.abema\.io/v\d/ip/check - reject
-
-# JD Protection
-^https?://union\.click\.jd\.com/jda? http://union.click.jd.com/jda?adblock= header
-^https?://union\.click\.jd\.com/sem.php? http://union.click.jd.com/sem.php?adblock= header
-
-# Special AD Block Section
-
-# >> eLong
-^https?://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/(adgateway|adv)/ - reject
-# >> NOMO
-^https?://nomo.dafork.com/api/v3/iap/ios_product_list https://ruleset.skk.moe/Mock/nomo.json 302

+ 4 - 2
Source/non_ip/reject-url-regex.conf

@@ -1,6 +1,7 @@
 # $ meta_title Sukka's Ruleset - Reject URL
 # $ meta_title Sukka's Ruleset - Reject URL
 # $ meta_description The ruleset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining
 # $ meta_description The ruleset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining
 # $ meta_description Need Surge Module: https://ruleset.skk.moe/Modules/sukka_mitm_hostnames.sgmodule
 # $ meta_description Need Surge Module: https://ruleset.skk.moe/Modules/sukka_mitm_hostnames.sgmodule
+# $ sgmodule_mitm_hostnames sukka_mitm_hostnames.sgmodule
 
 
 # URL-REGEX,^https?://.+\.youtube\.com/api/stats/.+adformat
 # URL-REGEX,^https?://.+\.youtube\.com/api/stats/.+adformat
 # URL-REGEX,^https?://.+\.youtube\.com/api/stats/ads
 # URL-REGEX,^https?://.+\.youtube\.com/api/stats/ads
@@ -45,7 +46,7 @@ URL-REGEX,^https://img-ys011\.didistatic\.com/static/ad_oss/
 
 
 # >> Hanglvzongheng
 # >> Hanglvzongheng
 URL-REGEX,^https?://(discardrp|startup)\.umetrip\.com/gateway/api/umetrip/native
 URL-REGEX,^https?://(discardrp|startup)\.umetrip\.com/gateway/api/umetrip/native
-URL-REGEX,^https?://(114\.115\.217\.129)|(home\.umetrip\.com)/gateway/api/umetrip/native$
+URL-REGEX,^https?://(114\.115\.217\.129|home\.umetrip\.com)/gateway/api/umetrip/native$
 
 
 # >> iQiyi
 # >> iQiyi
 URL-REGEX,^https?://act\.vip\.iqiyi\.com/interact/api/show\.do
 URL-REGEX,^https?://act\.vip\.iqiyi\.com/interact/api/show\.do
@@ -176,6 +177,7 @@ URL-REGEX,^https?://api\.daydaycook\.com\.cn/daydaycook/server/ad/
 URL-REGEX,^https?://cms\.daydaycook\.com\.cn/api/cms/advertisement/
 URL-REGEX,^https?://cms\.daydaycook\.com\.cn/api/cms/advertisement/
 # >> eLong
 # >> eLong
 URL-REGEX,^https?://mobile-api2011\.elong\.com/(adgateway|adv)/
 URL-REGEX,^https?://mobile-api2011\.elong\.com/(adgateway|adv)/
+URL-REGEX,^http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/(adgateway|adv)/
 # >> Flyer Tea
 # >> Flyer Tea
 URL-REGEX,^https?://www\.flyertea\.com/source/plugin/mobile/mobile\.php\?module=advis
 URL-REGEX,^https?://www\.flyertea\.com/source/plugin/mobile/mobile\.php\?module=advis
 # >> Foodie
 # >> Foodie
@@ -340,4 +342,4 @@ URL-REGEX,^https?://dspsdk\.abreader\.com/v\d/api/ad\?
 # URL-REGEX,^https?://itunes\.apple\.com/lookup\?id=575826903
 # URL-REGEX,^https?://itunes\.apple\.com/lookup\?id=575826903
 URL-REGEX,^https?://mi\.gdt\.qq\.com/gdt_mview\.fcg
 URL-REGEX,^https?://mi\.gdt\.qq\.com/gdt_mview\.fcg
 # >> Kugou Music
 # >> Kugou Music
-URL-REGEX,^https?://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/(EcomResourceServer/AdPlayPage/adinfo|MobileAdServer/)
+URL-REGEX,^http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/(EcomResourceServer/AdPlayPage/adinfo|MobileAdServer/)

Some files were not shown because too many files changed in this diff