| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160 |
- import { readFileByLine } from './lib/fetch-text-by-line';
- import pathFn from 'path';
- import table from 'table';
- import listDir from '@sukka/listdir';
- import { green, yellow } from 'picocolors';
- import { processLineFromReadline } from './lib/process-line';
- import { getHostname } from 'tldts';
/**
 * Hostnames that are always part of the MITM hostname list.
 *
 * The script seeds its result set with these entries before adding the
 * hostnames it manages to parse out of the `URL-REGEX` rules, so they act as
 * a fallback for rules whose hostname cannot be parsed automatically.
 * A leading `*` is a wildcard.
 */
const PRESET_MITM_HOSTNAMES: string[] = [
  '*.ydstatic.com',
  'api.zhihu.com', 'www.zhihu.com',
  'api.chelaile.net.cn', 'atrace.chelaile.net.cn',
  '*.meituan.net',
  'ctrl.playcvn.com', 'ctrl.playcvn.net',
  'ctrl.zmzapi.com', 'ctrl.zmzapi.net',
  'api.zhuishushenqi.com', 'b.zhuishushenqi.com',
  'ggic.cmvideo.cn', 'ggic2.cmvideo.cn',
  'mrobot.pcauto.com.cn', 'mrobot.pconline.com.cn',
  'home.umetrip.com', 'discardrp.umetrip.com', 'startup.umetrip.com',
  'dsp-x.jd.com', 'bdsp-x.jd.com'
  // Currently disabled entries, kept for reference:
  // '*baidu.com',
  // '*snssdk.com',
  // '*musical.com',
  // '*musical.ly',
  // '*snssdk.ly',
];
/**
 * Script entry point.
 *
 * 1. Reads every file under `../List/` and collects the pattern field of each
 *    `URL-REGEX` rule that does not mention plain `http://`.
 * 2. Reduces each pattern to a wildcard hostname candidate and tries to parse
 *    a hostname out of it.
 * 3. Prints the resulting `hostname = %APPEND% ...` line, a table of parsed
 *    hostnames next to the rule they came from, and the rules that could not
 *    be parsed.
 */
(async () => {
  type UrlRegexEntry = { origin: string, processed: string };

  /**
   * Turn the regex source of a URL-REGEX rule into a wildcard hostname
   * candidate: drop the scheme prefix, keep only the part before the first
   * `/`, and collapse common regex constructs into `*`.
   */
  const toHostPattern = (urlRegex: string) => urlRegex
    .replaceAll('^https?://', '')
    .replaceAll('^https://', '')
    .replaceAll('^http://', '')
    .split('/')[0]
    .replaceAll('\\.', '.')
    .replaceAll('.+', '*')
    .replaceAll('\\d', '*')
    .replaceAll('([a-z])', '*')
    .replaceAll('[a-z]', '*')
    .replaceAll('([0-9])', '*')
    .replaceAll('[0-9]', '*')
    .replaceAll(/{.+?}/g, '') // quantifiers such as {1,3} carry no hostname information
    .replaceAll(/\*+/g, '*'); // runs of wildcards mean the same as a single one

  const folderListPath = pathFn.resolve(__dirname, '../List/');
  const rulesets = await listDir(folderListPath);

  // Every file produces its own array. Promise.all returns them in the order
  // of `rulesets`, so the final output does not depend on I/O completion order.
  const entriesPerFile = await Promise.all(rulesets.map(async file => {
    const content = await processLineFromReadline(readFileByLine(pathFn.join(folderListPath, file)));
    const entries: UrlRegexEntry[] = [];
    for (const line of content) {
      if (!line.startsWith('URL-REGEX') || line.includes('http://')) continue;
      const origin = line.split(',')[1];
      // A rule without a pattern field has nothing to parse.
      if (origin === undefined) continue;
      entries.push({ origin, processed: toHostPattern(origin) });
    }
    return entries;
  }));

  // De-duplicate by the rule text. A Set of the entry objects would compare
  // them by reference and therefore never remove anything.
  const seenOrigins = new Set<string>();
  const dedupedUrlRegexPaths: UrlRegexEntry[] = [];
  for (const entries of entriesPerFile) {
    for (const entry of entries) {
      if (seenOrigins.has(entry.origin)) continue;
      seenOrigins.add(entry.origin);
      dedupedUrlRegexPaths.push(entry);
    }
  }

  // Parse each candidate once and reuse the result for both reports below.
  const resolved = dedupedUrlRegexPaths.map(entry => ({
    entry,
    hostname: getHostnameSafe(entry.processed)
  }));

  const mitmDomains = new Set(PRESET_MITM_HOSTNAMES); // Special case for parsed failed
  const parsedFailures = new Set<string>();
  for (const { entry, hostname } of resolved) {
    if (hostname) {
      mitmDomains.add(hostname);
    } else {
      parsedFailures.add(`${entry.origin} ${entry.processed} ${hostname}`);
    }
  }

  // Regular expressions used only to colour the report: a parsed hostname is
  // shown in green when one of these matches it, in yellow otherwise.
  const mitmDomainsRegExpArray = Array.from(mitmDomains)
    .filter(i => {
      return i.length > 3
        && !i.includes('.mp4') // Special Case
        && i !== '(www.)' // Special Case
        && !(i !== '*.meituan.net' && i.endsWith('.meituan.net'))
        && !i.startsWith('.')
        && !i.endsWith('.')
        && !i.endsWith('*');
    })
    // After escaping, a wildcard appears as `\*`; turn it back into "anything".
    .map(i => new RegExp(escapeRegExp(i).replaceAll('\\*', '(.*)')));

  const parsedDomainsData: Array<[string, string]> = [];
  for (const { entry, hostname } of resolved) {
    if (!hostname) continue;
    const paint = matchWithRegExpArray(hostname, mitmDomainsRegExpArray) ? green : yellow;
    parsedDomainsData.push([paint(hostname), entry.origin]);
  }

  console.log('Mitm Hostnames:');
  console.log(`hostname = %APPEND% ${Array.from(mitmDomains).join(', ')}`);
  console.log('--------------------');
  console.log('Parsed Sucessed:');
  console.log(table.table(parsedDomainsData, {
    border: table.getBorderCharacters('void'),
    columnDefault: {
      paddingLeft: 0,
      paddingRight: 3
    },
    drawHorizontalLine: () => false
  }));
  console.log('--------------------');
  console.log('Parsed Failed');
  console.log([...parsedFailures].join('\n'));
})();
- /** Util function */
/**
 * Parse a hostname with tldts' `getHostname` and reject suspicious results.
 *
 * @param input the text to extract a hostname from
 * @returns whatever `getHostname` returned when that value is falsy;
 *   `null` when the hostname contains a character other than whitespace,
 *   word characters, `*`, `.` or `-`; otherwise the hostname itself
 */
function getHostnameSafe(input: string) {
  const hostname = getHostname(input);
  // Nothing to validate: hand back the falsy result unchanged.
  if (!hostname) return hostname;
  return /[^\s\w*.-]/.test(hostname) ? null : hostname;
}
/**
 * Check whether at least one of the given regular expressions matches `input`.
 *
 * @param input the string to test
 * @param regexps the candidate expressions, tried in order; defaults to none
 * @returns `true` as soon as one expression matches, `false` if none does
 */
function matchWithRegExpArray(input: string, regexps: RegExp[] = []) {
  return regexps.some(pattern => pattern.test(input));
}
/**
 * Escape the regular-expression metacharacters `$ ( ) * + . ? [ \ ] ^ { | }`
 * in a string by prefixing each with a backslash.
 *
 * @param string the text to escape; defaults to the empty string
 * @returns the escaped text; input without metacharacters comes back unchanged
 */
function escapeRegExp(string = ''): string {
  if (!string) return string;
  // `$&` re-inserts the matched character after the added backslash.
  return string.replace(/[$()*+.?[\\\]^{|}]/g, '\\$&');
}
|