Browse Source

Minor changes

SukkaW 2 years ago
parent
commit
897a505c32

+ 7 - 27
Build/build-internal-cdn-rules.ts

@@ -1,38 +1,18 @@
 import path from 'path';
-import * as tldts from 'tldts';
 import { processLine } from './lib/process-line';
 import { readFileByLine } from './lib/fetch-text-by-line';
 import { sortDomains } from './lib/stable-sort-domain';
 import { task } from './lib/trace-runner';
 import { compareAndWriteFile } from './lib/create-file';
 import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
-// const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse');
 
 const escapeRegExp = (string = '') => string.replaceAll(/[$()*+.?[\\\]^{|}]/g, '\\$&');
 
-const addApexDomain = (input: string, set: Set<string>) => {
-  // We are including the private domains themselves
-  const d = tldts.getDomain(input, { allowPrivateDomains: false });
-  if (d) {
-    set.add(d);
-  }
-};
-
 const processLocalDomainSet = async (domainSetPath: string, set: Set<string>) => {
-  for await (const line of readFileByLine(domainSetPath)) {
-    // console.log({ line });
-
-    const parsed = tldts.parse(line, { allowPrivateDomains: true, detectIp: false });
-    if (parsed.isIp) continue;
-    if (parsed.isIcann || parsed.isPrivate) {
-      if (parsed.domain) {
-        set.add(parsed.domain);
-      }
-      continue;
-    }
-
-    if (processLine(line)) {
-      console.warn('[drop line from domainset]', line);
+  for await (const l of readFileByLine(domainSetPath)) {
+    const line = processLine(l);
+    if (line) {
+      set.add(line[0] === '.' ? line.slice(1) : line);
     }
   }
 };
@@ -40,12 +20,12 @@ const processLocalDomainSet = async (domainSetPath: string, set: Set<string>) =>
 const processLocalRuleSet = async (ruleSetPath: string, set: Set<string>, keywords: Set<string>) => {
   for await (const line of readFileByLine(ruleSetPath)) {
     if (line.startsWith('DOMAIN-SUFFIX,')) {
-      addApexDomain(line.replace('DOMAIN-SUFFIX,', ''), set);
+      set.add(line.replace('DOMAIN-SUFFIX,', ''));
     } else if (line.startsWith('DOMAIN,')) {
-      addApexDomain(line.replace('DOMAIN,', ''), set);
+      set.add(line.replace('DOMAIN,', ''));
     } else if (line.startsWith('DOMAIN-KEYWORD')) {
       keywords.add(escapeRegExp(line.replace('DOMAIN-KEYWORD,', '')));
-    } else if (line.startsWith('USER-AGENT,') || line.startsWith('PROCESS-NAME,') || line.startsWith('URL-REGEX,')) {
+    } else if (line.includes('USER-AGENT,') || line.includes('PROCESS-NAME,') || line.includes('URL-REGEX,')) {
       // do nothing
     } else if (processLine(line)) {
       console.warn('[drop line from ruleset]', line);

+ 15 - 15
Build/build-reject-domainset.ts

@@ -23,8 +23,6 @@ import { setAddFromArray } from './lib/set-add-from-array';
 export const buildRejectDomainSet = task(import.meta.path, async () => {
   /** Whitelists */
   const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
-  const domainKeywordsSet = new Set<string>();
-  const domainSuffixSet = new Set<string>();
 
   const domainSets = new Set<string>();
 
@@ -95,21 +93,23 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
   let previousSize = domainSets.size;
   console.log(`Import ${previousSize} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
 
-  for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/reject.conf'))) {
-    const [type, keyword] = line.split(',');
-
-    if (type === 'DOMAIN-KEYWORD') {
-      domainKeywordsSet.add(keyword.trim());
-    } else if (type === 'DOMAIN-SUFFIX') {
-      domainSuffixSet.add(keyword.trim());
+  // Dedupe domainSets
+  await traceAsync('* Dedupe from black keywords/suffixes', async () => {
+  /** Collect DOMAIN-SUFFIX from non_ip/reject.conf for deduplication */
+    const domainSuffixSet = new Set<string>();
+    /** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */
+    const domainKeywordsSet = new Set<string>();
+
+    for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/reject.conf'))) {
+      const [type, keyword] = line.split(',');
+
+      if (type === 'DOMAIN-KEYWORD') {
+        domainKeywordsSet.add(keyword.trim());
+      } else if (type === 'DOMAIN-SUFFIX') {
+        domainSuffixSet.add(keyword.trim());
+      }
     }
-  }
 
-  console.log(`Import ${domainKeywordsSet.size} black keywords and ${domainSuffixSet.size} black suffixes!`);
-
-  previousSize = domainSets.size;
-  // Dedupe domainSets
-  traceSync('* Dedupe from black keywords/suffixes', () => {
     const trie1 = createTrie(domainSets);
 
     domainSuffixSet.forEach(suffix => {

+ 1 - 1
Build/build-speedtest-domainset.ts

@@ -24,7 +24,7 @@ const latestTopUserAgentsPromise = fsCache.apply(
   {
     serializer: serializeArray,
     deserializer: deserializeArray,
-    ttl: TTL.ONE_DAY()
+    ttl: TTL.THREE_DAYS()
   }
 );
 

+ 7 - 7
Build/download-previous-build.ts

@@ -1,7 +1,6 @@
-import fs from 'fs';
-import fsp from 'fs/promises';
+import { existsSync, createWriteStream } from 'fs';
+import { mkdir } from 'fs/promises';
 import path from 'path';
-import { Readable } from 'stream';
 import { pipeline } from 'stream/promises';
 import { readFileByLine } from './lib/fetch-text-by-line';
 import { isCI } from 'ci-info';
@@ -9,6 +8,7 @@ import { task } from './lib/trace-runner';
 import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
 import tarStream from 'tar-stream';
 import zlib from 'zlib';
+import { Readable } from 'stream';
 
 const IS_READING_BUILD_OUTPUT = 1 << 2;
 const ALL_FILES_EXISTS = 1 << 3;
@@ -31,7 +31,7 @@ export const downloadPreviousBuild = task(import.meta.path, async () => {
 
     if (!isCI) {
       // Bun.file().exists() doesn't check directory
-      if (!fs.existsSync(path.join(import.meta.dir, '..', line))) {
+      if (!existsSync(path.join(import.meta.dir, '..', line))) {
         flag = flag & ~ALL_FILES_EXISTS;
       }
     }
@@ -57,7 +57,7 @@ export const downloadPreviousBuild = task(import.meta.path, async () => {
   const extract = tarStream.extract();
   const gunzip = zlib.createGunzip();
   pipeline(
-    resp.body as any,
+    Readable.fromWeb(resp.body) as any,
     gunzip,
     extract
   );
@@ -78,10 +78,10 @@ export const downloadPreviousBuild = task(import.meta.path, async () => {
     const relativeEntryPath = entry.header.name.replace(pathPrefix, '');
     const targetPath = path.join(import.meta.dir, '..', relativeEntryPath);
 
-    await fsp.mkdir(path.dirname(targetPath), { recursive: true });
+    await mkdir(path.dirname(targetPath), { recursive: true });
     await pipeline(
       entry as any,
-      fs.createWriteStream(targetPath)
+      createWriteStream(targetPath)
     );
   }
 });

+ 12 - 14
Build/download-publicsuffixlist.ts

@@ -3,18 +3,16 @@ import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
 import { createMemoizedPromise } from './lib/memo-promise';
 import { traceAsync } from './lib/trace-runner';
 
-export const getPublicSuffixListTextPromise = createMemoizedPromise(
-  () => traceAsync(
-    'obtain public_suffix_list',
-    () => fsCache.apply(
-      'https://publicsuffix.org/list/public_suffix_list.dat',
-      () => fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit).then(r => r.text()),
-      {
-        // https://github.com/publicsuffix/list/blob/master/.github/workflows/tld-update.yml
-        // Though the action runs every 24 hours, the IANA list is updated every 7 days.
-        // So a 3 day TTL should be enough.
-        ttl: TTL.THREE_DAYS()
-      }
-    )
+export const getPublicSuffixListTextPromise = createMemoizedPromise(() => traceAsync(
+  'obtain public_suffix_list',
+  () => fsCache.apply(
+    'https://publicsuffix.org/list/public_suffix_list.dat',
+    () => fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit).then(r => r.text()),
+    {
+      // https://github.com/publicsuffix/list/blob/master/.github/workflows/tld-update.yml
+      // Though the action runs every 24 hours, the IANA list is updated every 7 days.
+      // So a 3 day TTL should be enough.
+      ttl: TTL.THREE_DAYS()
+    }
   )
-);
+));

+ 4 - 3
Build/lib/cache-filesystem.ts

@@ -2,8 +2,9 @@
 import { Database } from 'bun:sqlite';
 import os from 'os';
 import path from 'path';
-import fs from 'fs';
+import { mkdirSync } from 'fs';
 import picocolors from 'picocolors';
+import { traceSync } from './trace-runner';
 
 const identity = (x: any) => x;
 
@@ -64,7 +65,7 @@ export class Cache {
 
   constructor({ cachePath = path.join(os.tmpdir() || '/tmp', 'hdc'), tbd }: CacheOptions = {}) {
     this.cachePath = cachePath;
-    fs.mkdirSync(this.cachePath, { recursive: true });
+    mkdirSync(this.cachePath, { recursive: true });
     if (tbd != null) this.tbd = tbd;
 
     const db = new Database(path.join(this.cachePath, 'cache.db'));
@@ -151,7 +152,7 @@ export class Cache {
   }
 }
 
-export const fsCache = new Cache({ cachePath: path.resolve(import.meta.dir, '../../.cache') });
+export const fsCache = traceSync('initializing filesystem cache', () => new Cache({ cachePath: path.resolve(import.meta.dir, '../../.cache') }));
 // process.on('exit', () => {
 //   fsCache.destroy();
 // });

+ 1 - 0
Build/lib/cached-tld-parse.ts

@@ -12,6 +12,7 @@ const sharedConfig2 = { allowPrivateDomains: true, detectIp: false };
 export const parse = (domain: string) => cache.sync(domain, () => tldts.parse(domain, sharedConfig));
 /** { allowPrivateDomains: true, detectIp: false } */
 export const parse2 = (domain: string) => cache2.sync(domain, () => tldts.parse(domain, sharedConfig2));
+export const parseWithoutDetectIp = parse2;
 
 let gothillGetDomainCache: ReturnType<typeof createCache> | null = null;
 export const createCachedGorhillGetDomain = (gorhill: PublicSuffixList) => {

+ 3 - 3
Build/lib/create-file.ts

@@ -18,7 +18,7 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
     isEqual = false;
   } else {
     isEqual = await traceAsync(
-      picocolors.gray(`Comparing ${filePath}`),
+      picocolors.gray(`comparing ${filePath}`),
       async () => {
         let index = 0;
 
@@ -62,11 +62,11 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
   }
 
   if (isEqual) {
-    console.log(picocolors.gray(`Same Content, bail out writing: ${filePath}`));
+    console.log(picocolors.dim(`same content, bail out writing: ${filePath}`));
     return;
   }
 
-  await traceAsync(picocolors.gray(`Writing ${filePath}`), async () => {
+  await traceAsync(picocolors.gray(`writing ${filePath}`), async () => {
     if (linesALen < 10000) {
       return Bun.write(file, `${linesA.join('\n')}\n`);
     }

+ 28 - 28
Build/lib/fetch-text-by-line.ts

@@ -3,34 +3,34 @@ import { fetchWithRetry, defaultRequestInit } from './fetch-retry';
 
 import { TextLineStream } from './text-line-transform-stream';
 import { PolyfillTextDecoderStream } from './text-decoder-stream';
-function createTextLineStreamFromStreamSource(stream: ReadableStream<Uint8Array>) {
-  return stream
-    .pipeThrough(new PolyfillTextDecoderStream())
-    .pipeThrough(new TextLineStream());
-}
-
-// const decoder = new TextDecoder('utf-8');
-// async function *createTextLineAsyncGeneratorFromStreamSource(stream: ReadableStream<Uint8Array>): AsyncGenerator<string> {
-//   let buf = '';
-
-//   for await (const chunk of stream) {
-//     const chunkStr = decoder.decode(chunk).replaceAll('\r\n', '\n');
-//     for (let i = 0, len = chunkStr.length; i < len; i++) {
-//       const char = chunkStr[i];
-//       if (char === '\n') {
-//         yield buf;
-//         buf = '';
-//       } else {
-//         buf += char;
-//       }
-//     }
-//   }
-
-//   if (buf) {
-//     yield buf;
-//   }
+// function createTextLineStreamFromStreamSource(stream: ReadableStream<Uint8Array>) {
+//   return stream
+//     .pipeThrough(new PolyfillTextDecoderStream())
+//     .pipeThrough(new TextLineStream());
 // }
 
+const decoder = new TextDecoder('utf-8');
+async function *createTextLineAsyncGeneratorFromStreamSource(stream: ReadableStream<Uint8Array>): AsyncGenerator<string> {
+  let buf = '';
+
+  for await (const chunk of stream) {
+    const chunkStr = decoder.decode(chunk).replaceAll('\r\n', '\n');
+    for (let i = 0, len = chunkStr.length; i < len; i++) {
+      const char = chunkStr[i];
+      if (char === '\n') {
+        yield buf;
+        buf = '';
+      } else {
+        buf += char;
+      }
+    }
+  }
+
+  if (buf) {
+    yield buf;
+  }
+}
+
 export function readFileByLine(file: string | URL | BunFile) {
   if (typeof file === 'string') {
     file = Bun.file(file);
@@ -38,7 +38,7 @@ export function readFileByLine(file: string | URL | BunFile) {
     file = Bun.file(file);
   }
 
-  return createTextLineStreamFromStreamSource(file.stream());
+  return createTextLineAsyncGeneratorFromStreamSource(file.stream());
 }
 
 export function createReadlineInterfaceFromResponse(resp: Response) {
@@ -49,7 +49,7 @@ export function createReadlineInterfaceFromResponse(resp: Response) {
     throw new Error('Body has already been consumed.');
   }
 
-  return createTextLineStreamFromStreamSource(resp.body);
+  return createTextLineAsyncGeneratorFromStreamSource(resp.body);
 }
 
 export function fetchRemoteTextByLine(url: string | URL) {

+ 10 - 13
Build/lib/parse-filter.ts

@@ -56,23 +56,20 @@ export function processHosts(hostsUrl: string, includeAllSubDomain = false, ttl:
           continue;
         }
 
-        const domain = line.split(/\s/)[1];
+        const _domain = line.split(/\s/)[1]?.trim();
+        if (!_domain) {
+          continue;
+        }
+        const domain = normalizeDomain(_domain);
         if (!domain) {
           continue;
         }
-        const _domain = domain.trim();
-
-        if (DEBUG_DOMAIN_TO_FIND && _domain.includes(DEBUG_DOMAIN_TO_FIND)) {
-          console.warn(picocolors.red(hostsUrl), '(black)', _domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
+        if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
+          console.warn(picocolors.red(hostsUrl), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
           foundDebugDomain = true;
         }
 
-        const domainToAdd = normalizeDomain(_domain);
-        if (!domainToAdd) {
-          continue;
-        }
-
-        domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
+        domainSets.add(includeAllSubDomain ? `.${domain}` : domain);
       }
 
       console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size));
@@ -102,11 +99,11 @@ export async function processFilterRules(
   fallbackUrls?: readonly string[] | undefined | null,
   ttl: number | null = null
 ): Promise<{ white: string[], black: string[], foundDebugDomain: boolean }> {
-  const [white, black, warningMessages] = await traceAsync(`- processFilterRules: ${filterRulesUrl}`, () => fsCache.apply<[
+  const [white, black, warningMessages] = await traceAsync(`- processFilterRules: ${filterRulesUrl}`, () => fsCache.apply<Readonly<[
     white: string[],
     black: string[],
     warningMessages: string[]
-  ]>(
+  ]>>(
     filterRulesUrl,
     async () => {
       const whitelistDomainSets = new Set<string>();

+ 1 - 1
Build/lib/reject-data-source.ts

@@ -4,7 +4,7 @@ export const HOSTS = [
   ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, TTL.THREE_HOURS()],
   ['https://someonewhocares.org/hosts/hosts', true, TTL.THREE_HOURS()],
  // The no-coin list is not actively maintained; while it was, it updated daily, so we use a 3-day cache TTL
-  ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false, TTL.THREE_DAYS()],
+  ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', true, TTL.THREE_DAYS()],
  // This list has not been updated for more than a year, so we use a 14-day cache TTL
   ['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true, TTL.TWO_WEEKS()],
   ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false, TTL.THREE_DAYS()],

+ 15 - 0
Source/non_ip/domestic.js

@@ -12,6 +12,7 @@ module.exports.DOMESTICS = /** @type {const} */({
       'aliyun.com',
       'aliyuncs.com',
       'alikunlun.com',
+      'cdngslb.com',
       'alipay.com',
       'alipay.cn',
       'alipay.com.cn',
@@ -130,6 +131,18 @@ module.exports.DOMESTICS = /** @type {const} */({
       'bilibilipay.com'
     ]
   },
+  BILIBILI_ALI: {
+    dns: 'quic://223.5.5.5:853',
+    domains: [
+      'upos-sz-mirrorali.bilivideo.com'
+    ]
+  },
+  BILIBILI_BD: {
+    dns: '180.76.76.76',
+    domains: [
+      'upos-sz-mirrorbos.bilivideo.com'
+    ]
+  },
   XIAOMI: {
     dns: 'https://120.53.53.53/dns-query',
     domains: [
@@ -155,6 +168,8 @@ module.exports.DOMESTICS = /** @type {const} */({
       'toutiaoimg.cn',
       'toutiaostatic.com',
       'toutiaovod.com',
+      'toutiaocloud.com',
+      'toutiaopage.com',
       'feiliao.com',
       'iesdouyin.com',
       'pstatp.com',

+ 1 - 0
Source/non_ip/my_reject.conf

@@ -30,6 +30,7 @@ PROCESS-NAME,LemonService
 DEST-PORT,7680
 
 # >> HTTPDNS
+# https://github.com/VirgilClyne/GetSomeFries/wiki/%F0%9F%9A%AB-HTTPDNS
 
 # Aliyun
 DOMAIN,httpdns-api.aliyuncs.com