Browse Source

Chore: read request stream line by line

SukkaW 2 years ago
parent
commit
bf4c92cc5d

+ 12 - 11
Build/build-anti-bogus-domain.js

@@ -1,23 +1,24 @@
-const { fetchWithRetry } = require('./lib/fetch-retry');
+// @ts-check
 const fs = require('fs');
 const fs = require('fs');
 const path = require('path');
 const path = require('path');
 const { isIPv4, isIPv6 } = require('net');
 const { isIPv4, isIPv6 } = require('net');
 const { compareAndWriteFile } = require('./lib/string-array-compare');
 const { compareAndWriteFile } = require('./lib/string-array-compare');
 const { withBannerArray } = require('./lib/with-banner');
 const { withBannerArray } = require('./lib/with-banner');
+const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line');
 
 
 (async () => {
 (async () => {
   console.time('Total Time - build-anti-bogus-domain');
   console.time('Total Time - build-anti-bogus-domain');
-  console.time('* Download bogus-nxdomain-list')
-  const res = (await (await fetchWithRetry('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')).text())
-    .split('\n')
-    .map(line => {
-      if (line.startsWith('bogus-nxdomain=')) {
-        return line.replace('bogus-nxdomain=', '');
-      }
+  console.time('* Download bogus-nxdomain-list');
 
 
-      return null
-    })
-    .filter(ip => typeof ip === 'string');
+  const rl = await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf');
+
+  /** @type {string[]} */
+  const res = [];
+  for await (const line of rl) {
+    if (line.startsWith('bogus-nxdomain=')) {
+      res.push(line.replace('bogus-nxdomain=', ''));
+    }
+  }
 
 
   console.timeEnd('* Download bogus-nxdomain-list')
   console.timeEnd('* Download bogus-nxdomain-list')
 
 

+ 13 - 11
Build/build-apple-cdn.js

@@ -1,24 +1,26 @@
-const { fetchWithRetry } = require('./lib/fetch-retry');
-const fs = require('fs');
 const path = require('path');
 const path = require('path');
 
 
 const { isDomainLoose } = require('./lib/is-domain-loose');
 const { isDomainLoose } = require('./lib/is-domain-loose');
 const { compareAndWriteFile } = require('./lib/string-array-compare');
 const { compareAndWriteFile } = require('./lib/string-array-compare');
 const { withBannerArray } = require('./lib/with-banner');
 const { withBannerArray } = require('./lib/with-banner');
 
 
+const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line');
+
 (async () => {
 (async () => {
   console.time('Total Time - build-apple-cdn-conf');
   console.time('Total Time - build-apple-cdn-conf');
 
 
-  const res = (await (await fetchWithRetry('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/apple.china.conf')).text())
-    .split('\n')
-    .map(line => {
-      if (line.startsWith('server=/') && line.endsWith('/114.114.114.114')) {
-        return line.replace('server=/', '').replace('/114.114.114.114', '');
-      }
+  const rl = await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/apple.china.conf');
 
 
-      return null
-    })
-    .filter(domain => typeof domain === 'string' && isDomainLoose(domain));
+  /** @type {string[]} */
+  const res = [];
+  for await (const line of rl) {
+    if (line.startsWith('server=/') && line.endsWith('/114.114.114.114')) {
+      const domain = line.replace('server=/', '').replace('/114.114.114.114', '');
+      if (isDomainLoose(domain)) {
+        res.push(domain);
+      }
+    }
+  }
 
 
   await Promise.all([
   await Promise.all([
     compareAndWriteFile(
     compareAndWriteFile(

+ 20 - 0
Build/lib/fetch-remote-text-by-line.js

@@ -0,0 +1,20 @@
+// @ts-check
+const { fetchWithRetry } = require('./fetch-retry');
+const readline = require('readline');
+const { Readable } = require('stream');
+
+/**
+ * Fetch a remote resource with `fetchWithRetry` and expose its response body
+ * as a `readline` interface, so callers can consume the text one line at a
+ * time (e.g. `for await (const line of rl)`) instead of buffering the whole
+ * body with `.text()` and splitting it.
+ *
+ * Only a missing response body is treated as a failure here; the HTTP status
+ * of the response is not inspected by this function.
+ *
+ * @param {import('undici').RequestInfo} url
+ * @param {import('undici').RequestInit | undefined} [opt]
+ * @returns {Promise<import('readline').Interface>} readline interface reading from the response body
+ * @throws {Error} when the response has no body
+ */
+module.exports.fetchRemoteTextAndCreateReadlineInterface = async (url, opt) => {
+  const resp = await fetchWithRetry(url, opt);
+  if (!resp.body) {
+    throw new Error('Failed to fetch remote text');
+  }
+
+  return readline.createInterface({
+    input: Readable.fromWeb(resp.body), // adapt the WHATWG ReadableStream body to a Node.js Readable
+    crlfDelay: Infinity // always treat "\r\n" as a single line break
+  });
+}

+ 1 - 1
Build/lib/fetch-retry.js

@@ -1,4 +1,4 @@
 // @ts-check
 // @ts-check
 const { fetch } = require('undici');
 const { fetch } = require('undici');
-const fetchWithRetry = require('@vercel/fetch-retry')(fetch);
+const fetchWithRetry = /** @type {fetch} */(require('@vercel/fetch-retry')(fetch));
 module.exports.fetchWithRetry = fetchWithRetry;
 module.exports.fetchWithRetry = fetchWithRetry;

+ 9 - 8
Build/lib/parse-filter.js

@@ -1,5 +1,6 @@
 // @ts-check
 // @ts-check
 const { fetchWithRetry } = require('./fetch-retry');
 const { fetchWithRetry } = require('./fetch-retry');
+const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line');
 const { NetworkFilter } = require('@cliqz/adblocker');
 const { NetworkFilter } = require('@cliqz/adblocker');
 const { normalizeDomain } = require('./is-domain-loose');
 const { normalizeDomain } = require('./is-domain-loose');
 
 
@@ -26,9 +27,10 @@ async function processDomainLists(domainListsUrl) {
 
 
   /** @type Set<string> */
   /** @type Set<string> */
   const domainSets = new Set();
   const domainSets = new Set();
-  /** @type string[] */
-  const domains = (await (await fetchWithRetry(domainListsUrl)).text()).split('\n');
-  domains.forEach(line => {
+
+  const rl = await fetchRemoteTextAndCreateReadlineInterface(domainListsUrl);
+
+  for await (const line of rl) {
     if (
     if (
       line.startsWith('#')
       line.startsWith('#')
       || line.startsWith('!')
       || line.startsWith('!')
@@ -48,7 +50,7 @@ async function processDomainLists(domainListsUrl) {
     }
     }
 
 
     domainSets.add(domainToAdd);
     domainSets.add(domainToAdd);
-  });
+  }
 
 
   return [...domainSets];
   return [...domainSets];
 }
 }
@@ -66,9 +68,8 @@ async function processHosts(hostsUrl, includeAllSubDomain = false) {
   /** @type Set<string> */
   /** @type Set<string> */
   const domainSets = new Set();
   const domainSets = new Set();
 
 
-  /** @type string[] */
-  const hosts = (await (await fetchWithRetry(hostsUrl)).text()).split('\n');
-  hosts.forEach(line => {
+  const rl = await fetchRemoteTextAndCreateReadlineInterface(hostsUrl);
+  for await (const line of rl) {
     if (line.includes('#')) {
     if (line.includes('#')) {
       return;
       return;
     }
     }
@@ -91,7 +92,7 @@ async function processHosts(hostsUrl, includeAllSubDomain = false) {
         domainSets.add(domain);
         domainSets.add(domain);
       }
       }
     }
     }
-  });
+  }
 
 
   console.timeEnd(`   - processHosts: ${hostsUrl}`);
   console.timeEnd(`   - processHosts: ${hostsUrl}`);
 
 

+ 2 - 2
Build/lib/reject-data-source.js

@@ -8,7 +8,7 @@ const HOSTS = [
   ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false]
   ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false]
 ]
 ]
 
 
-const ADGUARD_FILTERS = [
+const ADGUARD_FILTERS = /** @type {const} */([
   // Easy List
   // Easy List
   [
   [
     'https://easylist.to/easylist/easylist.txt',
     'https://easylist.to/easylist/easylist.txt',
@@ -177,7 +177,7 @@ const ADGUARD_FILTERS = [
   'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty.txt',
   'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty.txt',
   'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty-cname.txt',
   'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty-cname.txt',
   'https://raw.githubusercontent.com/brave/adblock-lists/master/coin-miners.txt'
   'https://raw.githubusercontent.com/brave/adblock-lists/master/coin-miners.txt'
-];
+]);
 
 
 const PREDEFINED_WHITELIST = [
 const PREDEFINED_WHITELIST = [
   'localhost',
   'localhost',