build-reject-domainset.js 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. const { promises: fsPromises } = require('fs');
  2. const fse = require('fs-extra');
  3. const { resolve: pathResolve } = require('path');
  4. const Piscina = require('piscina');
  5. const { processHosts, processFilterRules, preprocessFullDomainSetBeforeUsedAsWorkerData } = require('./lib/parse-filter');
  6. const cpuCount = require('os').cpus().length;
  7. const { isCI } = require('ci-info');
  8. const threads = isCI ? cpuCount : cpuCount / 2;
  9. const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST } = require('./lib/reject-data-source');
  10. const { withBanner } = require('./lib/with-banner');
  11. const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
  12. (async () => {
  13. console.time('Total Time - build-reject-domain-set');
  14. /** @type Set<string> */
  15. const domainSets = new Set();
  16. console.log('Downloading hosts file...');
  17. console.time('* Download and process Hosts');
  18. // Parse from remote hosts & domain lists
  19. (await Promise.all(
  20. HOSTS.map(entry => processHosts(entry[0], entry[1]))
  21. )).forEach(hosts => {
  22. hosts.forEach(host => {
  23. if (host) {
  24. domainSets.add(host);
  25. }
  26. });
  27. });
  28. console.timeEnd('* Download and process Hosts');
  29. let previousSize = domainSets.size;
  30. console.log(`Import ${previousSize} rules from hosts files!`);
  31. // Parse from AdGuard Filters
  32. console.time('* Download and process AdBlock Filter Rules');
  33. await Promise.all(ADGUARD_FILTERS.map(input => {
  34. const promise = Array.isArray(input) && input.length === 2
  35. ? processFilterRules(input[0], input[1])
  36. : processFilterRules(input);
  37. return promise.then((i) => {
  38. if (i) {
  39. const { white, black, foundDebugDomain } = i;
  40. if (foundDebugDomain) {
  41. process.exit(1);
  42. };
  43. white.forEach(i => filterRuleWhitelistDomainSets.add(i));
  44. black.forEach(i => domainSets.add(i));
  45. } else {
  46. process.exit(1);
  47. }
  48. });
  49. }));
  50. console.timeEnd('* Download and process AdBlock Filter Rules');
  51. previousSize = domainSets.size - previousSize;
  52. console.log(`Import ${previousSize} rules from adguard filters!`);
  53. await fsPromises.readFile(pathResolve(__dirname, '../Source/domainset/reject_sukka.conf'), { encoding: 'utf-8' }).then(data => {
  54. data.split('\n').forEach(line => {
  55. const trimmed = line.trim();
  56. if (
  57. line.startsWith('#')
  58. || line.startsWith(' ')
  59. || line.startsWith('\r')
  60. || line.startsWith('\n')
  61. || trimmed === ''
  62. ) {
  63. return;
  64. }
  65. domainSets.add(trimmed);
  66. });
  67. });
  68. // Copy reject_sukka.conf for backward compatibility
  69. await fse.copy(pathResolve(__dirname, '../Source/domainset/reject_sukka.conf'), pathResolve(__dirname, '../List/domainset/reject_sukka.conf'))
  70. previousSize = domainSets.size - previousSize;
  71. console.log(`Import ${previousSize} rules from reject_sukka.conf!`);
  72. // Read DOMAIN Keyword
  73. const domainKeywordsSet = new Set();
  74. const domainSuffixSet = new Set();
  75. await fsPromises.readFile(pathResolve(__dirname, '../List/non_ip/reject.conf'), { encoding: 'utf-8' }).then(data => {
  76. data.split('\n').forEach(line => {
  77. if (line.startsWith('DOMAIN-KEYWORD')) {
  78. const [, ...keywords] = line.split(',');
  79. domainKeywordsSet.add(keywords.join(',').trim());
  80. } else if (line.startsWith('DOMAIN-SUFFIX')) {
  81. const [, ...keywords] = line.split(',');
  82. domainSuffixSet.add(keywords.join(',').trim());
  83. }
  84. });
  85. });
  86. // Read Special Phishing Suffix list
  87. await fsPromises.readFile(pathResolve(__dirname, '../List/domainset/reject_phishing.conf'), { encoding: 'utf-8' }).then(data => {
  88. data.split('\n').forEach(line => {
  89. const trimmed = line.trim();
  90. if (
  91. line.startsWith('#')
  92. || line.startsWith(' ')
  93. || line.startsWith('\r')
  94. || line.startsWith('\n')
  95. || trimmed === ''
  96. ) {
  97. return;
  98. }
  99. domainSuffixSet.add(trimmed);
  100. });
  101. });
  102. console.log(`Import ${domainKeywordsSet.size} black keywords and ${domainSuffixSet.size} black suffixes!`);
  103. previousSize = domainSets.size;
  104. // Dedupe domainSets
  105. console.log(`Start deduping from black keywords/suffixes! (${previousSize})`);
  106. console.time(`* Dedupe from black keywords/suffixes`);
  107. for (const domain of domainSets) {
  108. let isTobeRemoved = false;
  109. for (const suffix of domainSuffixSet) {
  110. if (domain.endsWith(suffix)) {
  111. isTobeRemoved = true;
  112. break;
  113. }
  114. }
  115. if (!isTobeRemoved) {
  116. for (const keyword of domainKeywordsSet) {
  117. if (domain.includes(keyword)) {
  118. isTobeRemoved = true;
  119. break;
  120. }
  121. }
  122. }
  123. if (!isTobeRemoved) {
  124. if (isInWhiteList(domain)) {
  125. isTobeRemoved = true;
  126. }
  127. }
  128. if (isTobeRemoved) {
  129. domainSets.delete(domain);
  130. }
  131. }
  132. console.timeEnd(`* Dedupe from black keywords/suffixes`);
  133. console.log(`Deduped ${previousSize} - ${domainSets.size} = ${previousSize - domainSets.size} from black keywords and suffixes!`);
  134. previousSize = domainSets.size;
  135. // Dedupe domainSets
  136. console.log(`Start deduping! (${previousSize})`);
  137. const START_TIME = Date.now();
  138. const piscina = new Piscina({
  139. filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js'),
  140. workerData: preprocessFullDomainSetBeforeUsedAsWorkerData([...domainSets]),
  141. idleTimeout: 50,
  142. minThreads: threads,
  143. maxThreads: threads
  144. });
  145. console.log(`Launching ${threads} threads...`)
  146. const tasksArray = Array.from(domainSets)
  147. .reduce((result, element, index) => {
  148. const chunk = index % threads;
  149. result[chunk] ??= [];
  150. result[chunk].push(element);
  151. return result;
  152. }, []);
  153. (
  154. await Promise.all(
  155. Array.from(domainSets)
  156. .reduce((result, element, index) => {
  157. const chunk = index % threads;
  158. result[chunk] ??= [];
  159. result[chunk].push(element);
  160. return result;
  161. }, [])
  162. .map(chunk => piscina.run({ chunk }, { name: 'dedupe' }))
  163. )
  164. ).forEach((result, taskIndex) => {
  165. const chunk = tasksArray[taskIndex];
  166. for (let i = 0, len = result.length; i < len; i++) {
  167. if (result[i]) {
  168. domainSets.delete(chunk[i]);
  169. }
  170. }
  171. });
  172. console.log(`* Dedupe from covered subdomain - ${(Date.now() - START_TIME) / 1000}s`);
  173. console.log(`Deduped ${previousSize - domainSets.size} rules!`);
  174. console.time('* Write reject.conf');
  175. await Promise.all([
  176. fsPromises.writeFile(
  177. pathResolve(__dirname, '../List/domainset/reject.conf'),
  178. withBanner(
  179. 'Reject Domain Set for Surge',
  180. [
  181. '(AdBlock, Tracking Protection, Privacy Protection, Anti-Phishing, Anti-Mining)',
  182. 'Build from:',
  183. ...HOSTS.map(host => `- ${host[0]}`),
  184. ...ADGUARD_FILTERS.map(filter => `- ${Array.isArray(filter) ? filter[0] : filter}`),
  185. ],
  186. new Date(),
  187. [...domainSets].sort()
  188. ),
  189. { encoding: 'utf-8' }
  190. ),
  191. piscina.destroy()
  192. ]);
  193. console.timeEnd('* Write reject.conf');
  194. console.timeEnd('Total Time - build-reject-domain-set');
  195. if (piscina.queueSize === 0) {
  196. process.exit(0);
  197. }
  198. })();
  199. function isInWhiteList (domain) {
  200. for (const white of filterRuleWhitelistDomainSets) {
  201. if (domain === white || domain.endsWith(white)) {
  202. return true;
  203. }
  204. }
  205. return false;
  206. }