// build-reject-domainset.js

  1. const { simpleGet } = require('./util-http-get');
  2. const { promises: fsPromises } = require('fs');
  3. const { resolve: pathResolve } = require('path');
  // `cli-progress` is an optional install: when it cannot be loaded, print
  // installation instructions together with the loader error and abort with a
  // non-zero exit status instead of failing later with an undefined binding.
  let cliProgress;
  try {
    cliProgress = require('cli-progress');
  } catch (loadError) {
    const installHints = [
      'Dependencies not found',
      '"npm i cli-progress" then try again!'
    ];
    for (const hint of installHints) {
      console.log(hint);
    }
    console.error(loadError);
    process.exit(1);
  }
  /**
   * Download a hosts-format list and collect the host names it maps.
   *
   * Each non-comment line is expected to look like `<address> <host> [<host>...]`.
   * The first field (the address) is discarded and every remaining field is
   * recorded as its own entry.
   *
   * @param {string | URL} hostsUrl Location of the hosts file.
   * @param {boolean} [includeAllSubDomain=false] When true, every entry is
   *   prefixed with `.`.
   * @returns {Promise<string[]>} De-duplicated entries in first-seen order.
   */
  async function processHosts(hostsUrl, includeAllSubDomain = false) {
    // Work on a local binding instead of reassigning the parameter.
    const url = typeof hostsUrl === 'string' ? new URL(hostsUrl) : hostsUrl;
    const prefix = includeAllSubDomain ? '.' : '';

    /** @type {Set<string>} */
    const domainSets = new Set();

    const body = await simpleGet.https(url);
    for (const rawLine of body.split('\n')) {
      // Drop full-line and trailing `#` comments, then trim so that CRLF line
      // endings and leading indentation do not leak into the entries.
      const commentStart = rawLine.indexOf('#');
      const line = (commentStart === -1 ? rawLine : rawLine.slice(0, commentStart)).trim();
      if (line === '') {
        continue;
      }

      // Fields may be separated by any run of spaces or tabs. A line with a
      // single field has no host name and therefore contributes nothing.
      const [, ...domains] = line.split(/\s+/);
      for (const domain of domains) {
        domainSets.add(`${prefix}${domain}`);
      }
    }

    return [...domainSets];
  }
  /**
   * Download an adblock-style filter list and extract the plain domain rules.
   *
   * Only rules of the exact forms `||domain^` (block) and `@@||domain^`
   * (exception) are used. Comment lines (`!` / `#`), rules containing `*` or
   * `//`, rules carrying modifiers after the `^`, and rules whose target is not
   * a bare host name (for example `||example.com/banner^`) are ignored.
   *
   * @param {string | URL} filterRulesUrl Location of the filter list.
   * @returns {Promise<{ white: string[], black: string[] }>} Exception and block
   *   domains, each de-duplicated in first-seen order.
   */
  async function processFilterRules(filterRulesUrl) {
    // Work on a local binding instead of reassigning the parameter.
    const url = typeof filterRulesUrl === 'string' ? new URL(filterRulesUrl) : filterRulesUrl;

    // Host names only: dot-separated labels of letters, digits, `-` and `_`.
    const hostnamePattern = /^[a-z0-9_-]+(?:\.[a-z0-9_-]+)*$/i;
    const exceptionPrefix = '@@||';
    const blockPrefix = '||';

    /** @type {Set<string>} */
    const whitelistDomainSets = new Set();
    /** @type {Set<string>} */
    const blacklistDomainSets = new Set();

    // Keep the query string as part of the request path so it is not lost.
    const body = await simpleGet.https(url.hostname, `${url.pathname}${url.search}`);

    for (const rawLine of body.split('\n')) {
      // Trimming removes the `\r` of CRLF files, which would otherwise make the
      // `endsWith('^')` test fail for every single rule.
      const line = rawLine.trim();
      if (line === '' || line.startsWith('#') || line.startsWith('!')) {
        continue;
      }
      if (line.includes('*') || line.includes('//') || !line.endsWith('^')) {
        continue;
      }

      let target;
      let domain;
      if (line.startsWith(exceptionPrefix)) {
        target = whitelistDomainSets;
        domain = line.slice(exceptionPrefix.length, -1);
      } else if (line.startsWith(blockPrefix)) {
        target = blacklistDomainSets;
        domain = line.slice(blockPrefix.length, -1);
      } else {
        continue;
      }

      // Strip only the anchors, then make sure what is left really is a host.
      if (hostnamePattern.test(domain)) {
        target.add(domain);
      }
    }

    return {
      white: [...whitelistDomainSets],
      black: [...blacklistDomainSets]
    };
  }
  /**
   * Build `List/domainset/reject.conf`.
   *
   * Steps:
   *  1. collect entries from remote hosts files and the local
   *     `reject_sukka.conf`;
   *  2. add every block rule from the remote filter lists as a `.domain` entry;
   *  3. read the DOMAIN-KEYWORD rules from `List/non_ip/reject.conf`;
   *  4. drop entries that collide with a filter-list exception, that contain a
   *     DOMAIN-KEYWORD, or that are already covered by a broader `.parent`
   *     entry;
   *  5. write the remaining entries, one per line.
   *
   * Entries starting with `.` are treated as "this domain and all of its
   * subdomains"; other entries are treated as exact host names.
   */
  (async () => {
    /**
     * Yield `host` followed by each of its parent domains
     * (`a.b.com` -> `a.b.com`, `b.com`, `com`).
     *
     * @param {string} host
     */
    function* domainSuffixes(host) {
      yield host;
      let dot = host.indexOf('.');
      while (dot !== -1) {
        yield host.slice(dot + 1);
        dot = host.indexOf('.', dot + 1);
      }
    }

    /** @type {Set<string>} */
    const domainSets = new Set();

    // Parse from remote hosts
    const hostsResults = await Promise.all([
      processHosts('https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=1&mimetype=plaintext', true),
      processHosts('https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt'),
      processHosts('https://cdn.jsdelivr.net/gh/neoFelhz/neohosts@gh-pages/full/hosts'),
      processHosts('https://adaway.org/hosts.txt')
    ]);
    for (const hosts of hostsResults) {
      for (const host of hosts) {
        if (host) {
          domainSets.add(host);
        }
      }
    }

    console.log(`Import ${domainSets.size} rules from hosts files!`);
    console.log(`Start importing rules from reject_sukka.conf!`);

    const rejectSukka = await fsPromises.readFile(
      pathResolve(__dirname, '../List/domainset/reject_sukka.conf'),
      { encoding: 'utf-8' }
    );
    for (const rawLine of rejectSukka.split('\n')) {
      // Trim so a CRLF checkout does not append `\r` to every entry.
      const line = rawLine.trim();
      if (line === '' || line.startsWith('#')) {
        continue;
      }
      domainSets.add(line);
    }

    // Parse from AdGuard Filters
    /** @type {Set<string>} */
    const filterRuleWhitelistDomainSets = new Set();
    /** @type {Set<string>} */
    const filterRuleBlacklistDomainSets = new Set();

    const filterResults = await Promise.all([
      processFilterRules('https://easylist.to/easylist/easylist.txt'),
      processFilterRules('https://adguardteam.github.io/AdGuardSDNSFilter/Filters/filter.txt'),
      processFilterRules('https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_11_Mobile/filter.txt'),
      processFilterRules('https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_3_Spyware/filter.txt'),
      processFilterRules('https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_2_English/filter.txt'),
      processFilterRules('https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_224_Chinese/filter.txt')
    ]);
    for (const { white, black } of filterResults) {
      white.forEach(i => filterRuleWhitelistDomainSets.add(i));
      black.forEach(i => filterRuleBlacklistDomainSets.add(i));
    }

    for (const black of filterRuleBlacklistDomainSets) {
      domainSets.add(`.${black}`);
    }

    console.log(`Import ${filterRuleBlacklistDomainSets.size} rules from adguard filters!`);

    // Read DOMAIN Keyword
    /** @type {Set<string>} */
    const domainKeywordsSet = new Set();
    const rejectConf = await fsPromises.readFile(
      pathResolve(__dirname, '../List/non_ip/reject.conf'),
      { encoding: 'utf-8' }
    );
    for (const rawLine of rejectConf.split('\n')) {
      // Only the field right after the rule type is the keyword. An empty
      // keyword must be skipped: every string "contains" '' and it would wipe
      // the whole list.
      const [ruleType, keyword = ''] = rawLine.split(',').map(field => field.trim());
      if (ruleType === 'DOMAIN-KEYWORD' && keyword !== '') {
        domainKeywordsSet.add(keyword);
      }
    }

    // Every exception domain plus all of its parents, so "would a `.parent`
    // entry swallow an exception host?" becomes a single lookup.
    /** @type {Set<string>} */
    const whitelistWithParents = new Set();
    for (const white of filterRuleWhitelistDomainSets) {
      for (const suffix of domainSuffixes(white)) {
        whitelistWithParents.add(suffix);
      }
    }

    /**
     * An entry collides with the exceptions when it lies inside an exception
     * domain, or when it is a `.parent` entry that would also match an
     * exception host. Matching is done on label boundaries, so an exception
     * for `t.co` does not remove `adsnet.com`.
     */
    const isWhitelisted = (bare, matchesSubDomains) => {
      for (const suffix of domainSuffixes(bare)) {
        if (filterRuleWhitelistDomainSets.has(suffix)) {
          return true;
        }
      }
      return matchesSubDomains && whitelistWithParents.has(bare);
    };

    /** A DOMAIN-KEYWORD rule already matches any host containing the keyword. */
    const matchesKeyword = (domain) => {
      for (const keyword of domainKeywordsSet) {
        if (domain.includes(keyword)) {
          return true;
        }
      }
      return false;
    };

    /**
     * True when another `.parent` entry currently in the set already covers
     * this entry: `.host` for an exact `host` entry, or `.ancestor` for any
     * proper ancestor domain.
     */
    const hasBroaderRule = (bare, matchesSubDomains) => {
      if (!matchesSubDomains && domainSets.has(`.${bare}`)) {
        return true;
      }
      let isSelf = true;
      for (const suffix of domainSuffixes(bare)) {
        if (isSelf) {
          isSelf = false;
          continue;
        }
        if (domainSets.has(`.${suffix}`)) {
          return true;
        }
      }
      return false;
    };

    // Dedupe domainSets
    console.log(`Start deduping!`);
    const bar2 = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
    bar2.start(domainSets.size, 0);
    try {
      // Deleting from a Set while iterating it is well defined: removed entries
      // that have not been visited yet are simply skipped.
      for (const domain of domainSets) {
        const matchesSubDomains = domain.startsWith('.');
        const bare = matchesSubDomains ? domain.slice(1) : domain;

        if (
          isWhitelisted(bare, matchesSubDomains)
          || matchesKeyword(domain)
          || hasBroaderRule(bare, matchesSubDomains)
        ) {
          domainSets.delete(domain);
        }
        // Count every processed entry, including the removed ones.
        bar2.increment();
      }
    } finally {
      bar2.stop();
    }

    await fsPromises.writeFile(
      pathResolve(__dirname, '../List/domainset/reject.conf'),
      `${[...domainSets].join('\n')}\n`
    );
  })().catch((err) => {
    // Make download / file-system failures visible and fail the build.
    console.error(err);
    process.exitCode = 1;
  });