parse-filter.js 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. const { fetchWithRetry } = require('./fetch-retry');
  2. const { normalizeDomain } = require('./is-domain-loose');
  3. const DEBUG_DOMAIN_TO_FIND = null; // example.com | null
  4. let foundDebugDomain = false;
  5. const warnOnceUrl = new Set();
  6. const warnOnce = (url, isWhite, ...message) => {
  7. const key = `${url}${isWhite ? 'white' : 'black'}`;
  8. if (warnOnceUrl.has(key)) {
  9. return;
  10. }
  11. warnOnceUrl.add(key);
  12. console.warn(url, isWhite ? '(white)' : '(black)', ...message);
  13. }
  14. /**
  15. * @param {string | URL} domainListsUrl
  16. */
  17. async function processDomainLists (domainListsUrl) {
  18. if (typeof domainListsUrl === 'string') {
  19. domainListsUrl = new URL(domainListsUrl);
  20. }
  21. /** @type Set<string> */
  22. const domainSets = new Set();
  23. /** @type string[] */
  24. const domains = (await (await fetchWithRetry(domainListsUrl)).text()).split('\n');
  25. domains.forEach(line => {
  26. if (
  27. line.startsWith('#')
  28. || line.startsWith('!')
  29. || line.startsWith(' ')
  30. || line === ''
  31. || line.startsWith('\r')
  32. || line.startsWith('\n')
  33. ) {
  34. return;
  35. }
  36. const domainToAdd = line.trim();
  37. if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) {
  38. warnOnce(domainListsUrl.toString(), false, DEBUG_DOMAIN_TO_FIND);
  39. foundDebugDomain = true;
  40. }
  41. domainSets.add(domainToAdd);
  42. });
  43. return [...domainSets];
  44. }
  45. /**
  46. * @param {string | URL} hostsUrl
  47. */
  48. async function processHosts (hostsUrl, includeAllSubDomain = false) {
  49. console.time(` - processHosts: ${hostsUrl}`);
  50. if (typeof hostsUrl === 'string') {
  51. hostsUrl = new URL(hostsUrl);
  52. }
  53. /** @type Set<string> */
  54. const domainSets = new Set();
  55. /** @type string[] */
  56. const hosts = (await (await fetchWithRetry(hostsUrl)).text()).split('\n');
  57. hosts.forEach(line => {
  58. if (line.includes('#')) {
  59. return;
  60. }
  61. if (line.startsWith(' ') || line.startsWith('\r') || line.startsWith('\n') || line.trim() === '') {
  62. return;
  63. }
  64. const [, ...domains] = line.split(' ');
  65. const _domain = domains.join(' ').trim();
  66. if (DEBUG_DOMAIN_TO_FIND && _domain.includes(DEBUG_DOMAIN_TO_FIND)) {
  67. warnOnce(hostsUrl.toString(), false, DEBUG_DOMAIN_TO_FIND);
  68. foundDebugDomain = true;
  69. }
  70. const domain = normalizeDomain(_domain);
  71. if (domain) {
  72. if (includeAllSubDomain) {
  73. domainSets.add(`.${domain}`);
  74. } else {
  75. domainSets.add(domain);
  76. }
  77. }
  78. });
  79. console.timeEnd(` - processHosts: ${hostsUrl}`);
  80. return [...domainSets];
  81. }
  82. /**
  83. * @param {string | URL} filterRulesUrl
  84. * @param {(string | URL)[] | undefined} fallbackUrls
  85. * @returns {Promise<{ white: Set<string>, black: Set<string>, foundDebugDomain: boolean }>}
  86. */
  87. async function processFilterRules (filterRulesUrl, fallbackUrls) {
  88. console.time(` - processFilterRules: ${filterRulesUrl}`);
  89. /** @type Set<string> */
  90. const whitelistDomainSets = new Set();
  91. /** @type Set<string> */
  92. const blacklistDomainSets = new Set();
  93. let filterRules;
  94. try {
  95. /** @type string[] */
  96. filterRules = (
  97. await Promise.any(
  98. [filterRulesUrl, ...(fallbackUrls || [])].map(
  99. async url => (await fetchWithRetry(url)).text()
  100. )
  101. )
  102. ).split('\n').map(line => line.trim());
  103. } catch (e) {
  104. console.log('Download Rule for [' + filterRulesUrl + '] failed');
  105. throw e;
  106. }
  107. for (let i = 0, len = filterRules.length; i < len; i++) {
  108. const line = filterRules[i];
  109. const lineStartsWithDoubleVerticalBar = line.startsWith('||');
  110. if (
  111. line === ''
  112. || line.includes('#')
  113. || line.includes('!')
  114. || line.includes('*')
  115. || line.includes('/')
  116. || line.includes('=')
  117. || line.includes('[')
  118. || line.includes('(')
  119. || line.includes('$') && !lineStartsWithDoubleVerticalBar
  120. || line.includes(']')
  121. || line.includes(')')
  122. ) {
  123. continue;
  124. }
  125. const lineEndsWithCaret = line.endsWith('^');
  126. const lineEndsWithCaretVerticalBar = line.endsWith('^|');
  127. if (lineStartsWithDoubleVerticalBar && line.endsWith('^$badfilter')) {
  128. const _domain = line.replace('||', '').replace('^$badfilter', '').trim();
  129. const domain = normalizeDomain(_domain);
  130. if (domain) {
  131. if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
  132. warnOnce(filterRulesUrl.toString(), true, DEBUG_DOMAIN_TO_FIND);
  133. foundDebugDomain = true;
  134. }
  135. whitelistDomainSets.add(domain);
  136. } else {
  137. console.warn(' * [parse-filter white] ' + _domain + ' is not a valid domain');
  138. }
  139. } else if (line.startsWith('@@||')
  140. && (
  141. lineEndsWithCaret
  142. || lineEndsWithCaretVerticalBar
  143. || line.endsWith('^$badfilter')
  144. || line.endsWith('^$1p')
  145. )
  146. ) {
  147. const _domain = line
  148. .replaceAll('@@||', '')
  149. .replaceAll('^$badfilter', '')
  150. .replaceAll('^$1p', '')
  151. .replaceAll('^|', '')
  152. .replaceAll('^', '')
  153. .trim();
  154. const domain = normalizeDomain(_domain);
  155. if (domain) {
  156. if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
  157. warnOnce(filterRulesUrl.toString(), true, DEBUG_DOMAIN_TO_FIND);
  158. foundDebugDomain = true;
  159. }
  160. whitelistDomainSets.add(domain);
  161. } else {
  162. console.warn(' * [parse-filter white] ' + _domain + ' is not a valid domain');
  163. }
  164. } else if (
  165. lineStartsWithDoubleVerticalBar
  166. && (
  167. lineEndsWithCaret
  168. || lineEndsWithCaretVerticalBar
  169. || line.endsWith('^$all')
  170. || line.endsWith('^$doc')
  171. || line.endsWith('^$document')
  172. )
  173. ) {
  174. const _domain = line
  175. .replaceAll('||', '')
  176. .replaceAll('^|', '')
  177. .replaceAll('^$all', '')
  178. .replaceAll('^$document', '')
  179. .replaceAll('^$doc', '')
  180. .replaceAll('^', '')
  181. .trim();
  182. const domain = normalizeDomain(_domain);
  183. if (domain) {
  184. if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
  185. warnOnce(filterRulesUrl.toString(), false, DEBUG_DOMAIN_TO_FIND);
  186. foundDebugDomain = true;
  187. }
  188. blacklistDomainSets.add(`.${domain}`);
  189. }
  190. } else if (
  191. line.startsWith('://')
  192. && (
  193. lineEndsWithCaret
  194. || lineEndsWithCaretVerticalBar
  195. )
  196. ) {
  197. const _domain = `${line.replaceAll('://', '').replaceAll('^|', '').replaceAll('^', '')}`.trim();
  198. const domain = normalizeDomain(_domain);
  199. if (domain) {
  200. if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
  201. warnOnce(filterRulesUrl.toString(), false, DEBUG_DOMAIN_TO_FIND);
  202. foundDebugDomain = true;
  203. }
  204. blacklistDomainSets.add(domain);
  205. }
  206. }
  207. }
  208. console.timeEnd(` - processFilterRules: ${filterRulesUrl}`);
  209. return {
  210. white: whitelistDomainSets,
  211. black: blacklistDomainSets,
  212. foundDebugDomain
  213. };
  214. }
  215. function preprocessFullDomainSetBeforeUsedAsWorkerData (data) {
  216. return data.filter(domain => (
  217. domain.charCodeAt(0) === 46
  218. ));
  219. }
  220. module.exports.processDomainLists = processDomainLists;
  221. module.exports.processHosts = processHosts;
  222. module.exports.processFilterRules = processFilterRules;
  223. module.exports.preprocessFullDomainSetBeforeUsedAsWorkerData = preprocessFullDomainSetBeforeUsedAsWorkerData;