// build-reject-domainset-worker.js

  // `workerData` is the value Piscina exposes to worker threads. The functions
  // in this file index it, iterate it and call string methods on its elements,
  // so it is presumably an array of domain strings (confirm against the code
  // that creates the pool).
  const workerData = require('piscina').workerData;
  // Entry count, read once at module load.
  const { length: len } = workerData;
  3. exports.dedupe = ({ chunk }) => {
  4. const outputToBeRemoved = new Set();
  5. for (let i = 0, l = chunk.length; i < l; i++) {
  6. const domainFromInput = chunk[i];
  7. for (let j = 0; j < len; j++) {
  8. const domainFromFullSet = workerData[j];
  9. if (domainFromFullSet === domainFromInput) continue;
  10. if (domainFromFullSet.charCodeAt(0) !== 46) continue;
  11. // domainFromFullSet is now startsWith a "."
  12. if (domainFromInput.charCodeAt(0) !== 46) {
  13. let shouldBeRemoved = true;
  14. for (let k = 0, l2 = domainFromInput.length; k < l2; k++) {
  15. if (domainFromFullSet.charCodeAt(k + 1) !== domainFromInput.charCodeAt(k)) {
  16. shouldBeRemoved = false;
  17. break;
  18. }
  19. }
  20. if (shouldBeRemoved) {
  21. outputToBeRemoved.add(domainFromInput);
  22. break;
  23. }
  24. }
  25. // domainFromInput is now startsWith a "."
  26. if (domainFromInput.length >= domainFromFullSet.length) {
  27. if (domainFromInput.endsWith(domainFromFullSet)) {
  28. outputToBeRemoved.add(domainFromInput);
  29. break;
  30. }
  31. }
  32. }
  33. }
  34. return outputToBeRemoved;
  35. };
  36. exports.whitelisted = ({ whiteList }) => {
  37. const outputToBeRemoved = new Set();
  38. for (const domain of workerData) {
  39. for (const white of whiteList) {
  40. if (domain.includes(white) || white.includes(domain)) {
  41. outputToBeRemoved.add(domain);
  42. break;
  43. }
  44. }
  45. }
  46. return outputToBeRemoved;
  47. };
  48. exports.dedupeKeywords = ({ keywords, suffixes }) => {
  49. const outputToBeRemoved = new Set();
  50. for (const domain of workerData) {
  51. for (const keyword of keywords) {
  52. if (domain.includes(keyword) || keyword.includes(domain)) {
  53. outputToBeRemoved.add(domain);
  54. break;
  55. }
  56. }
  57. for (const suffix of suffixes) {
  58. if (domain.endsWith(suffix)) {
  59. outputToBeRemoved.add(domain);
  60. break;
  61. }
  62. }
  63. }
  64. return outputToBeRemoved;
  65. }