// build-reject-domainset-worker.js (2.0 KB)
  1. const { workerData } = require('piscina');
  2. exports.dedupe = ({ chunk }) => {
  3. const outputToBeRemoved = new Set();
  4. for (let i = 0, l = chunk.length; i < l; i++) {
  5. const domainFromInput = chunk[i];
  6. for (const domainFromFullSet of workerData) {
  7. if (outputToBeRemoved.has(domainFromFullSet)) continue;
  8. if (domainFromFullSet === domainFromInput) continue;
  9. if (domainFromFullSet.charAt(0) !== '.') continue;
  10. // domainFromFullSet is now startsWith a "."
  11. if (domainFromInput.charAt(0) !== '.') {
  12. let shouldBeRemoved = true;
  13. for (let j = 0, l2 = domainFromInput.length; j < l2; j++) {
  14. if (domainFromFullSet.charAt(j + 1) !== domainFromInput.charAt(j)) {
  15. shouldBeRemoved = false;
  16. break;
  17. }
  18. }
  19. if (shouldBeRemoved) {
  20. outputToBeRemoved.add(domainFromInput);
  21. break;
  22. }
  23. }
  24. // domainFromInput is now startsWith a "."
  25. if (domainFromInput.length >= domainFromFullSet.length) {
  26. if (domainFromInput.endsWith(domainFromFullSet)) {
  27. outputToBeRemoved.add(domainFromInput);
  28. break;
  29. }
  30. }
  31. }
  32. }
  33. return outputToBeRemoved;
  34. };
  35. exports.whitelisted = ({ whiteList }) => {
  36. const outputToBeRemoved = new Set();
  37. for (const domain of workerData) {
  38. for (const white of whiteList) {
  39. if (domain.includes(white) || white.includes(domain)) {
  40. outputToBeRemoved.add(domain);
  41. break;
  42. }
  43. }
  44. }
  45. return outputToBeRemoved;
  46. };
  47. exports.dedupeKeywords = ({ keywords, suffixes }) => {
  48. const outputToBeRemoved = new Set();
  49. for (const domain of workerData) {
  50. for (const keyword of keywords) {
  51. if (domain.includes(keyword) || keyword.includes(domain)) {
  52. outputToBeRemoved.add(domain);
  53. break;
  54. }
  55. }
  56. for (const suffix of suffixes) {
  57. if (domain.endsWith(suffix)) {
  58. outputToBeRemoved.add(domain);
  59. break;
  60. }
  61. }
  62. }
  63. return outputToBeRemoved;
  64. }