build-reject-domainset-worker.js 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. const Piscina = require('piscina');
  2. const { canExcludeFromDedupe } = require('../lib/parse-filter')
  // Reference list handed to this worker through Piscina's `workerData`
  // mechanism. The name and the matching logic in `dedupe` assume that every
  // entry begins with "." — that is not validated here.
  const { workerData: fullsetDomainStartsWithADot } = Piscina;
  // Cached once so the hot loop in `dedupe` does not re-read `.length`.
  const { length: totalLen } = fullsetDomainStartsWithADot;
  /**
   * Decides whether `domain` is made redundant by `dottedEntry`, one entry of
   * the full reference set (entries there are expected to start with ".").
   *
   * - Identical strings never count, so an entry cannot shadow itself.
   * - A `domain` without a leading "." is redundant only when
   *   `.${domain}` is exactly `dottedEntry`.
   * - A `domain` with a leading "." is redundant when it is strictly longer
   *   than `dottedEntry` and ends with it.
   *
   * @param {string} domain - entry taken from the chunk being processed
   * @param {string} dottedEntry - entry taken from the full reference set
   * @returns {boolean} true when `domain` should be flagged for removal
   */
  const isShadowedBy = (domain, dottedEntry) => {
    if (dottedEntry === domain) {
      return false;
    }

    const domainLen = domain.length;
    const entryLen = dottedEntry.length;

    // 46 is the char code of "."
    if (domain.charCodeAt(0) !== 46) {
      // The lengths differ by exactly one, so a suffix match means that
      // dottedEntry is `domain` preceded by one extra leading character.
      return domainLen + 1 === entryLen && dottedEntry.endsWith(domain);
    }

    // From here on `domain` itself starts with ".".
    return domainLen > entryLen && domain.endsWith(dottedEntry);
  };

  /**
   * Worker task: flags the entries of `chunk` that are already covered by the
   * full reference set.
   *
   * Entries for which `canExcludeFromDedupe` returns a truthy value are
   * skipped and therefore stay unflagged.
   *
   * @param {{ chunk: string[] }} task - the slice of domains to examine
   * @returns the value produced by `Piscina.move()` for an Int8Array with one
   *   slot per chunk entry: 1 means "remove", 0 means "keep"
   */
  module.exports.dedupe = ({ chunk }) => {
    const size = chunk.length;
    const removalFlags = new Int8Array(size);

    for (let idx = 0; idx < size; idx++) {
      const candidate = chunk[idx];

      if (canExcludeFromDedupe(candidate)) {
        continue;
      }

      // Stop scanning at the first entry that covers the candidate.
      for (let pos = 0; pos < totalLen; pos++) {
        if (isShadowedBy(candidate, fullsetDomainStartsWithADot[pos])) {
          removalFlags[idx] = 1;
          break;
        }
      }
    }

    return Piscina.move(removalFlags);
  };