build-reject-domainset-worker.js 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. const Piscina = require('piscina');
  // Build the subset of the full domain set that dedupe compares against:
  // entries whose first character is "." (char code 46) and that are not
  // skipped by canExcludeFromDedupe. Checking the leading dot once here avoids
  // calling charCodeAt on every full-set entry for every input domain.
  //
  // Piscina.workerData is expected to be an array of strings.
  // NOTE(review): an earlier comment here mentioned sorting by length (shortest
  // first), but nothing visible here sorts — presumably the caller does; confirm.
  const fullsetDomainStartsWithADot = Piscina.workerData.filter((domain) => {
    if (domain.charCodeAt(0) !== 46) {
      return false;
    }
    return !canExcludeFromDedupe(domain);
  });
  // Used as the inner-loop bound in dedupe.
  const totalLen = fullsetDomainStartsWithADot.length;
  /**
   * Flags the entries of `chunk` that are already covered by a dot-prefixed
   * entry of the full domain set.
   *
   * An input domain is flagged when some full-set entry, other than the domain
   * itself, either
   *   - equals `.${domain}` (only checked when the input has no leading dot), or
   *   - is a suffix of the domain (e.g. "a.example.com" against ".example.com").
   * Domains for which canExcludeFromDedupe returns true are never flagged.
   *
   * @param {{ chunk: string[] }} task - object holding the array of domain
   *   strings to check.
   * @returns {Int8Array} one slot per chunk entry: 1 = should be removed,
   *   0 = keep.
   */
  module.exports.dedupe = ({ chunk }) => {
    const chunkLength = chunk.length;
    const outputToBeRemoved = new Int8Array(chunkLength);

    for (let i = 0; i < chunkLength; i++) {
      const domainFromInput = chunk[i];

      if (canExcludeFromDedupe(domainFromInput)) {
        continue;
      }

      // Both values depend only on the input domain, so compute them once
      // rather than once per full-set entry.
      const domainFromInputLen = domainFromInput.length;
      const inputHasNoLeadingDot = domainFromInput.charCodeAt(0) !== 46;

      for (let j = 0; j < totalLen; j++) {
        // Every entry of fullsetDomainStartsWithADot starts with a ".".
        const domainFromFullSet = fullsetDomainStartsWithADot[j];

        // An entry never removes itself.
        if (domainFromFullSet === domainFromInput) {
          continue;
        }

        const domainFromFullSetLen = domainFromFullSet.length;

        // Equivalent to `.${domainFromInput}` === domainFromFullSet without
        // building a new string: the full-set entry is exactly one character
        // longer (its leading ".") and the rest is the input domain.
        const isDottedFormOfInput = inputHasNoLeadingDot
          && domainFromInputLen + 1 === domainFromFullSetLen
          && domainFromFullSet.endsWith(domainFromInput);

        // Suffix check. This runs for inputs with or without a leading dot;
        // the length guard skips endsWith when the entry cannot possibly fit.
        if (
          isDottedFormOfInput
          || (domainFromInputLen >= domainFromFullSetLen && domainFromInput.endsWith(domainFromFullSet))
        ) {
          outputToBeRemoved[i] = 1;
          break;
        }
      }
    }

    return outputToBeRemoved;
  };
  /**
   * Tells whether a domain can be skipped by the dedupe pass.
   *
   * Returns true only for domains that are exactly 23 characters long and end
   * with ".duckdns.org"; per the original author's note, such duckdns.org
   * entries do not overlap with other entries and therefore need no dedupe.
   * NOTE(review): the fixed length of 23 presumably targets one specific
   * hostname pattern — confirm against the data source.
   *
   * @param {string} domain
   * @returns {boolean}
   */
  function canExcludeFromDedupe (domain) {
    return domain.length === 23 && domain.endsWith('.duckdns.org');
  }