// normalize-domain.ts

  // https://github.com/remusao/tldts/issues/2121
  // In short: single-label domain suffixes are ignored as a size optimization, so `isIcann` is not set for them.
  // import tldts from 'tldts-experimental';
  4. import tldts from 'tldts';
  5. import { normalizeTldtsOpt } from '../constants/loose-tldts-opt';
  6. import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip';
  /** Result type of `tldts.parse`; the functions in this file accept it as an optional pre-parsed input. */
  7. export type TldTsParsed = ReturnType<typeof tldts.parse>;
  /**
   * Normalizes a hostname and strips a leading `www` label from its subdomain.
   *
   * Two preconditions are the caller's responsibility and are NOT checked here:
   * `domain` must be non-empty, and `domain` must not be an IP address.
   *
   * @param domain - Hostname to normalize; only parsed when `parsed` is not supplied.
   * @param parsed - Optional pre-computed `tldts.parse` result to use instead of re-parsing.
   * @returns `null` when the parse result is flagged neither `isIcann` nor `isPrivate`.
   *   Otherwise: the `domain` field when the subdomain is exactly `www` or `xml-v4`;
   *   the hostname minus its leading `www.` when the subdomain starts with `www.`;
   *   the unchanged `hostname` field in every other case.
   */
  export function fastNormalizeDomainWithoutWwwNoIP(domain: string, parsed: TldTsParsed | null = null) {
    // Reuse the caller's parse result when available. IPs are not expected here, which is
    // why the shared options can leave tldts' own IP detection off (per the original note
    // on `normalizeTldtsOpt.detectIp` -- confirm against the options object).
    const info = parsed ?? tldts.parse(domain, normalizeTldtsOpt);

    // Neither an ICANN nor a private suffix: an invalid domain (things like .tor, .dn42).
    if (!(info.isIcann || info.isPrivate)) return null;

    const sub = info.subdomain;

    // No subdomain, so there is nothing to strip.
    if (!sub) return info.hostname;

    // The entire subdomain is a label we drop; only the `domain` field remains.
    if (sub === 'www' || sub === 'xml-v4') return info.domain;

    // "www.<rest>": cut the 4-character "www." prefix and re-attach the domain.
    return sub.startsWith('www.') ? `${sub.slice(4)}.${info.domain}` : info.hostname;
  }
  /**
   * IP-safe wrapper around `fastNormalizeDomainWithoutWwwNoIP`.
   *
   * The input is not checked for emptiness; `domain` should not be empty.
   *
   * @param domain - Hostname to normalize.
   * @param parsed - Optional pre-computed `tldts.parse` result, forwarded unchanged.
   * @returns `null` when `domain` looks like an IPv4 or IPv6 address; otherwise whatever
   *   `fastNormalizeDomainWithoutWwwNoIP(domain, parsed)` returns.
   */
  export function fastNormalizeDomainWithoutWww(domain: string, parsed: TldTsParsed | null = null) {
    // Reject IPs up front so tldts never runs its own hostname extraction on them.
    const looksLikeIp = isProbablyIpv4(domain) || isProbablyIpv6(domain);
    return looksLikeIp ? null : fastNormalizeDomainWithoutWwwNoIP(domain, parsed);
  }
  /**
   * Validates a hostname and returns it in normalized form, keeping any `www` label.
   *
   * The input is not checked for emptiness; `domain` should not be empty.
   *
   * @param domain - Hostname to normalize.
   * @param parsed - Optional pre-computed `tldts.parse` result to use instead of re-parsing.
   * @returns `null` when `domain` looks like an IPv4/IPv6 address or when the parse result
   *   is flagged neither `isIcann` nor `isPrivate`; otherwise the parsed `hostname`.
   */
  export function fastNormalizeDomain(domain: string, parsed: TldTsParsed | null = null) {
    // Reject IPs before tldts sees them, so it never runs its own hostname extraction on an IP.
    if (isProbablyIpv4(domain) || isProbablyIpv6(domain)) return null;

    const { isIcann, isPrivate, hostname } = parsed ?? tldts.parse(domain, normalizeTldtsOpt);

    // A suffix that is neither ICANN nor private is invalid (things like .tor, .dn42).
    return isIcann || isPrivate ? hostname : null;
  }
  /**
   * Validates a hostname and returns it in normalized form, including the empty-input
   * check that the `fast*` variants in this file leave out.
   *
   * @param domain - Hostname to normalize; may be empty.
   * @param parsed - Optional pre-computed `tldts.parse` result to use instead of re-parsing.
   * @returns `null` when `domain` is empty, looks like an IPv4/IPv6 address, or has a parse
   *   result flagged neither `isIcann` nor `isPrivate`; otherwise the parsed `hostname`.
   */
  export function normalizeDomain(domain: string, parsed: TldTsParsed | null = null) {
    // Cheap rejections first: empty input, then anything that looks like an IP address.
    if (domain.length === 0 || isProbablyIpv4(domain) || isProbablyIpv6(domain)) {
      return null;
    }

    const info = parsed ?? tldts.parse(domain, normalizeTldtsOpt);

    // Only ICANN or private suffixes are valid; the rest are things like .tor, .dn42.
    if (info.isIcann || info.isPrivate) {
      return info.hostname;
    }
    return null;
  }