// normalize-domain.ts
// https://github.com/remusao/tldts/issues/2121
// In short: single-label domain suffixes are ignored as a size optimization, so `isIcann` is not set for them.
// import tldts from 'tldts-experimental';
  4. import tldts from 'tldts';
  5. import { normalizeTldtsOpt } from '../constants/loose-tldts-opt';
  6. import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip';
/** Result object type of `tldts.parse`, derived from its signature via `ReturnType`. */
export type TldTsParsed = ReturnType<typeof tldts.parse>;
  8. /**
  9. * Skipped the input non-empty check, the `domain` should not be empty.
  10. */
  11. function fastNormalizeDomainWithoutWwwNoIP(domain: string, parsed: TldTsParsed | null = null) {
  12. // We don't want tldts to call its own "extractHostname" on ip, bail out ip first.
  13. // This function won't run with IP, we can safely set normalizeTldtsOpt.detectIp to false.
  14. parsed ??= tldts.parse(domain, normalizeTldtsOpt);
  15. // Private invalid domain (things like .tor, .dn42, etc)
  16. if (!parsed.isIcann && !parsed.isPrivate) return null;
  17. if (parsed.subdomain) {
  18. if (
  19. parsed.subdomain === 'www'
  20. || parsed.subdomain === 'xml-v4'
  21. || parsed.subdomain === 'xml-eu'
  22. || parsed.subdomain === 'xml-eu-v4'
  23. // || (parsed.subdomain.length === 4 && parsed.subdomain.startsWith('www'))
  24. ) {
  25. return parsed.domain;
  26. }
  27. if (parsed.subdomain.startsWith('www.')) {
  28. return parsed.subdomain.slice(4) + '.' + parsed.domain;
  29. }
  30. }
  31. return parsed.hostname;
  32. }
  33. /**
  34. * Skipped the input non-empty check, the `domain` should not be empty.
  35. */
  36. export function fastNormalizeDomainWithoutWww(domain: string, parsed: TldTsParsed | null = null) {
  37. // We don't want tldts to call its own "extractHostname" on ip, bail out ip first.
  38. // Now ip has been bailed out, we can safely set normalizeTldtsOpt.detectIp to false.
  39. if (isProbablyIpv4(domain) || isProbablyIpv6(domain)) {
  40. return null;
  41. }
  42. return fastNormalizeDomainWithoutWwwNoIP(domain, parsed);
  43. }
  44. /**
  45. * Skipped the input non-empty check, the `domain` should not be empty.
  46. */
  47. export function fastNormalizeDomain(domain: string, parsed: TldTsParsed | null = null) {
  48. // We don't want tldts to call its own "extractHostname" on ip, bail out ip first.
  49. // Now ip has been bailed out, we can safely set normalizeTldtsOpt.detectIp to false.
  50. if (isProbablyIpv4(domain) || isProbablyIpv6(domain)) {
  51. return null;
  52. }
  53. parsed ??= tldts.parse(domain, normalizeTldtsOpt);
  54. // Private invalid domain (things like .tor, .dn42, etc)
  55. if (!parsed.isIcann && !parsed.isPrivate) return null;
  56. return parsed.hostname;
  57. }
  58. export function normalizeDomain(domain: string, parsed: TldTsParsed | null = null) {
  59. if (domain.length === 0) return null;
  60. if (isProbablyIpv4(domain) || isProbablyIpv6(domain)) {
  61. return null;
  62. }
  63. parsed ??= tldts.parse(domain, normalizeTldtsOpt);
  64. // Private invalid domain (things like .tor, .dn42, etc)
  65. if (!parsed.isIcann && !parsed.isPrivate) return null;
  66. // const h = parsed.hostname;
  67. // if (h === null) return null;
  68. return parsed.hostname;
  69. }