// normalize-domain.ts
// See https://github.com/remusao/tldts/issues/2121
// In short: single-label domain suffixes are ignored by `tldts-experimental` due to its
// size optimization, so `isIcann` is not set for them. Hence the regular `tldts` import below.
// import tldts from 'tldts-experimental';
  4. import tldts from 'tldts';
  5. import { normalizeTldtsOpt } from '../constants/loose-tldts-opt';
  6. import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip';
  7. export type TldTsParsed = ReturnType<typeof tldts.parse>;
  8. /**
  9. * Skipped the input non-empty check, the `domain` should not be empty.
  10. */
  11. function fastNormalizeDomainWithoutWwwNoIP(domain: string, parsed: TldTsParsed | null = null) {
  12. // We don't want tldts to call its own "extractHostname" on ip, bail out ip first.
  13. // This function won't run with IP, we can safely set normalizeTldtsOpt.detectIp to false.
  14. parsed ??= tldts.parse(domain, normalizeTldtsOpt);
  15. // Private invalid domain (things like .tor, .dn42, etc)
  16. if (!parsed.isIcann && !parsed.isPrivate) return null;
  17. if (parsed.subdomain) {
  18. if (
  19. parsed.subdomain === 'www'
  20. || parsed.subdomain === 'xml-v4'
  21. || parsed.subdomain === 'xml-eu'
  22. || parsed.subdomain === 'xml-eu-v4'
  23. ) {
  24. return parsed.domain;
  25. }
  26. if (parsed.subdomain.startsWith('www.')) {
  27. return parsed.subdomain.slice(4) + '.' + parsed.domain;
  28. }
  29. }
  30. return parsed.hostname;
  31. }
  32. /**
  33. * Skipped the input non-empty check, the `domain` should not be empty.
  34. */
  35. export function fastNormalizeDomainWithoutWww(domain: string, parsed: TldTsParsed | null = null) {
  36. // We don't want tldts to call its own "extractHostname" on ip, bail out ip first.
  37. // Now ip has been bailed out, we can safely set normalizeTldtsOpt.detectIp to false.
  38. if (isProbablyIpv4(domain) || isProbablyIpv6(domain)) {
  39. return null;
  40. }
  41. return fastNormalizeDomainWithoutWwwNoIP(domain, parsed);
  42. }
  43. /**
  44. * Skipped the input non-empty check, the `domain` should not be empty.
  45. */
  46. export function fastNormalizeDomain(domain: string, parsed: TldTsParsed | null = null) {
  47. // We don't want tldts to call its own "extractHostname" on ip, bail out ip first.
  48. // Now ip has been bailed out, we can safely set normalizeTldtsOpt.detectIp to false.
  49. if (isProbablyIpv4(domain) || isProbablyIpv6(domain)) {
  50. return null;
  51. }
  52. parsed ??= tldts.parse(domain, normalizeTldtsOpt);
  53. // Private invalid domain (things like .tor, .dn42, etc)
  54. if (!parsed.isIcann && !parsed.isPrivate) return null;
  55. return parsed.hostname;
  56. }
  57. export function normalizeDomain(domain: string, parsed: TldTsParsed | null = null) {
  58. if (domain.length === 0) return null;
  59. if (isProbablyIpv4(domain) || isProbablyIpv6(domain)) {
  60. return null;
  61. }
  62. parsed ??= tldts.parse(domain, normalizeTldtsOpt);
  63. // Private invalid domain (things like .tor, .dn42, etc)
  64. if (!parsed.isIcann && !parsed.isPrivate) return null;
  65. // const h = parsed.hostname;
  66. // if (h === null) return null;
  67. return parsed.hostname;
  68. }