punycode.nim 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
#
#
#            Nim's Runtime Library
#        (c) Copyright 2016 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#
  9. import strutils
  10. import unicode
  11. # issue #3045
  12. const
  13. Base = 36
  14. TMin = 1
  15. TMax = 26
  16. Skew = 38
  17. Damp = 700
  18. InitialBias = 72
  19. InitialN = 128
  20. Delimiter = '-'
  21. type
  22. PunyError* = object of Exception
  23. proc decodeDigit(x: char): int {.raises: [PunyError].} =
  24. if '0' <= x and x <= '9':
  25. result = ord(x) - (ord('0') - 26)
  26. elif 'A' <= x and x <= 'Z':
  27. result = ord(x) - ord('A')
  28. elif 'a' <= x and x <= 'z':
  29. result = ord(x) - ord('a')
  30. else:
  31. raise newException(PunyError, "Bad input")
  32. proc encodeDigit(digit: int): Rune {.raises: [PunyError].} =
  33. if 0 <= digit and digit < 26:
  34. result = Rune(digit + ord('a'))
  35. elif 26 <= digit and digit < 36:
  36. result = Rune(digit + (ord('0') - 26))
  37. else:
  38. raise newException(PunyError, "internal error in punycode encoding")
  39. proc isBasic(c: char): bool = ord(c) < 0x80
  40. proc isBasic(r: Rune): bool = int(r) < 0x80
  41. proc adapt(delta, numPoints: int, first: bool): int =
  42. var d = if first: delta div Damp else: delta div 2
  43. d += d div numPoints
  44. var k = 0
  45. while d > ((Base-TMin)*TMax) div 2:
  46. d = d div (Base - TMin)
  47. k += Base
  48. result = k + (Base - TMin + 1) * d div (d + Skew)
  49. proc encode*(prefix, s: string): string {.raises: [PunyError].} =
  50. ## Encode a string that may contain Unicode.
  51. ## Prepend `prefix` to the result
  52. result = prefix
  53. var (d, n, bias) = (0, InitialN, InitialBias)
  54. var (b, remaining) = (0, 0)
  55. for r in s.runes:
  56. if r.isBasic:
  57. # basic Ascii character
  58. inc b
  59. result.add($r)
  60. else:
  61. # special character
  62. inc remaining
  63. var h = b
  64. if b > 0:
  65. result.add(Delimiter) # we have some Ascii chars
  66. while remaining != 0:
  67. var m: int = high(int32)
  68. for r in s.runes:
  69. if m > int(r) and int(r) >= n:
  70. m = int(r)
  71. d += (m - n) * (h + 1)
  72. if d < 0:
  73. raise newException(PunyError, "invalid label " & s)
  74. n = m
  75. for r in s.runes:
  76. if int(r) < n:
  77. inc d
  78. if d < 0:
  79. raise newException(PunyError, "invalid label " & s)
  80. continue
  81. if int(r) > n:
  82. continue
  83. var q = d
  84. var k = Base
  85. while true:
  86. var t = k - bias
  87. if t < TMin:
  88. t = TMin
  89. elif t > TMax:
  90. t = TMax
  91. if q < t:
  92. break
  93. result.add($encodeDigit(t + (q - t) mod (Base - t)))
  94. q = (q - t) div (Base - t)
  95. k += Base
  96. result.add($encodeDigit(q))
  97. bias = adapt(d, h + 1, h == b)
  98. d = 0
  99. inc h
  100. dec remaining
  101. inc d
  102. inc n
  103. proc encode*(s: string): string {.raises: [PunyError].} =
  104. ## Encode a string that may contain Unicode. Prefix is empty.
  105. result = encode("", s)
  106. proc decode*(encoded: string): string {.raises: [PunyError].} =
  107. ## Decode a Punycode-encoded string
  108. var
  109. n = InitialN
  110. i = 0
  111. bias = InitialBias
  112. var d = rfind(encoded, Delimiter)
  113. result = ""
  114. if d > 0:
  115. # found Delimiter
  116. for j in 0..<d:
  117. var c = encoded[j] # char
  118. if not c.isBasic:
  119. raise newException(PunyError, "Encoded contains a non-basic char")
  120. result.add(c) # add the character
  121. inc d
  122. else:
  123. d = 0 # set to first index
  124. while (d < len(encoded)):
  125. var oldi = i
  126. var w = 1
  127. var k = Base
  128. while true:
  129. if d == len(encoded):
  130. raise newException(PunyError, "Bad input: " & encoded)
  131. var c = encoded[d]; inc d
  132. var digit = int(decodeDigit(c))
  133. if digit > (high(int32) - i) div w:
  134. raise newException(PunyError, "Too large a value: " & $digit)
  135. i += digit * w
  136. var t: int
  137. if k <= bias:
  138. t = TMin
  139. elif k >= bias + TMax:
  140. t = TMax
  141. else:
  142. t = k - bias
  143. if digit < t:
  144. break
  145. w *= Base - t
  146. k += Base
  147. bias = adapt(i - oldi, runelen(result) + 1, oldi == 0)
  148. if i div (runelen(result) + 1) > high(int32) - n:
  149. raise newException(PunyError, "Value too large")
  150. n += i div (runelen(result) + 1)
  151. i = i mod (runelen(result) + 1)
  152. insert(result, $Rune(n), i)
  153. inc i
  154. when isMainModule:
  155. assert(decode(encode("", "bücher")) == "bücher")
  156. assert(decode(encode("münchen")) == "münchen")
  157. assert encode("xn--", "münchen") == "xn--mnchen-3ya"