punycode.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553
  1. /* Copyright Mathias Bynens <https://mathiasbynens.be/>
  2. Permission is hereby granted, free of charge, to any person obtaining
  3. a copy of this software and associated documentation files (the
  4. "Software"), to deal in the Software without restriction, including
  5. without limitation the rights to use, copy, modify, merge, publish,
  6. distribute, sublicense, and/or sell copies of the Software, and to
  7. permit persons to whom the Software is furnished to do so, subject to
  8. the following conditions:
  9. The above copyright notice and this permission notice shall be
  10. included in all copies or substantial portions of the Software.
  11. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  12. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  13. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  14. NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  15. LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  16. OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  17. WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  18. */
  19. /*! https://mths.be/punycode v1.3.2 by @mathias */
  20. ;(function(root) {
  /**
   * Detect free variables.
   *
   * `freeExports` and `freeModule` end up holding the CommonJS `exports` and
   * `module` objects when those exist, are truthy, and have no truthy
   * `nodeType` property (presumably to rule out a DOM element that happens to
   * be reachable under that name); otherwise they hold a falsy value.
   */
  var freeExports = typeof exports == 'object' && exports &&
    !exports.nodeType && exports;
  var freeModule = typeof module == 'object' && module &&
    !module.nodeType && module;
  // `freeGlobal` is the `global` object when one exists, else `false`.
  var freeGlobal = typeof global == 'object' && global;
  // Only adopt `freeGlobal` as `root` when it refers to itself through one of
  // the usual global aliases. When `freeGlobal` is `false`, every property
  // lookup below yields `undefined`, so `root` is left as passed in.
  if (
    freeGlobal.global === freeGlobal ||
    freeGlobal.window === freeGlobal ||
    freeGlobal.self === freeGlobal
  ) {
    root = freeGlobal;
  }
  /**
   * The `punycode` object. It is declared here and assigned once the public
   * API is assembled.
   * @name punycode
   * @type Object
   */
  var punycode;

  /** Highest positive signed 32-bit integer value (2^31 - 1) */
  var maxInt = 0x7FFFFFFF; // 2147483647

  /** Bootstring parameters */
  var base = 36;
  var tMin = 1;
  var tMax = 26;
  var skew = 38;
  var damp = 700;
  var initialBias = 72;
  var initialN = 0x80; // 128, the first non-basic code point
  var delimiter = '-'; // '\x2D'

  /** Regular expressions */
  // Matches the ACE prefix at the start of a label (case-sensitive).
  var regexPunycode = /^xn--/;
  // Matches anything outside printable ASCII: control chars and non-ASCII.
  var regexNonASCII = /[^\x20-\x7E]/;
  // RFC 3490 label separators.
  var regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g;

  /** Error messages, keyed by the type passed to `error()` */
  var errors = {
    'overflow': 'Overflow: input needs wider integers to process',
    'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
    'invalid-input': 'Invalid input'
  };

  /** Convenience shortcuts */
  var baseMinusTMin = base - tMin;
  var floor = Math.floor;
  var stringFromCharCode = String.fromCharCode;

  /** Temporary variable, used as the `for...in` iterator when exporting */
  var key;
  67. /*--------------------------------------------------------------------------*/
  /**
   * A generic error utility function. It never returns normally.
   * @private
   * @param {String} type The error type; one of the keys of `errors`.
   * @throws {RangeError} Always, carrying the message registered for `type`.
   */
  function error(type) {
    var message = errors[type];
    throw new RangeError(message);
  }
  /**
   * A generic `Array#map` utility function.
   * @private
   * @param {Array} array The array to iterate over.
   * @param {Function} fn The function that gets called for every array item;
   * it receives the item as its only argument.
   * @returns {Array} A new array of values returned by `fn`.
   */
  function map(array, fn) {
    var result = [];
    // Walk from the last item down to the first; each result is stored at the
    // index of the item it was computed from.
    for (var index = array.length - 1; index >= 0; index--) {
      result[index] = fn(array[index]);
    }
    return result;
  }
  /**
   * A simple `Array#map`-like wrapper to work with domain name strings or email
   * addresses.
   *
   * If the input contains an `@`, everything up to and including the LAST `@`
   * is passed through untouched and only the remainder is treated as the
   * domain. Splitting on the last `@` (rather than taking the piece between the
   * first and second `@`) ensures no part of the input is dropped when it
   * contains more than one `@`.
   * @private
   * @param {String} string The domain name or email address.
   * @param {Function} fn The function that gets called for every label of the
   * domain, i.e. every dot-separated piece.
   * @returns {String} A new string built from the untouched local part (if
   * any) followed by the labels returned by `fn`, joined with `.`.
   */
  function mapDomain(string, fn) {
    var result = '';
    var atIndex = string.lastIndexOf('@');
    if (atIndex > -1) {
      // In email addresses, only the domain name should be punycoded. Leave
      // the local part (i.e. everything up to the last `@`) intact.
      result = string.slice(0, atIndex + 1);
      string = string.slice(atIndex + 1);
    }
    // Avoid `split(regex)` for IE8 compatibility. See #17.
    string = string.replace(regexSeparators, '\x2E');
    var labels = string.split('.');
    var encoded = map(labels, fn).join('.');
    return result + encoded;
  }
  /**
   * Creates an array containing the numeric code points of each Unicode
   * character in the string. While JavaScript exposes strings as sequences of
   * 16-bit code units, this function combines a high surrogate that is
   * immediately followed by a low surrogate into a single code point,
   * matching UTF-16. Any other code unit, including an unpaired surrogate, is
   * emitted as-is.
   * @see `punycode.ucs2.encode`
   * @see <https://mathiasbynens.be/notes/javascript-encoding>
   * @memberOf punycode.ucs2
   * @name decode
   * @param {String} string The Unicode input string (UCS-2).
   * @returns {Array} The new array of code points.
   */
  function ucs2decode(string) {
    var codePoints = [];
    var total = string.length;
    var position = 0;
    var unit;
    var next;
    var isHighSurrogate;
    while (position < total) {
      unit = string.charCodeAt(position++);
      isHighSurrogate = unit >= 0xD800 && unit <= 0xDBFF;
      if (!isHighSurrogate || position >= total) {
        // Ordinary code unit, or a high surrogate with nothing after it.
        codePoints.push(unit);
        continue;
      }
      // Peek at the following unit; consume it only if it completes the pair.
      next = string.charCodeAt(position);
      if ((next & 0xFC00) == 0xDC00) {
        position++;
        codePoints.push(((unit & 0x3FF) << 10) + (next & 0x3FF) + 0x10000);
      } else {
        // Unmatched high surrogate: emit it alone and leave the next unit for
        // the following iteration, since it may itself start a valid pair.
        codePoints.push(unit);
      }
    }
    return codePoints;
  }
  /**
   * Creates a string based on an array of numeric code points. Values above
   * 0xFFFF are written as a high/low surrogate pair; every other value is
   * handed to `String.fromCharCode` unchanged.
   * @see `punycode.ucs2.decode`
   * @memberOf punycode.ucs2
   * @name encode
   * @param {Array} array The array of numeric code points.
   * @returns {String} The new Unicode string (UCS-2).
   */
  function ucs2encode(array) {
    var pieces = map(array, function(codePoint) {
      if (codePoint > 0xFFFF) {
        // Split the 20-bit offset above 0x10000 into two 10-bit halves.
        var offset = codePoint - 0x10000;
        var highSurrogate = (offset >>> 10 & 0x3FF) | 0xD800;
        var lowSurrogate = 0xDC00 | (offset & 0x3FF);
        return stringFromCharCode(highSurrogate) +
          stringFromCharCode(lowSurrogate);
      }
      return stringFromCharCode(codePoint);
    });
    return pieces.join('');
  }
  /**
   * Converts a basic code point into a digit/integer.
   *
   * Each range is checked on both ends. A one-sided test such as
   * `codePoint - 48 < 10` is also true for every code point below `0`, and
   * lets the characters between the ranges fall through to a negative result,
   * so characters that are not Punycode digits would be accepted as digits.
   * @see `digitToBasic()`
   * @private
   * @param {Number} codePoint The basic numeric code point value.
   * @returns {Number} The numeric value of a basic code point (for use in
   * representing integers) in the range `0` to `base - 1`, or `base` if
   * the code point does not represent a value.
   */
  function basicToDigit(codePoint) {
    // ASCII 0..9 map to 26..35
    if (codePoint >= 0x30 && codePoint < 0x3A) {
      return 26 + (codePoint - 0x30);
    }
    // ASCII A..Z map to 0..25
    if (codePoint >= 0x41 && codePoint < 0x5B) {
      return codePoint - 0x41;
    }
    // ASCII a..z map to 0..25
    if (codePoint >= 0x61 && codePoint < 0x7B) {
      return codePoint - 0x61;
    }
    return base;
  }
  /**
   * Converts a digit/integer into a basic code point.
   * @see `basicToDigit()`
   * @private
   * @param {Number} digit The numeric value of a basic code point.
   * @param {*} flag Case selector, compared with `!= 0`: any value loosely
   * equal to `0` selects lowercase, anything else (including `undefined`)
   * selects uppercase.
   * @returns {Number} The basic code point whose value (when used for
   * representing integers) is `digit`, which needs to be in the range
   * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is
   * used; else, the lowercase form is used. The behavior is undefined
   * if `flag` is non-zero and `digit` has no uppercase form.
   */
  function digitToBasic(digit, flag) {
    var codePoint;
    if (digit < 26) {
      // 0..25 map to ASCII a..z
      codePoint = digit + 97;
    } else {
      // 26..35 map to ASCII 0..9
      codePoint = digit + 22;
    }
    if (flag != 0) {
      // Uppercase letters sit 32 code points below their lowercase forms.
      codePoint -= 32;
    }
    return codePoint;
  }
  /**
   * Bias adaptation function as per section 3.4 of RFC 3492.
   * http://tools.ietf.org/html/rfc3492#section-3.4
   * @private
   * @param {Number} delta The delta that was just encoded or decoded.
   * @param {Number} numPoints Divisor used to scale the delta (the callers
   * pass the number of code points handled so far, plus one).
   * @param {Boolean} firstTime Truthy for the very first delta, which is
   * divided by `damp` instead of being halved.
   * @returns {Number} The new bias.
   */
  function adapt(delta, numPoints, firstTime) {
    var scaled = firstTime ? floor(delta / damp) : delta >> 1;
    scaled += floor(scaled / numPoints);

    // Keep dividing until the value drops to the threshold, counting one
    // `base` step per division.
    var threshold = baseMinusTMin * tMax >> 1;
    var k = 0;
    while (scaled > threshold) {
      scaled = floor(scaled / baseMinusTMin);
      k += base;
    }
    return floor(k + (baseMinusTMin + 1) * scaled / (scaled + skew));
  }
  /**
   * Converts a Punycode string of ASCII-only symbols to a string of Unicode
   * symbols.
   * @memberOf punycode
   * @param {String} input The Punycode string of ASCII-only symbols.
   * @returns {String} The resulting string of Unicode symbols.
   * @throws {RangeError} With the `not-basic` message if a code point >= 0x80
   * appears before the last delimiter; with the `invalid-input` message if the
   * encoded part contains a character that is not a Punycode digit or ends in
   * the middle of a variable-length integer; with the `overflow` message if a
   * decoded value would exceed `maxInt`.
   */
  function decode(input) {
    // Don't use UCS-2
    var output = [];
    var inputLength = input.length;
    var i = 0;
    var n = initialN;
    var bias = initialBias;
    var out;
    var basic;
    var j;
    var index;
    var oldi;
    var w;
    var k;
    var digit;
    var t;
    /** Cached calculation results */
    var baseMinusT;

    // Handle the basic code points: let `basic` be the number of input code
    // points before the last delimiter, or `0` if there is none, then copy
    // the first basic code points to the output.
    basic = input.lastIndexOf(delimiter);
    if (basic < 0) {
      basic = 0;
    }

    for (j = 0; j < basic; ++j) {
      // if it's not a basic code point
      if (input.charCodeAt(j) >= 0x80) {
        error('not-basic');
      }
      output.push(input.charCodeAt(j));
    }

    // Main decoding loop: start just after the last delimiter if any basic code
    // points were copied; start at the beginning otherwise.
    for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

      // `index` is the index of the next character to be consumed.
      // Decode a generalized variable-length integer into `delta`,
      // which gets added to `i`. The overflow checking is easier
      // if we increase `i` as we go, then subtract off its starting
      // value at the end to obtain `delta`.
      for (oldi = i, w = 1, k = base; /* no condition */; k += base) {

        if (index >= inputLength) {
          error('invalid-input');
        }

        digit = basicToDigit(input.charCodeAt(index++));

        // A character that is not a Punycode digit is malformed input, not an
        // arithmetic overflow, so report it with the matching message.
        if (digit >= base) {
          error('invalid-input');
        }
        if (digit > floor((maxInt - i) / w)) {
          error('overflow');
        }

        i += digit * w;
        t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

        // A digit below the threshold terminates the current integer.
        if (digit < t) {
          break;
        }

        baseMinusT = base - t;
        if (w > floor(maxInt / baseMinusT)) {
          error('overflow');
        }

        w *= baseMinusT;

      }

      out = output.length + 1;
      bias = adapt(i - oldi, out, oldi == 0);

      // `i` was supposed to wrap around from `out` to `0`,
      // incrementing `n` each time, so we'll fix that now:
      if (floor(i / out) > maxInt - n) {
        error('overflow');
      }

      n += floor(i / out);
      i %= out;

      // Insert `n` at position `i` of the output
      output.splice(i++, 0, n);

    }

    return ucs2encode(output);
  }
  /**
   * Converts a string of Unicode symbols (e.g. a domain name label) to a
   * Punycode string of ASCII-only symbols.
   * @memberOf punycode
   * @param {String} input The string of Unicode symbols.
   * @returns {String} The resulting Punycode string of ASCII-only symbols.
   * @throws {RangeError} With the `overflow` message if an intermediate delta
   * would exceed `maxInt`.
   */
  function encode(input) {
    // Convert the input in UCS-2 to an array of Unicode code points.
    var codePoints = ucs2decode(input);
    var total = codePoints.length;
    var pieces = [];

    // Initial encoder state.
    var n = initialN;
    var delta = 0;
    var bias = initialBias;

    var position;
    var current;

    // Basic code points are copied to the output first, in their input order.
    for (position = 0; position < total; ++position) {
      current = codePoints[position];
      if (current < 0x80) {
        pieces.push(stringFromCharCode(current));
      }
    }

    // `basicCount` is the number of basic code points; `handled` is the number
    // of code points dealt with so far.
    var basicCount = pieces.length;
    var handled = basicCount;

    // A non-empty basic prefix is closed off with the delimiter.
    if (basicCount) {
      pieces.push(delimiter);
    }

    while (handled < total) {
      // Everything below `n` is already handled; find the smallest remaining
      // code point that is >= `n`.
      var smallest = maxInt;
      for (position = 0; position < total; ++position) {
        current = codePoints[position];
        if (current >= n && current < smallest) {
          smallest = current;
        }
      }

      // Advance `delta` so the decoder's <n,i> state reaches <smallest,0>,
      // guarding against overflow.
      var handledPlusOne = handled + 1;
      if (smallest - n > floor((maxInt - delta) / handledPlusOne)) {
        error('overflow');
      }
      delta += (smallest - n) * handledPlusOne;
      n = smallest;

      for (position = 0; position < total; ++position) {
        current = codePoints[position];

        if (current < n && ++delta > maxInt) {
          error('overflow');
        }

        if (current == n) {
          // Emit `delta` as a generalized variable-length integer.
          var q = delta;
          var k = base;
          while (true) {
            var t;
            if (k <= bias) {
              t = tMin;
            } else if (k >= bias + tMax) {
              t = tMax;
            } else {
              t = k - bias;
            }
            if (q < t) {
              break;
            }
            var qMinusT = q - t;
            var baseMinusT = base - t;
            pieces.push(
              stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
            );
            q = floor(qMinusT / baseMinusT);
            k += base;
          }
          // The final digit, below the threshold, terminates the integer.
          pieces.push(stringFromCharCode(digitToBasic(q, 0)));

          // `handledPlusOne` is deliberately the value computed before this
          // pass over the input, not recomputed after `handled` changes.
          bias = adapt(delta, handledPlusOne, handled == basicCount);
          delta = 0;
          ++handled;
        }
      }

      ++delta;
      ++n;
    }

    return pieces.join('');
  }
  /**
   * Converts a Punycode string representing a domain name or an email address
   * to Unicode. Only labels that start with the `xn--` prefix are decoded, so
   * it doesn't matter if you call it on a string that has already been
   * converted to Unicode.
   * @memberOf punycode
   * @param {String} input The Punycoded domain name or email address to
   * convert to Unicode.
   * @returns {String} The Unicode representation of the given Punycode
   * string.
   */
  function toUnicode(input) {
    return mapDomain(input, function(label) {
      if (!regexPunycode.test(label)) {
        // Not a Punycoded label; pass it through untouched.
        return label;
      }
      // Drop the 4-character prefix and lowercase the rest before decoding.
      var encodedPart = label.slice(4).toLowerCase();
      return decode(encodedPart);
    });
  }
  /**
   * Converts a Unicode string representing a domain name or an email address to
   * Punycode. Only labels containing a character outside printable ASCII are
   * converted, i.e. it doesn't matter if you call it with a domain that's
   * already in ASCII.
   * @memberOf punycode
   * @param {String} input The domain name or email address to convert, as a
   * Unicode string.
   * @returns {String} The Punycode representation of the given domain name or
   * email address.
   */
  function toASCII(input) {
    return mapDomain(input, function(label) {
      if (!regexNonASCII.test(label)) {
        // Plain printable-ASCII label; nothing to encode.
        return label;
      }
      return 'xn--' + encode(label);
    });
  }
  439. /*--------------------------------------------------------------------------*/
  /** Define the public API */
  punycode = {};

  /**
   * A string representing the current Punycode.js version number.
   * @memberOf punycode
   * @type String
   */
  punycode['version'] = '1.3.2';

  /**
   * An object of methods to convert from JavaScript's internal character
   * representation (UCS-2) to Unicode code points, and back.
   * @see <https://mathiasbynens.be/notes/javascript-encoding>
   * @memberOf punycode
   * @type Object
   */
  punycode['ucs2'] = {
    'decode': ucs2decode,
    'encode': ucs2encode
  };

  // The four conversion entry points. Properties are added in the same order
  // as before, so enumeration order is unchanged.
  punycode['decode'] = decode;
  punycode['encode'] = encode;
  punycode['toASCII'] = toASCII;
  punycode['toUnicode'] = toUnicode;
  /**
   * Expose `punycode`.
   *
   * Exactly one of three routes is taken, checked in this order:
   *  1. an AMD loader (`define` is a function with a truthy object `amd`):
   *     register a module named 'punycode';
   *  2. CommonJS-style `exports` and `module` were both detected: either
   *     replace `module.exports` or copy the API's own properties onto
   *     `exports`;
   *  3. otherwise: attach the API as `root.punycode`.
   */
  // Some AMD build optimizers, like r.js, check for specific condition patterns
  // like the following:
  if (
    typeof define == 'function' &&
    typeof define.amd == 'object' &&
    define.amd
  ) {
    define('punycode', function() {
      return punycode;
    });
  } else if (freeExports && freeModule) {
    // When `module.exports` and `exports` are loosely equal, the whole export
    // object can be swapped out; otherwise only `exports` can be populated.
    if (module.exports == freeExports) { // in Node.js or RingoJS v0.8.0+
      freeModule.exports = punycode;
    } else { // in Narwhal or RingoJS v0.7.0-
      // Copy own enumerable properties only; `key` is the shared temporary
      // declared alongside the other module-level variables.
      for (key in punycode) {
        punycode.hasOwnProperty(key) && (freeExports[key] = punycode[key]);
      }
    }
  } else { // in Rhino or a web browser
    root.punycode = punycode;
  }
  486. }(this));