uidna.cpp 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2003-2014, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: uidna.cpp
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2003feb1
  16. * created by: Ram Viswanadha
  17. */
  18. #include "unicode/utypes.h"
  19. #if !UCONFIG_NO_IDNA
  20. #include "unicode/uidna.h"
  21. #include "unicode/ustring.h"
  22. #include "unicode/usprep.h"
  23. #include "punycode.h"
  24. #include "ustr_imp.h"
  25. #include "cmemory.h"
  26. #include "uassert.h"
  27. #include "sprpimpl.h"
  /* The official IDNA ACE (ASCII Compatible Encoding) prefix is "xn--". */
  static const char16_t ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
  #define ACE_PREFIX_LENGTH 4

  /* A label must not be longer than MAX_LABEL_LENGTH code units. */
  #define MAX_LABEL_LENGTH 63
  /* Size of the per-label stack buffers used by the conversion routines. */
  #define MAX_LABEL_BUFFER_SIZE 100

  /* A domain name must not be longer than MAX_DOMAIN_NAME_LENGTH code units. */
  #define MAX_DOMAIN_NAME_LENGTH 255
  /*
   * Stack buffer size for a whole domain name.
   * Parenthesized so that the expansion stays a single operand when the
   * macro is used inside a larger expression.
   */
  #define MAX_IDN_BUFFER_SIZE (MAX_DOMAIN_NAME_LENGTH+1)

  /* ASCII code points used by the helpers below. */
  #define LOWER_CASE_DELTA 0x0020
  #define HYPHEN 0x002D
  #define FULL_STOP 0x002E
  #define CAPITAL_A 0x0041
  #define CAPITAL_Z 0x005A
  42. inline static char16_t
  43. toASCIILower(char16_t ch){
  44. if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
  45. return ch + LOWER_CASE_DELTA;
  46. }
  47. return ch;
  48. }
  49. inline static UBool
  50. startsWithPrefix(const char16_t* src , int32_t srcLength){
  51. if(srcLength < ACE_PREFIX_LENGTH){
  52. return false;
  53. }
  54. for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
  55. if(toASCIILower(src[i]) != ACE_PREFIX[i]){
  56. return false;
  57. }
  58. }
  59. return true;
  60. }
  61. inline static int32_t
  62. compareCaseInsensitiveASCII(const char16_t* s1, int32_t s1Len,
  63. const char16_t* s2, int32_t s2Len){
  64. int32_t minLength;
  65. int32_t lengthResult;
  66. // are we comparing different lengths?
  67. if(s1Len != s2Len) {
  68. if(s1Len < s2Len) {
  69. minLength = s1Len;
  70. lengthResult = -1;
  71. } else {
  72. minLength = s2Len;
  73. lengthResult = 1;
  74. }
  75. } else {
  76. // ok the lengths are equal
  77. minLength = s1Len;
  78. lengthResult = 0;
  79. }
  80. char16_t c1,c2;
  81. int32_t rc;
  82. for(int32_t i =0;/* no condition */;i++) {
  83. /* If we reach the ends of both strings then they match */
  84. if(i == minLength) {
  85. return lengthResult;
  86. }
  87. c1 = s1[i];
  88. c2 = s2[i];
  89. /* Case-insensitive comparison */
  90. if(c1!=c2) {
  91. rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
  92. if(rc!=0) {
  93. lengthResult=rc;
  94. break;
  95. }
  96. }
  97. }
  98. return lengthResult;
  99. }
  100. /**
  101. * Ascertain if the given code point is a label separator as
  102. * defined by the IDNA RFC
  103. *
  104. * @param ch The code point to be ascertained
  105. * @return true if the char is a label separator
  106. * @stable ICU 2.8
  107. */
  108. static inline UBool isLabelSeparator(char16_t ch){
  109. switch(ch){
  110. case 0x002e:
  111. case 0x3002:
  112. case 0xFF0E:
  113. case 0xFF61:
  114. return true;
  115. default:
  116. return false;
  117. }
  118. }
  119. // returns the length of the label excluding the separator
  120. // if *limit == separator then the length returned does not include
  121. // the separtor.
  122. static inline int32_t
  123. getNextSeparator(char16_t *src, int32_t srcLength,
  124. char16_t **limit, UBool *done){
  125. if(srcLength == -1){
  126. int32_t i;
  127. for(i=0 ; ;i++){
  128. if(src[i] == 0){
  129. *limit = src + i; // point to null
  130. *done = true;
  131. return i;
  132. }
  133. if(isLabelSeparator(src[i])){
  134. *limit = src + (i+1); // go past the delimiter
  135. return i;
  136. }
  137. }
  138. }else{
  139. int32_t i;
  140. for(i=0;i<srcLength;i++){
  141. if(isLabelSeparator(src[i])){
  142. *limit = src + (i+1); // go past the delimiter
  143. return i;
  144. }
  145. }
  146. // we have not found the delimiter
  147. // if(i==srcLength)
  148. *limit = src+srcLength;
  149. *done = true;
  150. return i;
  151. }
  152. }
  153. static inline UBool isLDHChar(char16_t ch){
  154. // high runner case
  155. if(ch>0x007A){
  156. return false;
  157. }
  158. //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
  159. if( (ch==0x002D) ||
  160. (0x0030 <= ch && ch <= 0x0039) ||
  161. (0x0041 <= ch && ch <= 0x005A) ||
  162. (0x0061 <= ch && ch <= 0x007A)
  163. ){
  164. return true;
  165. }
  166. return false;
  167. }
  168. static int32_t
  169. _internal_toASCII(const char16_t* src, int32_t srcLength,
  170. char16_t* dest, int32_t destCapacity,
  171. int32_t options,
  172. UStringPrepProfile* nameprep,
  173. UParseError* parseError,
  174. UErrorCode* status)
  175. {
  176. // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
  177. char16_t b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
  178. //initialize pointers to stack buffers
  179. char16_t *b1 = b1Stack, *b2 = b2Stack;
  180. int32_t b1Len=0, b2Len,
  181. b1Capacity = MAX_LABEL_BUFFER_SIZE,
  182. b2Capacity = MAX_LABEL_BUFFER_SIZE ,
  183. reqLength=0;
  184. int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
  185. UBool* caseFlags = nullptr;
  186. // the source contains all ascii codepoints
  187. UBool srcIsASCII = true;
  188. // assume the source contains all LDH codepoints
  189. UBool srcIsLDH = true;
  190. int32_t j=0;
  191. //get the options
  192. UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
  193. int32_t failPos = -1;
  194. if(srcLength == -1){
  195. srcLength = u_strlen(src);
  196. }
  197. if(srcLength > b1Capacity){
  198. b1 = (char16_t*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
  199. if(b1==nullptr){
  200. *status = U_MEMORY_ALLOCATION_ERROR;
  201. goto CLEANUP;
  202. }
  203. b1Capacity = srcLength;
  204. }
  205. // step 1
  206. for( j=0;j<srcLength;j++){
  207. if(src[j] > 0x7F){
  208. srcIsASCII = false;
  209. }
  210. b1[b1Len++] = src[j];
  211. }
  212. // step 2 is performed only if the source contains non ASCII
  213. if(srcIsASCII == false){
  214. // step 2
  215. b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
  216. if(*status == U_BUFFER_OVERFLOW_ERROR){
  217. // redo processing of string
  218. // we do not have enough room so grow the buffer
  219. if(b1 != b1Stack){
  220. uprv_free(b1);
  221. }
  222. b1 = (char16_t*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
  223. if(b1==nullptr){
  224. *status = U_MEMORY_ALLOCATION_ERROR;
  225. goto CLEANUP;
  226. }
  227. *status = U_ZERO_ERROR; // reset error
  228. b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
  229. }
  230. }
  231. // error bail out
  232. if(U_FAILURE(*status)){
  233. goto CLEANUP;
  234. }
  235. if(b1Len == 0){
  236. *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
  237. goto CLEANUP;
  238. }
  239. // for step 3 & 4
  240. srcIsASCII = true;
  241. for( j=0;j<b1Len;j++){
  242. // check if output of usprep_prepare is all ASCII
  243. if(b1[j] > 0x7F){
  244. srcIsASCII = false;
  245. }else if(isLDHChar(b1[j])==false){ // if the char is in ASCII range verify that it is an LDH character
  246. srcIsLDH = false;
  247. failPos = j;
  248. }
  249. }
  250. if(useSTD3ASCIIRules){
  251. // verify 3a and 3b
  252. // 3(a) Verify the absence of non-LDH ASCII code points; that is, the
  253. // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
  254. // 3(b) Verify the absence of leading and trailing hyphen-minus; that
  255. // is, the absence of U+002D at the beginning and end of the
  256. // sequence.
  257. if( srcIsLDH == false /* source at this point should not contain anyLDH characters */
  258. || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){
  259. *status = U_IDNA_STD3_ASCII_RULES_ERROR;
  260. /* populate the parseError struct */
  261. if(srcIsLDH==false){
  262. // failPos is always set the index of failure
  263. uprv_syntaxError(b1,failPos, b1Len,parseError);
  264. }else if(b1[0] == HYPHEN){
  265. // fail position is 0
  266. uprv_syntaxError(b1,0,b1Len,parseError);
  267. }else{
  268. // the last index in the source is always length-1
  269. uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError);
  270. }
  271. goto CLEANUP;
  272. }
  273. }
  274. // Step 4: if the source is ASCII then proceed to step 8
  275. if(srcIsASCII){
  276. if(b1Len <= destCapacity){
  277. u_memmove(dest, b1, b1Len);
  278. reqLength = b1Len;
  279. }else{
  280. reqLength = b1Len;
  281. goto CLEANUP;
  282. }
  283. }else{
  284. // step 5 : verify the sequence does not begin with ACE prefix
  285. if(!startsWithPrefix(b1,b1Len)){
  286. //step 6: encode the sequence with punycode
  287. // do not preserve the case flags for now!
  288. // TODO: Preserve the case while implementing the RFE
  289. // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
  290. // uprv_memset(caseFlags,true,b1Len);
  291. b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status);
  292. if(*status == U_BUFFER_OVERFLOW_ERROR){
  293. // redo processing of string
  294. /* we do not have enough room so grow the buffer*/
  295. b2 = (char16_t*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
  296. if(b2 == nullptr){
  297. *status = U_MEMORY_ALLOCATION_ERROR;
  298. goto CLEANUP;
  299. }
  300. *status = U_ZERO_ERROR; // reset error
  301. b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
  302. }
  303. //error bail out
  304. if(U_FAILURE(*status)){
  305. goto CLEANUP;
  306. }
  307. // TODO : Reconsider while implementing the case preserve RFE
  308. // convert all codepoints to lower case ASCII
  309. // toASCIILower(b2,b2Len);
  310. reqLength = b2Len+ACE_PREFIX_LENGTH;
  311. if(reqLength > destCapacity){
  312. *status = U_BUFFER_OVERFLOW_ERROR;
  313. goto CLEANUP;
  314. }
  315. //Step 7: prepend the ACE prefix
  316. u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH);
  317. //Step 6: copy the contents in b2 into dest
  318. u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len);
  319. }else{
  320. *status = U_IDNA_ACE_PREFIX_ERROR;
  321. //position of failure is 0
  322. uprv_syntaxError(b1,0,b1Len,parseError);
  323. goto CLEANUP;
  324. }
  325. }
  326. // step 8: verify the length of label
  327. if(reqLength > MAX_LABEL_LENGTH){
  328. *status = U_IDNA_LABEL_TOO_LONG_ERROR;
  329. }
  330. CLEANUP:
  331. if(b1 != b1Stack){
  332. uprv_free(b1);
  333. }
  334. if(b2 != b2Stack){
  335. uprv_free(b2);
  336. }
  337. uprv_free(caseFlags);
  338. return u_terminateUChars(dest, destCapacity, reqLength, status);
  339. }
  340. static int32_t
  341. _internal_toUnicode(const char16_t* src, int32_t srcLength,
  342. char16_t* dest, int32_t destCapacity,
  343. int32_t options,
  344. UStringPrepProfile* nameprep,
  345. UParseError* parseError,
  346. UErrorCode* status)
  347. {
  348. //get the options
  349. //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
  350. int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
  351. // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
  352. char16_t b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
  353. //initialize pointers to stack buffers
  354. char16_t *b1 = b1Stack, *b2 = b2Stack, *b1Prime=nullptr, *b3=b3Stack;
  355. int32_t b1Len = 0, b2Len, b1PrimeLen, b3Len,
  356. b1Capacity = MAX_LABEL_BUFFER_SIZE,
  357. b2Capacity = MAX_LABEL_BUFFER_SIZE,
  358. b3Capacity = MAX_LABEL_BUFFER_SIZE,
  359. reqLength=0;
  360. UBool* caseFlags = nullptr;
  361. UBool srcIsASCII = true;
  362. /*UBool srcIsLDH = true;
  363. int32_t failPos =0;*/
  364. // step 1: find out if all the codepoints in src are ASCII
  365. if(srcLength==-1){
  366. srcLength = 0;
  367. for(;src[srcLength]!=0;){
  368. if(src[srcLength]> 0x7f){
  369. srcIsASCII = false;
  370. }/*else if(isLDHChar(src[srcLength])==false){
  371. // here we do not assemble surrogates
  372. // since we know that LDH code points
  373. // are in the ASCII range only
  374. srcIsLDH = false;
  375. failPos = srcLength;
  376. }*/
  377. srcLength++;
  378. }
  379. }else if(srcLength > 0){
  380. for(int32_t j=0; j<srcLength; j++){
  381. if(src[j]> 0x7f){
  382. srcIsASCII = false;
  383. break;
  384. }/*else if(isLDHChar(src[j])==false){
  385. // here we do not assemble surrogates
  386. // since we know that LDH code points
  387. // are in the ASCII range only
  388. srcIsLDH = false;
  389. failPos = j;
  390. }*/
  391. }
  392. }else{
  393. return 0;
  394. }
  395. if(srcIsASCII == false){
  396. // step 2: process the string
  397. b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
  398. if(*status == U_BUFFER_OVERFLOW_ERROR){
  399. // redo processing of string
  400. /* we do not have enough room so grow the buffer*/
  401. b1 = (char16_t*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
  402. if(b1==nullptr){
  403. *status = U_MEMORY_ALLOCATION_ERROR;
  404. goto CLEANUP;
  405. }
  406. *status = U_ZERO_ERROR; // reset error
  407. b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
  408. }
  409. //bail out on error
  410. if(U_FAILURE(*status)){
  411. goto CLEANUP;
  412. }
  413. }else{
  414. //just point src to b1
  415. b1 = (char16_t*) src;
  416. b1Len = srcLength;
  417. }
  418. // The RFC states that
  419. // <quote>
  420. // ToUnicode never fails. If any step fails, then the original input
  421. // is returned immediately in that step.
  422. // </quote>
  423. //step 3: verify ACE Prefix
  424. if(startsWithPrefix(b1,b1Len)){
  425. //step 4: Remove the ACE Prefix
  426. b1Prime = b1 + ACE_PREFIX_LENGTH;
  427. b1PrimeLen = b1Len - ACE_PREFIX_LENGTH;
  428. //step 5: Decode using punycode
  429. b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status);
  430. if(*status == U_BUFFER_OVERFLOW_ERROR){
  431. // redo processing of string
  432. /* we do not have enough room so grow the buffer*/
  433. b2 = (char16_t*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
  434. if(b2==nullptr){
  435. *status = U_MEMORY_ALLOCATION_ERROR;
  436. goto CLEANUP;
  437. }
  438. *status = U_ZERO_ERROR; // reset error
  439. b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status);
  440. }
  441. //step 6:Apply toASCII
  442. b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status);
  443. if(*status == U_BUFFER_OVERFLOW_ERROR){
  444. // redo processing of string
  445. /* we do not have enough room so grow the buffer*/
  446. b3 = (char16_t*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
  447. if(b3==nullptr){
  448. *status = U_MEMORY_ALLOCATION_ERROR;
  449. goto CLEANUP;
  450. }
  451. *status = U_ZERO_ERROR; // reset error
  452. b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status);
  453. }
  454. //bail out on error
  455. if(U_FAILURE(*status)){
  456. goto CLEANUP;
  457. }
  458. //step 7: verify
  459. if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
  460. // Cause the original to be returned.
  461. *status = U_IDNA_VERIFICATION_ERROR;
  462. goto CLEANUP;
  463. }
  464. //step 8: return output of step 5
  465. reqLength = b2Len;
  466. if(b2Len <= destCapacity) {
  467. u_memmove(dest, b2, b2Len);
  468. }
  469. }
  470. else{
  471. // See the start of this if statement for why this is commented out.
  472. // verify that STD3 ASCII rules are satisfied
  473. /*if(useSTD3ASCIIRules == true){
  474. if( srcIsLDH == false // source contains some non-LDH characters
  475. || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){
  476. *status = U_IDNA_STD3_ASCII_RULES_ERROR;
  477. // populate the parseError struct
  478. if(srcIsLDH==false){
  479. // failPos is always set the index of failure
  480. uprv_syntaxError(src,failPos, srcLength,parseError);
  481. }else if(src[0] == HYPHEN){
  482. // fail position is 0
  483. uprv_syntaxError(src,0,srcLength,parseError);
  484. }else{
  485. // the last index in the source is always length-1
  486. uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
  487. }
  488. goto CLEANUP;
  489. }
  490. }*/
  491. // just return the source
  492. //copy the source to destination
  493. if(srcLength <= destCapacity){
  494. u_memmove(dest, src, srcLength);
  495. }
  496. reqLength = srcLength;
  497. }
  498. CLEANUP:
  499. if(b1 != b1Stack && b1!=src){
  500. uprv_free(b1);
  501. }
  502. if(b2 != b2Stack){
  503. uprv_free(b2);
  504. }
  505. uprv_free(caseFlags);
  506. // The RFC states that
  507. // <quote>
  508. // ToUnicode never fails. If any step fails, then the original input
  509. // is returned immediately in that step.
  510. // </quote>
  511. // So if any step fails lets copy source to destination
  512. if(U_FAILURE(*status)){
  513. //copy the source to destination
  514. if(dest && srcLength <= destCapacity){
  515. // srcLength should have already been set earlier.
  516. U_ASSERT(srcLength >= 0);
  517. u_memmove(dest, src, srcLength);
  518. }
  519. reqLength = srcLength;
  520. *status = U_ZERO_ERROR;
  521. }
  522. return u_terminateUChars(dest, destCapacity, reqLength, status);
  523. }
  524. U_CAPI int32_t U_EXPORT2
  525. uidna_toASCII(const char16_t* src, int32_t srcLength,
  526. char16_t* dest, int32_t destCapacity,
  527. int32_t options,
  528. UParseError* parseError,
  529. UErrorCode* status){
  530. if(status == nullptr || U_FAILURE(*status)){
  531. return 0;
  532. }
  533. if((src==nullptr) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
  534. *status = U_ILLEGAL_ARGUMENT_ERROR;
  535. return 0;
  536. }
  537. UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
  538. if(U_FAILURE(*status)){
  539. return -1;
  540. }
  541. int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
  542. /* close the profile*/
  543. usprep_close(nameprep);
  544. return retLen;
  545. }
  546. U_CAPI int32_t U_EXPORT2
  547. uidna_toUnicode(const char16_t* src, int32_t srcLength,
  548. char16_t* dest, int32_t destCapacity,
  549. int32_t options,
  550. UParseError* parseError,
  551. UErrorCode* status){
  552. if(status == nullptr || U_FAILURE(*status)){
  553. return 0;
  554. }
  555. if( (src==nullptr) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
  556. *status = U_ILLEGAL_ARGUMENT_ERROR;
  557. return 0;
  558. }
  559. UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
  560. if(U_FAILURE(*status)){
  561. return -1;
  562. }
  563. int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
  564. usprep_close(nameprep);
  565. return retLen;
  566. }
  567. U_CAPI int32_t U_EXPORT2
  568. uidna_IDNToASCII( const char16_t *src, int32_t srcLength,
  569. char16_t* dest, int32_t destCapacity,
  570. int32_t options,
  571. UParseError *parseError,
  572. UErrorCode *status){
  573. if(status == nullptr || U_FAILURE(*status)){
  574. return 0;
  575. }
  576. if((src==nullptr) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
  577. *status = U_ILLEGAL_ARGUMENT_ERROR;
  578. return 0;
  579. }
  580. int32_t reqLength = 0;
  581. UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
  582. if(U_FAILURE(*status)){
  583. return 0;
  584. }
  585. //initialize pointers
  586. char16_t *delimiter = (char16_t*)src;
  587. char16_t *labelStart = (char16_t*)src;
  588. char16_t *currentDest = (char16_t*) dest;
  589. int32_t remainingLen = srcLength;
  590. int32_t remainingDestCapacity = destCapacity;
  591. int32_t labelLen = 0, labelReqLength = 0;
  592. UBool done = false;
  593. for(;;){
  594. labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
  595. labelReqLength = 0;
  596. if(!(labelLen==0 && done)){// make sure this is not a root label separator.
  597. labelReqLength = _internal_toASCII( labelStart, labelLen,
  598. currentDest, remainingDestCapacity,
  599. options, nameprep,
  600. parseError, status);
  601. if(*status == U_BUFFER_OVERFLOW_ERROR){
  602. *status = U_ZERO_ERROR; // reset error
  603. remainingDestCapacity = 0;
  604. }
  605. }
  606. if(U_FAILURE(*status)){
  607. break;
  608. }
  609. reqLength +=labelReqLength;
  610. // adjust the destination pointer
  611. if(labelReqLength < remainingDestCapacity){
  612. currentDest = currentDest + labelReqLength;
  613. remainingDestCapacity -= labelReqLength;
  614. }else{
  615. // should never occur
  616. remainingDestCapacity = 0;
  617. }
  618. if(done){
  619. break;
  620. }
  621. // add the label separator
  622. if(remainingDestCapacity > 0){
  623. *currentDest++ = FULL_STOP;
  624. remainingDestCapacity--;
  625. }
  626. reqLength++;
  627. labelStart = delimiter;
  628. if(remainingLen >0 ){
  629. remainingLen = (int32_t)(srcLength - (delimiter - src));
  630. }
  631. }
  632. if(reqLength > MAX_DOMAIN_NAME_LENGTH){
  633. *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
  634. }
  635. usprep_close(nameprep);
  636. return u_terminateUChars(dest, destCapacity, reqLength, status);
  637. }
  638. U_CAPI int32_t U_EXPORT2
  639. uidna_IDNToUnicode( const char16_t* src, int32_t srcLength,
  640. char16_t* dest, int32_t destCapacity,
  641. int32_t options,
  642. UParseError* parseError,
  643. UErrorCode* status){
  644. if(status == nullptr || U_FAILURE(*status)){
  645. return 0;
  646. }
  647. if((src==nullptr) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
  648. *status = U_ILLEGAL_ARGUMENT_ERROR;
  649. return 0;
  650. }
  651. int32_t reqLength = 0;
  652. UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
  653. if(U_FAILURE(*status)){
  654. return 0;
  655. }
  656. //initialize pointers
  657. char16_t *delimiter = (char16_t*)src;
  658. char16_t *labelStart = (char16_t*)src;
  659. char16_t *currentDest = (char16_t*) dest;
  660. int32_t remainingLen = srcLength;
  661. int32_t remainingDestCapacity = destCapacity;
  662. int32_t labelLen = 0, labelReqLength = 0;
  663. UBool done = false;
  664. for(;;){
  665. labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
  666. // The RFC states that
  667. // <quote>
  668. // ToUnicode never fails. If any step fails, then the original input
  669. // is returned immediately in that step.
  670. // </quote>
  671. // _internal_toUnicode will copy the label.
  672. /*if(labelLen==0 && done==false){
  673. *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
  674. break;
  675. }*/
  676. labelReqLength = _internal_toUnicode(labelStart, labelLen,
  677. currentDest, remainingDestCapacity,
  678. options, nameprep,
  679. parseError, status);
  680. if(*status == U_BUFFER_OVERFLOW_ERROR){
  681. *status = U_ZERO_ERROR; // reset error
  682. remainingDestCapacity = 0;
  683. }
  684. if(U_FAILURE(*status)){
  685. break;
  686. }
  687. reqLength +=labelReqLength;
  688. // adjust the destination pointer
  689. if(labelReqLength < remainingDestCapacity){
  690. currentDest = currentDest + labelReqLength;
  691. remainingDestCapacity -= labelReqLength;
  692. }else{
  693. // should never occur
  694. remainingDestCapacity = 0;
  695. }
  696. if(done){
  697. break;
  698. }
  699. // add the label separator
  700. // Unlike the ToASCII operation we don't normalize the label separators
  701. if(remainingDestCapacity > 0){
  702. *currentDest++ = *(labelStart + labelLen);
  703. remainingDestCapacity--;
  704. }
  705. reqLength++;
  706. labelStart = delimiter;
  707. if(remainingLen >0 ){
  708. remainingLen = (int32_t)(srcLength - (delimiter - src));
  709. }
  710. }
  711. if(reqLength > MAX_DOMAIN_NAME_LENGTH){
  712. *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
  713. }
  714. usprep_close(nameprep);
  715. return u_terminateUChars(dest, destCapacity, reqLength, status);
  716. }
  717. U_CAPI int32_t U_EXPORT2
  718. uidna_compare( const char16_t *s1, int32_t length1,
  719. const char16_t *s2, int32_t length2,
  720. int32_t options,
  721. UErrorCode* status){
  722. if(status == nullptr || U_FAILURE(*status)){
  723. return -1;
  724. }
  725. char16_t b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
  726. char16_t *b1 = b1Stack, *b2 = b2Stack;
  727. int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
  728. int32_t result=-1;
  729. UParseError parseError;
  730. b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
  731. if(*status == U_BUFFER_OVERFLOW_ERROR){
  732. // redo processing of string
  733. b1 = (char16_t*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
  734. if(b1==nullptr){
  735. *status = U_MEMORY_ALLOCATION_ERROR;
  736. goto CLEANUP;
  737. }
  738. *status = U_ZERO_ERROR; // reset error
  739. b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
  740. }
  741. b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status);
  742. if(*status == U_BUFFER_OVERFLOW_ERROR){
  743. // redo processing of string
  744. b2 = (char16_t*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
  745. if(b2==nullptr){
  746. *status = U_MEMORY_ALLOCATION_ERROR;
  747. goto CLEANUP;
  748. }
  749. *status = U_ZERO_ERROR; // reset error
  750. b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status);
  751. }
  752. // when toASCII is applied all label separators are replaced with FULL_STOP
  753. result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
  754. CLEANUP:
  755. if(b1 != b1Stack){
  756. uprv_free(b1);
  757. }
  758. if(b2 != b2Stack){
  759. uprv_free(b2);
  760. }
  761. return result;
  762. }
  763. #endif /* #if !UCONFIG_NO_IDNA */