nsIDNService.cpp 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912
  1. /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #include "nsIDNService.h"
  6. #include "nsReadableUtils.h"
  7. #include "nsCRT.h"
  8. #include "nsUnicharUtils.h"
  9. #include "nsUnicodeProperties.h"
  10. #include "nsUnicodeScriptCodes.h"
  11. #include "harfbuzz/hb.h"
  12. #include "nsIServiceManager.h"
  13. #include "nsIPrefService.h"
  14. #include "nsIPrefBranch.h"
  15. #include "nsIObserverService.h"
  16. #include "nsISupportsPrimitives.h"
  17. #include "punycode.h"
  // Selects the UTS #46 processing mode. The nsIDNService constructor adds
  // UIDNA_NONTRANSITIONAL_TO_UNICODE to the ICU options when this is false.
  //
  // Currently we use the non-transitional processing option -- see
  // http://unicode.org/reports/tr46/
  // To switch to transitional processing, change the value of this flag
  // and kTransitionalProcessing in netwerk/test/unit/test_idna2008.js to true
  // (revert bug 1218179).
  const bool kIDNA2008_TransitionalProcessing = false;
  24. #include "ICUUtils.h"
  25. #include "unicode/uscript.h"
  26. using namespace mozilla::unicode;
  //-----------------------------------------------------------------------------
  // RFC 1034 - 3.1. Name space specifications and terminology
  // Maximum length of one DNS label (node). Also used in this file to size the
  // ICU output buffers and the punycode work buffers.
  static const uint32_t kMaxDNSNodeLen = 63;
  // RFC 3490 - 5. ACE prefix
  // Prepended to the punycode output when a label is ACE-encoded.
  static const char kACEPrefix[] = "xn--";
  // Number of characters in kACEPrefix, excluding the terminating NUL.
  #define kACEPrefixLen 4
  //-----------------------------------------------------------------------------
  // Preference names. The first, second, fourth and fifth are observed by
  // Init() and read by prefsChanged(); NS_NET_PREF_IDNWHITELIST is the root of
  // the pref branch that isInWhitelist() queries with a TLD as the key.
  #define NS_NET_PREF_IDNBLACKLIST "network.IDN.blacklist_chars"
  #define NS_NET_PREF_SHOWPUNYCODE "network.IDN_show_punycode"
  #define NS_NET_PREF_IDNWHITELIST "network.IDN.whitelist."
  #define NS_NET_PREF_IDNUSEWHITELIST "network.IDN.use_whitelist"
  #define NS_NET_PREF_IDNRESTRICTION "network.IDN.restriction_profile"
  39. inline bool isOnlySafeChars(const nsAFlatString& in,
  40. const nsAFlatString& blacklist)
  41. {
  42. return (blacklist.IsEmpty() ||
  43. in.FindCharInSet(blacklist) == kNotFound);
  44. }
  //-----------------------------------------------------------------------------
  // nsIDNService
  //-----------------------------------------------------------------------------
  /* Implementation file */
  // Interfaces implemented by nsIDNService. nsIObserver is what lets Init()
  // register |this| for pref-change notifications; nsISupportsWeakReference
  // goes with the |true| (third) argument passed to AddObserver() there.
  NS_IMPL_ISUPPORTS(nsIDNService,
                    nsIIDNService,
                    nsIObserver,
                    nsISupportsWeakReference)
  53. nsresult nsIDNService::Init()
  54. {
  55. nsCOMPtr<nsIPrefService> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID));
  56. if (prefs)
  57. prefs->GetBranch(NS_NET_PREF_IDNWHITELIST, getter_AddRefs(mIDNWhitelistPrefBranch));
  58. nsCOMPtr<nsIPrefBranch> prefInternal(do_QueryInterface(prefs));
  59. if (prefInternal) {
  60. prefInternal->AddObserver(NS_NET_PREF_IDNBLACKLIST, this, true);
  61. prefInternal->AddObserver(NS_NET_PREF_SHOWPUNYCODE, this, true);
  62. prefInternal->AddObserver(NS_NET_PREF_IDNRESTRICTION, this, true);
  63. prefInternal->AddObserver(NS_NET_PREF_IDNUSEWHITELIST, this, true);
  64. prefsChanged(prefInternal, nullptr);
  65. }
  66. return NS_OK;
  67. }
  68. NS_IMETHODIMP nsIDNService::Observe(nsISupports *aSubject,
  69. const char *aTopic,
  70. const char16_t *aData)
  71. {
  72. if (!strcmp(aTopic, NS_PREFBRANCH_PREFCHANGE_TOPIC_ID)) {
  73. nsCOMPtr<nsIPrefBranch> prefBranch( do_QueryInterface(aSubject) );
  74. if (prefBranch)
  75. prefsChanged(prefBranch, aData);
  76. }
  77. return NS_OK;
  78. }
  79. void nsIDNService::prefsChanged(nsIPrefBranch *prefBranch, const char16_t *pref)
  80. {
  81. if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNBLACKLIST).Equals(pref)) {
  82. nsCOMPtr<nsISupportsString> blacklist;
  83. nsresult rv = prefBranch->GetComplexValue(NS_NET_PREF_IDNBLACKLIST,
  84. NS_GET_IID(nsISupportsString),
  85. getter_AddRefs(blacklist));
  86. if (NS_SUCCEEDED(rv))
  87. blacklist->ToString(getter_Copies(mIDNBlacklist));
  88. else
  89. mIDNBlacklist.Truncate();
  90. }
  91. if (!pref || NS_LITERAL_STRING(NS_NET_PREF_SHOWPUNYCODE).Equals(pref)) {
  92. bool val;
  93. if (NS_SUCCEEDED(prefBranch->GetBoolPref(NS_NET_PREF_SHOWPUNYCODE, &val)))
  94. mShowPunycode = val;
  95. }
  96. if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNUSEWHITELIST).Equals(pref)) {
  97. bool val;
  98. if (NS_SUCCEEDED(prefBranch->GetBoolPref(NS_NET_PREF_IDNUSEWHITELIST,
  99. &val)))
  100. mIDNUseWhitelist = val;
  101. }
  102. if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNRESTRICTION).Equals(pref)) {
  103. nsXPIDLCString profile;
  104. if (NS_FAILED(prefBranch->GetCharPref(NS_NET_PREF_IDNRESTRICTION,
  105. getter_Copies(profile)))) {
  106. profile.Truncate();
  107. }
  108. if (profile.EqualsLiteral("moderate")) {
  109. mRestrictionProfile = eModeratelyRestrictiveProfile;
  110. } else if (profile.EqualsLiteral("high")) {
  111. mRestrictionProfile = eHighlyRestrictiveProfile;
  112. } else {
  113. mRestrictionProfile = eASCIIOnlyProfile;
  114. }
  115. }
  116. }
  117. nsIDNService::nsIDNService()
  118. : mShowPunycode(false)
  119. , mIDNUseWhitelist(false)
  120. {
  121. uint32_t IDNAOptions = UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ;
  122. if (!kIDNA2008_TransitionalProcessing) {
  123. IDNAOptions |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
  124. }
  125. UErrorCode errorCode = U_ZERO_ERROR;
  126. mIDNA = uidna_openUTS46(IDNAOptions, &errorCode);
  127. }
  // Releases the ICU IDNA instance opened by the constructor.
  nsIDNService::~nsIDNService()
  {
    uidna_close(mIDNA);
  }
  132. nsresult
  133. nsIDNService::IDNA2008ToUnicode(const nsACString& input, nsAString& output)
  134. {
  135. NS_ConvertUTF8toUTF16 inputStr(input);
  136. UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  137. UErrorCode errorCode = U_ZERO_ERROR;
  138. int32_t inLen = inputStr.Length();
  139. int32_t outMaxLen = kMaxDNSNodeLen + 1;
  140. UChar outputBuffer[kMaxDNSNodeLen + 1];
  141. int32_t outLen = uidna_labelToUnicode(mIDNA, (const UChar*)inputStr.get(),
  142. inLen, outputBuffer, outMaxLen,
  143. &info, &errorCode);
  144. if (info.errors != 0) {
  145. return NS_ERROR_MALFORMED_URI;
  146. }
  147. if (U_SUCCESS(errorCode)) {
  148. ICUUtils::AssignUCharArrayToString(outputBuffer, outLen, output);
  149. }
  150. nsresult rv = ICUUtils::UErrorToNsResult(errorCode);
  151. if (rv == NS_ERROR_FAILURE) {
  152. rv = NS_ERROR_MALFORMED_URI;
  153. }
  154. return rv;
  155. }
  156. nsresult
  157. nsIDNService::IDNA2008StringPrep(const nsAString& input,
  158. nsAString& output,
  159. stringPrepFlag flag)
  160. {
  161. UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  162. UErrorCode errorCode = U_ZERO_ERROR;
  163. int32_t inLen = input.Length();
  164. int32_t outMaxLen = kMaxDNSNodeLen + 1;
  165. UChar outputBuffer[kMaxDNSNodeLen + 1];
  166. int32_t outLen =
  167. uidna_labelToUnicode(mIDNA, (const UChar*)PromiseFlatString(input).get(),
  168. inLen, outputBuffer, outMaxLen, &info, &errorCode);
  169. nsresult rv = ICUUtils::UErrorToNsResult(errorCode);
  170. if (rv == NS_ERROR_FAILURE) {
  171. rv = NS_ERROR_MALFORMED_URI;
  172. }
  173. NS_ENSURE_SUCCESS(rv, rv);
  174. // Output the result of nameToUnicode even if there were errors
  175. ICUUtils::AssignUCharArrayToString(outputBuffer, outLen, output);
  176. if (flag == eStringPrepIgnoreErrors) {
  177. return NS_OK;
  178. }
  179. uint32_t ignoredErrors = 0;
  180. if (flag == eStringPrepForDNS) {
  181. ignoredErrors = UIDNA_ERROR_LEADING_HYPHEN | UIDNA_ERROR_TRAILING_HYPHEN |
  182. UIDNA_ERROR_HYPHEN_3_4;
  183. }
  184. if ((info.errors & ~ignoredErrors) != 0) {
  185. if (flag == eStringPrepForDNS) {
  186. output.Truncate();
  187. }
  188. rv = NS_ERROR_MALFORMED_URI;
  189. }
  190. return rv;
  191. }
  // nsIIDNService entry point: converts a UTF-8 host name to ACE using the
  // DNS error policy (eStringPrepForDNS). See UTF8toACE().
  NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString & input, nsACString & ace)
  {
    return UTF8toACE(input, ace, eStringPrepForDNS);
  }
  196. nsresult nsIDNService::UTF8toACE(const nsACString & input, nsACString & ace,
  197. stringPrepFlag flag)
  198. {
  199. nsresult rv;
  200. NS_ConvertUTF8toUTF16 ustr(input);
  201. // map ideographic period to ASCII period etc.
  202. normalizeFullStops(ustr);
  203. uint32_t len, offset;
  204. len = 0;
  205. offset = 0;
  206. nsAutoCString encodedBuf;
  207. nsAString::const_iterator start, end;
  208. ustr.BeginReading(start);
  209. ustr.EndReading(end);
  210. ace.Truncate();
  211. // encode nodes if non ASCII
  212. while (start != end) {
  213. len++;
  214. if (*start++ == (char16_t)'.') {
  215. rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf, flag);
  216. NS_ENSURE_SUCCESS(rv, rv);
  217. ace.Append(encodedBuf);
  218. ace.Append('.');
  219. offset += len;
  220. len = 0;
  221. }
  222. }
  223. // encode the last node if non ASCII
  224. if (len) {
  225. rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf, flag);
  226. NS_ENSURE_SUCCESS(rv, rv);
  227. ace.Append(encodedBuf);
  228. }
  229. return NS_OK;
  230. }
  // nsIIDNService entry point: converts an ACE host name to UTF-8 using the
  // DNS error policy (eStringPrepForDNS). See ACEtoUTF8().
  NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString & input, nsACString & _retval)
  {
    return ACEtoUTF8(input, _retval, eStringPrepForDNS);
  }
  235. nsresult nsIDNService::ACEtoUTF8(const nsACString & input, nsACString & _retval,
  236. stringPrepFlag flag)
  237. {
  238. // RFC 3490 - 4.2 ToUnicode
  239. // ToUnicode never fails. If any step fails, then the original input
  240. // sequence is returned immediately in that step.
  241. uint32_t len = 0, offset = 0;
  242. nsAutoCString decodedBuf;
  243. nsACString::const_iterator start, end;
  244. input.BeginReading(start);
  245. input.EndReading(end);
  246. _retval.Truncate();
  247. // loop and decode nodes
  248. while (start != end) {
  249. len++;
  250. if (*start++ == '.') {
  251. if (NS_FAILED(decodeACE(Substring(input, offset, len - 1), decodedBuf,
  252. flag))) {
  253. _retval.Assign(input);
  254. return NS_OK;
  255. }
  256. _retval.Append(decodedBuf);
  257. _retval.Append('.');
  258. offset += len;
  259. len = 0;
  260. }
  261. }
  262. // decode the last node
  263. if (len) {
  264. if (NS_FAILED(decodeACE(Substring(input, offset, len), decodedBuf,
  265. flag)))
  266. _retval.Assign(input);
  267. else
  268. _retval.Append(decodedBuf);
  269. }
  270. return NS_OK;
  271. }
  272. /**
  273. * Returns |true| if |aString| contains only ASCII characters according
  274. * to our CRT.
  275. *
  276. * @param aString an 8-bit wide string to scan
  277. */
  278. inline bool IsAsciiString(mozilla::Span<const char> aString) {
  279. for (char c : aString) {
  280. if (!nsCRT::IsAscii(c)) {
  281. return false;
  282. }
  283. }
  284. return true;
  285. }
  286. NS_IMETHODIMP nsIDNService::IsACE(const nsACString & input, bool *_retval)
  287. {
  288. // look for the ACE prefix in the input string. it may occur
  289. // at the beginning of any segment in the domain name. for
  290. // example: "www.xn--ENCODED.com"
  291. if (!IsAsciiString(input)) {
  292. *_retval = false;
  293. return NS_OK;
  294. }
  295. auto stringContains = [](const nsACString& haystack,
  296. const nsACString& needle) {
  297. return std::search(haystack.BeginReading(), haystack.EndReading(),
  298. needle.BeginReading(),
  299. needle.EndReading()) != haystack.EndReading();
  300. };
  301. *_retval = StringBeginsWith(input, NS_LITERAL_CSTRING("xn--")) ||
  302. (!input.IsEmpty() && input[0] != '.' &&
  303. stringContains(input, NS_LITERAL_CSTRING(".xn--")));
  304. return NS_OK;
  305. }
  306. NS_IMETHODIMP nsIDNService::Normalize(const nsACString & input,
  307. nsACString & output)
  308. {
  309. // protect against bogus input
  310. NS_ENSURE_TRUE(IsUTF8(input), NS_ERROR_UNEXPECTED);
  311. NS_ConvertUTF8toUTF16 inUTF16(input);
  312. normalizeFullStops(inUTF16);
  313. // pass the domain name to stringprep label by label
  314. nsAutoString outUTF16, outLabel;
  315. uint32_t len = 0, offset = 0;
  316. nsresult rv;
  317. nsAString::const_iterator start, end;
  318. inUTF16.BeginReading(start);
  319. inUTF16.EndReading(end);
  320. while (start != end) {
  321. len++;
  322. if (*start++ == char16_t('.')) {
  323. rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel,
  324. eStringPrepIgnoreErrors);
  325. NS_ENSURE_SUCCESS(rv, rv);
  326. outUTF16.Append(outLabel);
  327. outUTF16.Append(char16_t('.'));
  328. offset += len;
  329. len = 0;
  330. }
  331. }
  332. if (len) {
  333. rv = stringPrep(Substring(inUTF16, offset, len), outLabel,
  334. eStringPrepIgnoreErrors);
  335. NS_ENSURE_SUCCESS(rv, rv);
  336. outUTF16.Append(outLabel);
  337. }
  338. CopyUTF16toUTF8(outUTF16, output);
  339. return NS_OK;
  340. }
  341. NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString & input, bool * _isASCII, nsACString & _retval)
  342. {
  343. // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
  344. // Else, if host is already UTF-8, then make sure it is normalized per IDN.
  345. nsresult rv = NS_OK;
  346. // Even if the hostname is not ASCII, individual labels may still be ACE, so
  347. // test IsACE before testing IsASCII
  348. bool isACE;
  349. IsACE(input, &isACE);
  350. if (IsASCII(input)) {
  351. // first, canonicalize the host to lowercase, for whitelist lookup
  352. _retval = input;
  353. ToLowerCase(_retval);
  354. if (isACE && !mShowPunycode) {
  355. // ACEtoUTF8() can't fail, but might return the original ACE string
  356. nsAutoCString temp(_retval);
  357. // If the domain is in the whitelist, return the host in UTF-8.
  358. // Otherwise convert from ACE to UTF8 only those labels which are
  359. // considered safe for display
  360. ACEtoUTF8(temp, _retval, isInWhitelist(temp) ?
  361. eStringPrepIgnoreErrors : eStringPrepForUI);
  362. *_isASCII = IsASCII(_retval);
  363. } else {
  364. *_isASCII = true;
  365. }
  366. } else {
  367. // We have to normalize the hostname before testing against the domain
  368. // whitelist (see bug 315411), and to ensure the entire string gets
  369. // normalized.
  370. //
  371. // Normalization and the tests for safe display below, assume that the
  372. // input is Unicode, so first convert any ACE labels to UTF8
  373. if (isACE) {
  374. nsAutoCString temp;
  375. ACEtoUTF8(input, temp, eStringPrepIgnoreErrors);
  376. rv = Normalize(temp, _retval);
  377. } else {
  378. rv = Normalize(input, _retval);
  379. }
  380. if (NS_FAILED(rv)) return rv;
  381. if (mShowPunycode && NS_SUCCEEDED(UTF8toACE(_retval, _retval,
  382. eStringPrepIgnoreErrors))) {
  383. *_isASCII = true;
  384. return NS_OK;
  385. }
  386. // normalization could result in an ASCII-only hostname. alternatively, if
  387. // the host is converted to ACE by the normalizer, then the host may contain
  388. // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694, and bug 309311.
  389. *_isASCII = IsASCII(_retval);
  390. if (!*_isASCII && !isInWhitelist(_retval)) {
  391. // UTF8toACE with eStringPrepForUI may return a domain name where
  392. // some labels are in UTF-8 and some are in ACE, depending on
  393. // whether they are considered safe for display
  394. rv = UTF8toACE(_retval, _retval, eStringPrepForUI);
  395. *_isASCII = IsASCII(_retval);
  396. return rv;
  397. }
  398. }
  399. return NS_OK;
  400. }
  401. //-----------------------------------------------------------------------------
  402. static nsresult utf16ToUcs4(const nsAString& in,
  403. uint32_t *out,
  404. uint32_t outBufLen,
  405. uint32_t *outLen)
  406. {
  407. uint32_t i = 0;
  408. nsAString::const_iterator start, end;
  409. in.BeginReading(start);
  410. in.EndReading(end);
  411. while (start != end) {
  412. char16_t curChar;
  413. curChar= *start++;
  414. if (start != end &&
  415. NS_IS_HIGH_SURROGATE(curChar) &&
  416. NS_IS_LOW_SURROGATE(*start)) {
  417. out[i] = SURROGATE_TO_UCS4(curChar, *start);
  418. ++start;
  419. }
  420. else
  421. out[i] = curChar;
  422. i++;
  423. if (i >= outBufLen)
  424. return NS_ERROR_MALFORMED_URI;
  425. }
  426. out[i] = (uint32_t)'\0';
  427. *outLen = i;
  428. return NS_OK;
  429. }
  430. static nsresult punycode(const nsAString& in, nsACString& out)
  431. {
  432. uint32_t ucs4Buf[kMaxDNSNodeLen + 1];
  433. uint32_t ucs4Len = 0u;
  434. nsresult rv = utf16ToUcs4(in, ucs4Buf, kMaxDNSNodeLen, &ucs4Len);
  435. NS_ENSURE_SUCCESS(rv, rv);
  436. // need maximum 20 bits to encode 16 bit Unicode character
  437. // (include null terminator)
  438. const uint32_t kEncodedBufSize = kMaxDNSNodeLen * 20 / 8 + 1 + 1;
  439. char encodedBuf[kEncodedBufSize];
  440. punycode_uint encodedLength = kEncodedBufSize;
  441. enum punycode_status status = punycode_encode(ucs4Len,
  442. ucs4Buf,
  443. nullptr,
  444. &encodedLength,
  445. encodedBuf);
  446. if (punycode_success != status ||
  447. encodedLength >= kEncodedBufSize)
  448. return NS_ERROR_MALFORMED_URI;
  449. encodedBuf[encodedLength] = '\0';
  450. out.Assign(nsDependentCString(kACEPrefix) + nsDependentCString(encodedBuf));
  451. return rv;
  452. }
  // RFC 3454
  //
  // 1) Map -- For each character in the input, check if it has a mapping
  // and, if so, replace it with its mapping. This is described in section 3.
  //
  // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
  // normalization. This is described in section 4.
  //
  // 3) Prohibit -- Check for any characters that are not allowed in the
  // output. If any are found, return an error. This is described in section
  // 5.
  //
  // 4) Check bidi -- Possibly check for right-to-left characters, and if any
  // are found, make sure that the whole string satisfies the requirements
  // for bidirectional strings. If the string does not satisfy the requirements
  // for bidirectional strings, return an error. This is described in section 6.
  //
  // 5) Check unassigned code points -- If allowUnassigned is false, check for
  // any unassigned Unicode points and if any are found return an error.
  // This is described in section 7.
  //
  // => All this is handled by ICU's StringPrep().
  //
  // In this file the call is a plain forward to IDNA2008StringPrep(), which
  // uses ICU's uidna_labelToUnicode(); |in| is a single label, |out| receives
  // the processed label and |flag| selects the error policy.
  nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out,
                                    stringPrepFlag flag)
  {
    return IDNA2008StringPrep(in, out, flag);
  }
  481. nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
  482. stringPrepFlag flag)
  483. {
  484. nsresult rv = NS_OK;
  485. out.Truncate();
  486. if (in.Length() > kMaxDNSNodeLen) {
  487. NS_WARNING("IDN node too large");
  488. return NS_ERROR_MALFORMED_URI;
  489. }
  490. if (IsASCII(in)) {
  491. LossyCopyUTF16toASCII(in, out);
  492. return NS_OK;
  493. }
  494. nsAutoString strPrep;
  495. rv = stringPrep(in, strPrep, flag);
  496. if (flag == eStringPrepForDNS) {
  497. NS_ENSURE_SUCCESS(rv, rv);
  498. }
  499. if (IsASCII(strPrep)) {
  500. LossyCopyUTF16toASCII(strPrep, out);
  501. return NS_OK;
  502. }
  503. if (flag == eStringPrepForUI && NS_SUCCEEDED(rv) && isLabelSafe(in)) {
  504. CopyUTF16toUTF8(strPrep, out);
  505. return NS_OK;
  506. }
  507. rv = punycode(strPrep, out);
  508. // Check that the encoded output isn't larger than the maximum length
  509. // of a DNS node per RFC 1034.
  510. // This test isn't necessary in the code paths above where the input
  511. // is ASCII (since the output will be the same length as the input) or
  512. // where we convert to UTF-8 (since the output is only used for
  513. // display in the UI and not passed to DNS and can legitimately be
  514. // longer than the limit).
  515. if (out.Length() > kMaxDNSNodeLen) {
  516. NS_WARNING("IDN node too large");
  517. return NS_ERROR_MALFORMED_URI;
  518. }
  519. return rv;
  520. }
  521. // RFC 3490
  522. // 1) Whenever dots are used as label separators, the following characters
  523. // MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
  524. // stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
  525. // stop).
  526. void nsIDNService::normalizeFullStops(nsAString& s)
  527. {
  528. nsAString::const_iterator start, end;
  529. s.BeginReading(start);
  530. s.EndReading(end);
  531. int32_t index = 0;
  532. while (start != end) {
  533. switch (*start) {
  534. case 0x3002:
  535. case 0xFF0E:
  536. case 0xFF61:
  537. s.Replace(index, 1, NS_LITERAL_STRING("."));
  538. break;
  539. default:
  540. break;
  541. }
  542. start++;
  543. index++;
  544. }
  545. }
  546. nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out,
  547. stringPrepFlag flag)
  548. {
  549. bool isAce;
  550. IsACE(in, &isAce);
  551. if (!isAce) {
  552. out.Assign(in);
  553. return NS_OK;
  554. }
  555. nsAutoString utf16;
  556. nsresult result = IDNA2008ToUnicode(in, utf16);
  557. NS_ENSURE_SUCCESS(result, result);
  558. if (flag != eStringPrepForUI || isLabelSafe(utf16)) {
  559. CopyUTF16toUTF8(utf16, out);
  560. } else {
  561. out.Assign(in);
  562. return NS_OK;
  563. }
  564. // Validation: encode back to ACE and compare the strings
  565. nsAutoCString ace;
  566. nsresult rv = UTF8toACE(out, ace, flag);
  567. NS_ENSURE_SUCCESS(rv, rv);
  568. if (flag == eStringPrepForDNS &&
  569. !ace.Equals(in, nsCaseInsensitiveCStringComparator())) {
  570. return NS_ERROR_MALFORMED_URI;
  571. }
  572. return NS_OK;
  573. }
  574. bool nsIDNService::isInWhitelist(const nsACString &host)
  575. {
  576. if (mIDNUseWhitelist && mIDNWhitelistPrefBranch) {
  577. nsAutoCString tld(host);
  578. // make sure the host is ACE for lookup and check that there are no
  579. // unassigned codepoints
  580. if (!IsASCII(tld) && NS_FAILED(UTF8toACE(tld, tld, eStringPrepForDNS))) {
  581. return false;
  582. }
  583. // truncate trailing dots first
  584. tld.Trim(".");
  585. int32_t pos = tld.RFind(".");
  586. if (pos == kNotFound)
  587. return false;
  588. tld.Cut(0, pos + 1);
  589. bool safe;
  590. if (NS_SUCCEEDED(mIDNWhitelistPrefBranch->GetBoolPref(tld.get(), &safe)))
  591. return safe;
  592. }
  593. return false;
  594. }
  // Decides whether a non-ASCII label may be shown in Unicode form.
  //
  // Returns false as soon as any check fails: a blacklisted character, the
  // ASCII-only restriction profile, a character whose identifier status is
  // neither "recommended" nor "inclusion", an illegal script mixture, mixed
  // decimal numbering systems, a repeated non-spacing mark, a mark whose
  // script extensions do not include the current script, or certain marks on
  // dotless i / dotless j. Returns true when the whole label passes.
  bool nsIDNService::isLabelSafe(const nsAString &label)
  {
    if (!isOnlySafeChars(PromiseFlatString(label), mIDNBlacklist)) {
      return false;
    }

    // We should never get here if the label is ASCII
    NS_ASSERTION(!IsASCII(label), "ASCII label in IDN checking");
    if (mRestrictionProfile == eASCIIOnlyProfile) {
      return false;
    }

    nsAString::const_iterator current, end;
    label.BeginReading(current);
    label.EndReading(end);

    // State carried from one character to the next.
    Script lastScript = Script::INVALID; // last script other than COMMON/INHERITED
    uint32_t previousChar = 0;           // code point handled in the previous iteration
    uint32_t baseChar = 0; // last non-diacritic seen (base char for marks)
    uint32_t savedNumberingSystem = 0;   // zero digit of the first decimal number seen
  // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
  #if 0
    HanVariantType savedHanVariant = HVT_NotHan;
  #endif

    // Row index into the script-combination table; -1 until a script is seen
    // (updated by illegalScriptCombo()).
    int32_t savedScript = -1;

    while (current != end) {
      // Read one code point, combining a surrogate pair when present.
      uint32_t ch = *current++;

      if (NS_IS_HIGH_SURROGATE(ch) && current != end &&
          NS_IS_LOW_SURROGATE(*current)) {
        ch = SURROGATE_TO_UCS4(ch, *current++);
      }

      // Check for restricted characters; aspirational scripts are NOT permitted,
      // in anticipation of the category being merged into Limited-Use scripts
      // in the upcoming (Unicode 10.0-based) revision of UAX #31.
      XidmodType xm = GetIdentifierModification(ch);
      if (xm != XIDMOD_RECOMMENDED &&
          xm != XIDMOD_INCLUSION) {
        return false;
      }

      // Check for mixed script
      // (only consulted when the script changes to a new specific script)
      Script script = GetScriptCode(ch);
      if (script != Script::COMMON &&
          script != Script::INHERITED &&
          script != lastScript) {
        if (illegalScriptCombo(script, savedScript)) {
          return false;
        }
        lastScript = script;
      }

      // Check for mixed numbering systems
      auto genCat = GetGeneralCategory(ch);
      if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
        // Subtracting the digit's value yields that numbering system's zero.
        uint32_t zeroCharacter = ch - GetNumericValue(ch);
        if (savedNumberingSystem == 0) {
          // If we encounter a decimal number, save the zero character from that
          // numbering system.
          savedNumberingSystem = zeroCharacter;
        } else if (zeroCharacter != savedNumberingSystem) {
          return false;
        }
      }

      if (genCat == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
        // Check for consecutive non-spacing marks
        // (this rejects the same mark appearing twice in a row)
        if (previousChar != 0 && previousChar == ch) {
          return false;
        }
        // Check for marks whose expected script doesn't match the base script.
        if (lastScript != Script::INVALID) {
          const size_t kMaxScripts = 32; // more than ample for current values
                                         // of ScriptExtensions property
          UScriptCode scripts[kMaxScripts];
          UErrorCode errorCode = U_ZERO_ERROR;
          int nScripts = uscript_getScriptExtensions(ch, scripts, kMaxScripts,
                                                     &errorCode);
          MOZ_ASSERT(U_SUCCESS(errorCode), "uscript_getScriptExtensions failed");
          if (U_FAILURE(errorCode)) {
            return false;
          }
          // nScripts will always be >= 1, because even for undefined characters
          // uscript_getScriptExtensions will return Script::INVALID.
          // If the mark just has script=COMMON or INHERITED, we can't check any
          // more carefully, but if it has specific scriptExtension codes, then
          // assume those are the only valid scripts to use it with.
          if (nScripts > 1 ||
              (Script(scripts[0]) != Script::COMMON &&
               Script(scripts[0]) != Script::INHERITED)) {
            // Search backwards for lastScript; nScripts ends at -1 when it is
            // not among the mark's script extensions.
            while (--nScripts >= 0) {
              if (Script(scripts[nScripts]) == lastScript) {
                break;
              }
            }
            if (nScripts == -1) {
              return false;
            }
          }
        }
        // Check for diacritics on dotless-i or dotless-j, which would be
        // indistinguishable from normal accented letter.
        if ((baseChar == 0x0237 || baseChar == 0x0131) &&
            ((ch >= 0x0300 && ch <= 0x0314) || ch == 0x031a)) {
          return false;
        }
      } else {
        // Anything that is not a non-spacing mark becomes the new base.
        baseChar = ch;
      }

  // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
  #if 0
      // Check for both simplified-only and traditional-only Chinese characters
      HanVariantType hanVariant = GetHanVariant(ch);
      if (hanVariant == HVT_SimplifiedOnly || hanVariant == HVT_TraditionalOnly) {
        if (savedHanVariant == HVT_NotHan) {
          savedHanVariant = hanVariant;
        } else if (hanVariant != savedHanVariant) {
          return false;
        }
      }
  #endif

      previousChar = ch;
    }
    return true;
  }
  // Scripts that we care about in illegalScriptCombo
  // findScriptIndex() returns a script's position in this table, and that
  // position is used as a column of scriptComboTable, so the order here has to
  // match the BOPO..LATN values defined below.
  static const Script scriptTable[] = {
    Script::BOPOMOFO, Script::CYRILLIC, Script::GREEK,
    Script::HANGUL,   Script::HAN,      Script::HIRAGANA,
    Script::KATAKANA, Script::LATIN };

  // Indices of the individual scripts in scriptTable.
  #define BOPO 0
  #define CYRL 1
  #define GREK 2
  #define HANG 3
  #define HANI 4
  #define HIRA 5
  #define KATA 6
  #define LATN 7
  // Any script that is not listed in scriptTable.
  #define OTHR 8
  // Combined states used as rows and values of scriptComboTable.
  #define JPAN 9 // Latin + Han + Hiragana + Katakana
  #define CHNA 10 // Latin + Han + Bopomofo
  #define KORE 11 // Latin + Han + Hangul
  #define HNLT 12 // Latin + Han (could be any of the above combinations)
  // Value only (there is no FAIL row): the combination is not allowed.
  #define FAIL 13
  732. static inline int32_t findScriptIndex(Script aScript)
  733. {
  734. int32_t tableLength = sizeof(scriptTable) / sizeof(int32_t);
  735. for (int32_t index = 0; index < tableLength; ++index) {
  736. if (aScript == scriptTable[index]) {
  737. return index;
  738. }
  739. }
  740. return OTHR;
  741. }
  // State-transition table for script mixing, used by illegalScriptCombo().
  // The row is the state accumulated so far (savedScript), the column is the
  // index of the newly encountered script (from findScriptIndex()), and the
  // entry is the resulting state, or FAIL when the mixture is not allowed.
  static const int32_t scriptComboTable[13][9] = {
  /* thisScript: BOPO  CYRL  GREK  HANG  HANI  HIRA  KATA  LATN  OTHR
   * savedScript */
   /* BOPO */  { BOPO, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL },
   /* CYRL */  { FAIL, CYRL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL },
   /* GREK */  { FAIL, FAIL, GREK, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL },
   /* HANG */  { FAIL, FAIL, FAIL, HANG, KORE, FAIL, FAIL, KORE, FAIL },
   /* HANI */  { CHNA, FAIL, FAIL, KORE, HANI, JPAN, JPAN, HNLT, FAIL },
   /* HIRA */  { FAIL, FAIL, FAIL, FAIL, JPAN, HIRA, JPAN, JPAN, FAIL },
   /* KATA */  { FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, KATA, JPAN, FAIL },
   /* LATN */  { CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, LATN, OTHR },
   /* OTHR */  { FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, OTHR, FAIL },
   /* JPAN */  { FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, JPAN, JPAN, FAIL },
   /* CHNA */  { CHNA, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL },
   /* KORE */  { FAIL, FAIL, FAIL, KORE, KORE, FAIL, FAIL, KORE, FAIL },
   /* HNLT */  { CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, HNLT, FAIL }
  };
  759. bool nsIDNService::illegalScriptCombo(Script script, int32_t& savedScript)
  760. {
  761. if (savedScript == -1) {
  762. savedScript = findScriptIndex(script);
  763. return false;
  764. }
  765. savedScript = scriptComboTable[savedScript] [findScriptIndex(script)];
  766. /*
  767. * Special case combinations that depend on which profile is in use
  768. * In the Highly Restrictive profile Latin is not allowed with any
  769. * other script
  770. *
  771. * In the Moderately Restrictive profile Latin mixed with any other
  772. * single script is allowed.
  773. */
  774. return ((savedScript == OTHR &&
  775. mRestrictionProfile == eHighlyRestrictiveProfile) ||
  776. savedScript == FAIL);
  777. }
  // The script-state macros are only meant for the table and helpers above;
  // remove them again so the short names do not leak any further.
  #undef BOPO
  #undef CYRL
  #undef GREK
  #undef HANG
  #undef HANI
  #undef HIRA
  #undef KATA
  #undef LATN
  #undef OTHR
  #undef JPAN
  #undef CHNA
  #undef KORE
  #undef HNLT
  #undef FAIL