nsScriptableUConv.cpp 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #include "nsString.h"
  6. #include "nsIScriptableUConv.h"
  7. #include "nsScriptableUConv.h"
  8. #include "nsIStringStream.h"
  9. #include "nsComponentManagerUtils.h"
  10. #include "nsIUnicodeDecoder.h"
  11. #include "nsIUnicodeEncoder.h"
  12. #include "mozilla/dom/EncodingUtils.h"
  13. #include "mozilla/CheckedInt.h"
  14. using mozilla::dom::EncodingUtils;
  15. /* Implementation file */
  16. NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter)
  17. nsScriptableUnicodeConverter::nsScriptableUnicodeConverter()
  18. : mIsInternal(false)
  19. {
  20. }
  21. nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter()
  22. {
  23. }
  24. nsresult
  25. nsScriptableUnicodeConverter::ConvertFromUnicodeWithLength(const nsAString& aSrc,
  26. int32_t* aOutLen,
  27. char **_retval)
  28. {
  29. if (!mEncoder)
  30. return NS_ERROR_FAILURE;
  31. nsresult rv = NS_OK;
  32. int32_t inLength = aSrc.Length();
  33. const nsAFlatString& flatSrc = PromiseFlatString(aSrc);
  34. rv = mEncoder->GetMaxLength(flatSrc.get(), inLength, aOutLen);
  35. if (NS_SUCCEEDED(rv)) {
  36. mozilla::CheckedInt<int32_t> needed(*aOutLen);
  37. needed += 1;
  38. if (!needed.isValid()) {
  39. return NS_ERROR_OUT_OF_MEMORY;
  40. }
  41. *_retval = (char*)malloc(needed.value());
  42. if (!*_retval)
  43. return NS_ERROR_OUT_OF_MEMORY;
  44. rv = mEncoder->Convert(flatSrc.get(), &inLength, *_retval, aOutLen);
  45. if (NS_SUCCEEDED(rv))
  46. {
  47. (*_retval)[*aOutLen] = '\0';
  48. return NS_OK;
  49. }
  50. free(*_retval);
  51. }
  52. *_retval = nullptr;
  53. return NS_ERROR_FAILURE;
  54. }
  55. NS_IMETHODIMP
  56. nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc,
  57. nsACString& _retval)
  58. {
  59. int32_t len;
  60. char* str;
  61. nsresult rv = ConvertFromUnicodeWithLength(aSrc, &len, &str);
  62. if (NS_SUCCEEDED(rv)) {
  63. // No Adopt on nsACString :(
  64. if (!_retval.Assign(str, len, mozilla::fallible)) {
  65. rv = NS_ERROR_OUT_OF_MEMORY;
  66. }
  67. free(str);
  68. }
  69. return rv;
  70. }
  71. nsresult
  72. nsScriptableUnicodeConverter::FinishWithLength(char **_retval, int32_t* aLength)
  73. {
  74. if (!mEncoder)
  75. return NS_ERROR_FAILURE;
  76. int32_t finLength = 32;
  77. *_retval = (char *)malloc(finLength);
  78. if (!*_retval)
  79. return NS_ERROR_OUT_OF_MEMORY;
  80. nsresult rv = mEncoder->Finish(*_retval, &finLength);
  81. if (NS_SUCCEEDED(rv))
  82. *aLength = finLength;
  83. else
  84. free(*_retval);
  85. return rv;
  86. }
  87. NS_IMETHODIMP
  88. nsScriptableUnicodeConverter::Finish(nsACString& _retval)
  89. {
  90. // The documentation for this method says it should be called after
  91. // ConvertFromUnicode(). However, our own tests called it after
  92. // convertFromByteArray(), i.e. when *decoding*.
  93. // Assuming that there exists extensions that similarly call
  94. // this at the wrong time, let's deal. In general, it is a design
  95. // error for this class to handle conversions in both directions.
  96. if (!mEncoder) {
  97. _retval.Truncate();
  98. return NS_OK;
  99. }
  100. int32_t len;
  101. char* str;
  102. nsresult rv = FinishWithLength(&str, &len);
  103. if (NS_SUCCEEDED(rv)) {
  104. // No Adopt on nsACString :(
  105. if (!_retval.Assign(str, len, mozilla::fallible)) {
  106. rv = NS_ERROR_OUT_OF_MEMORY;
  107. }
  108. free(str);
  109. }
  110. return rv;
  111. }
  112. NS_IMETHODIMP
  113. nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc, nsAString& _retval)
  114. {
  115. nsACString::const_iterator i;
  116. aSrc.BeginReading(i);
  117. return ConvertFromByteArray(reinterpret_cast<const uint8_t*>(i.get()),
  118. aSrc.Length(),
  119. _retval);
  120. }
  121. NS_IMETHODIMP
  122. nsScriptableUnicodeConverter::ConvertFromByteArray(const uint8_t* aData,
  123. uint32_t aCount,
  124. nsAString& _retval)
  125. {
  126. if (!mDecoder)
  127. return NS_ERROR_FAILURE;
  128. nsresult rv = NS_OK;
  129. int32_t inLength = aCount;
  130. int32_t outLength;
  131. rv = mDecoder->GetMaxLength(reinterpret_cast<const char*>(aData),
  132. inLength, &outLength);
  133. if (NS_SUCCEEDED(rv))
  134. {
  135. mozilla::CheckedInt<nsACString::size_type> needed(outLength);
  136. needed += 1;
  137. needed *= sizeof(char16_t);
  138. if (!needed.isValid()) {
  139. return NS_ERROR_OUT_OF_MEMORY;
  140. }
  141. char16_t* buf = (char16_t*)malloc(needed.value());
  142. if (!buf)
  143. return NS_ERROR_OUT_OF_MEMORY;
  144. rv = mDecoder->Convert(reinterpret_cast<const char*>(aData),
  145. &inLength, buf, &outLength);
  146. if (NS_SUCCEEDED(rv))
  147. {
  148. buf[outLength] = 0;
  149. if (!_retval.Assign(buf, outLength, mozilla::fallible)) {
  150. rv = NS_ERROR_OUT_OF_MEMORY;
  151. }
  152. }
  153. free(buf);
  154. return rv;
  155. }
  156. return NS_ERROR_FAILURE;
  157. }
  158. NS_IMETHODIMP
  159. nsScriptableUnicodeConverter::ConvertToByteArray(const nsAString& aString,
  160. uint32_t* aLen,
  161. uint8_t** _aData)
  162. {
  163. char* data;
  164. int32_t len;
  165. nsresult rv = ConvertFromUnicodeWithLength(aString, &len, &data);
  166. if (NS_FAILED(rv))
  167. return rv;
  168. nsXPIDLCString str;
  169. str.Adopt(data, len); // NOTE: This uses the XPIDLCString as a byte array
  170. rv = FinishWithLength(&data, &len);
  171. if (NS_FAILED(rv))
  172. return rv;
  173. str.Append(data, len);
  174. free(data);
  175. // NOTE: this being a byte array, it needs no null termination
  176. *_aData = reinterpret_cast<uint8_t*>(malloc(str.Length()));
  177. if (!*_aData)
  178. return NS_ERROR_OUT_OF_MEMORY;
  179. memcpy(*_aData, str.get(), str.Length());
  180. *aLen = str.Length();
  181. return NS_OK;
  182. }
  183. NS_IMETHODIMP
  184. nsScriptableUnicodeConverter::ConvertToInputStream(const nsAString& aString,
  185. nsIInputStream** _retval)
  186. {
  187. nsresult rv;
  188. nsCOMPtr<nsIStringInputStream> inputStream =
  189. do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv);
  190. if (NS_FAILED(rv))
  191. return rv;
  192. uint8_t* data;
  193. uint32_t dataLen;
  194. rv = ConvertToByteArray(aString, &dataLen, &data);
  195. if (NS_FAILED(rv))
  196. return rv;
  197. rv = inputStream->AdoptData(reinterpret_cast<char*>(data), dataLen);
  198. if (NS_FAILED(rv)) {
  199. free(data);
  200. return rv;
  201. }
  202. NS_ADDREF(*_retval = inputStream);
  203. return rv;
  204. }
  205. NS_IMETHODIMP
  206. nsScriptableUnicodeConverter::GetCharset(char * *aCharset)
  207. {
  208. *aCharset = ToNewCString(mCharset);
  209. if (!*aCharset)
  210. return NS_ERROR_OUT_OF_MEMORY;
  211. return NS_OK;
  212. }
  213. NS_IMETHODIMP
  214. nsScriptableUnicodeConverter::SetCharset(const char * aCharset)
  215. {
  216. mCharset.Assign(aCharset);
  217. return InitConverter();
  218. }
  219. NS_IMETHODIMP
  220. nsScriptableUnicodeConverter::GetIsInternal(bool *aIsInternal)
  221. {
  222. *aIsInternal = mIsInternal;
  223. return NS_OK;
  224. }
  225. NS_IMETHODIMP
  226. nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal)
  227. {
  228. mIsInternal = aIsInternal;
  229. return NS_OK;
  230. }
  231. nsresult
  232. nsScriptableUnicodeConverter::InitConverter()
  233. {
  234. mEncoder = nullptr;
  235. mDecoder = nullptr;
  236. nsAutoCString encoding;
  237. if (mIsInternal) {
  238. // For compatibility with legacy extensions, let's try to see if the label
  239. // happens to be ASCII-case-insensitively an encoding. This should allow
  240. // for things like "utf-7" and "x-Mac-Hebrew".
  241. nsAutoCString contractId;
  242. nsAutoCString label(mCharset);
  243. EncodingUtils::TrimSpaceCharacters(label);
  244. // Let's try in lower case if we didn't get an decoder. E.g. x-mac-ce
  245. // and x-imap4-modified-utf7 are all lower case.
  246. ToLowerCase(label);
  247. if (label.EqualsLiteral("replacement")) {
  248. // reject "replacement"
  249. return NS_ERROR_UCONV_NOCONV;
  250. }
  251. contractId.AssignLiteral(NS_UNICODEENCODER_CONTRACTID_BASE);
  252. contractId.Append(label);
  253. mEncoder = do_CreateInstance(contractId.get());
  254. contractId.AssignLiteral(NS_UNICODEDECODER_CONTRACTID_BASE);
  255. contractId.Append(label);
  256. mDecoder = do_CreateInstance(contractId.get());
  257. if (!mDecoder) {
  258. // The old code seemed to want both a decoder and an encoder. Since some
  259. // internal encodings will be decoder-only in the future, let's relax
  260. // this. Note that the other methods check mEncoder for null anyway.
  261. // Let's try the upper case. E.g. UTF-7 and ISO-2022-CN have upper
  262. // case Gecko-canonical names.
  263. ToUpperCase(label);
  264. contractId.AssignLiteral(NS_UNICODEENCODER_CONTRACTID_BASE);
  265. contractId.Append(label);
  266. mEncoder = do_CreateInstance(contractId.get());
  267. contractId.AssignLiteral(NS_UNICODEDECODER_CONTRACTID_BASE);
  268. contractId.Append(label);
  269. mDecoder = do_CreateInstance(contractId.get());
  270. // If still no decoder, use the normal non-internal case below.
  271. }
  272. }
  273. if (!mDecoder) {
  274. if (!EncodingUtils::FindEncodingForLabelNoReplacement(mCharset, encoding)) {
  275. return NS_ERROR_UCONV_NOCONV;
  276. }
  277. mEncoder = EncodingUtils::EncoderForEncoding(encoding);
  278. mDecoder = EncodingUtils::DecoderForEncoding(encoding);
  279. }
  280. // The UTF-8 decoder used to throw regardless of the error behavior.
  281. // Simulating the old behavior for compatibility with legacy callers
  282. // (including addons). If callers want a control over the behavior,
  283. // they should switch to TextDecoder.
  284. if (encoding.EqualsLiteral("UTF-8")) {
  285. mDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
  286. }
  287. if (!mEncoder) {
  288. return NS_OK;
  289. }
  290. return mEncoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace,
  291. nullptr,
  292. (char16_t)'?');
  293. }