nsUnicodeToBIG5.cpp 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #include "nsUnicodeToBIG5.h"
  6. NS_IMPL_ADDREF(nsUnicodeToBIG5)
  7. NS_IMPL_RELEASE(nsUnicodeToBIG5)
  8. NS_IMPL_QUERY_INTERFACE(nsUnicodeToBIG5,
  9. nsIUnicodeEncoder)
  10. nsUnicodeToBIG5::nsUnicodeToBIG5()
  11. : mUtf16Lead(0)
  12. , mPendingTrail(0)
  13. , mSignal(true) // as in nsEncoderSupport
  14. {
  15. }
  16. NS_IMETHODIMP
  17. nsUnicodeToBIG5::Convert(const char16_t* aSrc,
  18. int32_t* aSrcLength,
  19. char* aDest,
  20. int32_t * aDestLength)
  21. {
  22. const char16_t* in = aSrc;
  23. const char16_t* inEnd = in + *aSrcLength;
  24. uint8_t* out = reinterpret_cast<uint8_t*>(aDest);
  25. uint8_t* outEnd = out + *aDestLength;
  26. MOZ_ASSERT(!(mPendingTrail && mUtf16Lead),
  27. "Can't have both pending output and pending input.");
  28. if (mPendingTrail) {
  29. if (out == outEnd) {
  30. *aSrcLength = 0;
  31. *aDestLength = 0;
  32. return NS_OK_UENC_MOREOUTPUT;
  33. }
  34. *out++ = mPendingTrail;
  35. mPendingTrail = 0;
  36. }
  37. for (;;) {
  38. if (in == inEnd) {
  39. *aSrcLength = in - aSrc;
  40. *aDestLength = out - reinterpret_cast<uint8_t*>(aDest);
  41. return mUtf16Lead ? NS_OK_UENC_MOREINPUT : NS_OK;
  42. }
  43. if (out == outEnd) {
  44. *aSrcLength = in - aSrc;
  45. *aDestLength = out - reinterpret_cast<uint8_t*>(aDest);
  46. return NS_OK_UENC_MOREOUTPUT;
  47. }
  48. bool isAstral; // true means Plane 2, false means BMP
  49. char16_t lowBits; // The low 16 bits of the code point
  50. char16_t codeUnit = *in++;
  51. size_t highBits = (codeUnit & 0xFC00);
  52. if (highBits == 0xD800) {
  53. // high surrogate
  54. if (mUtf16Lead) {
  55. // High surrogate follows another high surrogate. The
  56. // *previous* code unit is in error.
  57. if (mSignal) {
  58. mUtf16Lead = 0;
  59. // NOTE: Encode API differs from decode API!
  60. --in;
  61. *aSrcLength = in - aSrc;
  62. *aDestLength = out - reinterpret_cast<uint8_t*>(aDest);
  63. return NS_ERROR_UENC_NOMAPPING;
  64. }
  65. *out++ = '?';
  66. }
  67. mUtf16Lead = codeUnit;
  68. continue;
  69. }
  70. if (highBits == 0xDC00) {
  71. // low surrogate
  72. if (!mUtf16Lead) {
  73. // Got low surrogate without a previous high surrogate
  74. if (mSignal) {
  75. // NOTE: Encode API differs from decode API!
  76. *aSrcLength = in - aSrc;
  77. *aDestLength = out - reinterpret_cast<uint8_t*>(aDest);
  78. return NS_ERROR_UENC_NOMAPPING;
  79. }
  80. *out++ = '?';
  81. continue;
  82. }
  83. size_t codePoint = (mUtf16Lead << 10) + codeUnit -
  84. (((0xD800 << 10) - 0x10000) + 0xDC00);
  85. mUtf16Lead = 0;
  86. // Plane 2 is the only astral plane that has potentially
  87. // Big5-encodable characters.
  88. if ((0xFF0000 & codePoint) != 0x20000) {
  89. if (mSignal) {
  90. // NOTE: Encode API differs from decode API!
  91. // nsSaveAsCharset wants us to back up on step in the case of a
  92. // surrogate pair.
  93. --in;
  94. *aSrcLength = in - aSrc;
  95. *aDestLength = out - reinterpret_cast<uint8_t*>(aDest);
  96. return NS_ERROR_UENC_NOMAPPING;
  97. }
  98. *out++ = '?';
  99. continue;
  100. }
  101. isAstral = true;
  102. lowBits = (char16_t)(codePoint & 0xFFFF);
  103. } else {
  104. // not a surrogate
  105. if (mUtf16Lead) {
  106. // Non-surrogate follows a high surrogate. The *previous*
  107. // code unit is in error.
  108. mUtf16Lead = 0;
  109. if (mSignal) {
  110. // NOTE: Encode API differs from decode API!
  111. --in;
  112. *aSrcLength = in - aSrc;
  113. *aDestLength = out - reinterpret_cast<uint8_t*>(aDest);
  114. return NS_ERROR_UENC_NOMAPPING;
  115. }
  116. *out++ = '?';
  117. // Let's unconsume this code unit and reloop in order to
  118. // re-check if the output buffer still has space.
  119. --in;
  120. continue;
  121. }
  122. isAstral = false;
  123. lowBits = codeUnit;
  124. }
  125. // isAstral now tells us if we have a Plane 2 or a BMP character.
  126. // lowBits tells us the low 16 bits.
  127. // After all the above setup to deal with UTF-16, we are now
  128. // finally ready to follow the spec.
  129. if (!isAstral && lowBits <= 0x7F) {
  130. *out++ = (uint8_t)lowBits;
  131. continue;
  132. }
  133. size_t pointer = nsBIG5Data::FindPointer(lowBits, isAstral);
  134. if (!pointer) {
  135. if (mSignal) {
  136. // NOTE: Encode API differs from decode API!
  137. if (isAstral) {
  138. // nsSaveAsCharset wants us to back up on step in the case of a
  139. // surrogate pair.
  140. --in;
  141. }
  142. *aSrcLength = in - aSrc;
  143. *aDestLength = out - reinterpret_cast<uint8_t*>(aDest);
  144. return NS_ERROR_UENC_NOMAPPING;
  145. }
  146. *out++ = '?';
  147. continue;
  148. }
  149. uint8_t lead = (uint8_t)(pointer / 157 + 0x81);
  150. uint8_t trail = (uint8_t)(pointer % 157);
  151. if (trail < 0x3F) {
  152. trail += 0x40;
  153. } else {
  154. trail += 0x62;
  155. }
  156. *out++ = lead;
  157. if (out == outEnd) {
  158. mPendingTrail = trail;
  159. *aSrcLength = in - aSrc;
  160. *aDestLength = out - reinterpret_cast<uint8_t*>(aDest);
  161. return NS_OK_UENC_MOREOUTPUT;
  162. }
  163. *out++ = trail;
  164. continue;
  165. }
  166. }
  167. NS_IMETHODIMP
  168. nsUnicodeToBIG5::Finish(char* aDest,
  169. int32_t* aDestLength)
  170. {
  171. MOZ_ASSERT(!(mPendingTrail && mUtf16Lead),
  172. "Can't have both pending output and pending input.");
  173. uint8_t* out = reinterpret_cast<uint8_t*>(aDest);
  174. if (mPendingTrail) {
  175. if (*aDestLength < 1) {
  176. *aDestLength = 0;
  177. return NS_OK_UENC_MOREOUTPUT;
  178. }
  179. *out = mPendingTrail;
  180. mPendingTrail = 0;
  181. *aDestLength = 1;
  182. return NS_OK;
  183. }
  184. if (mUtf16Lead) {
  185. if (*aDestLength < 1) {
  186. *aDestLength = 0;
  187. return NS_OK_UENC_MOREOUTPUT;
  188. }
  189. mUtf16Lead = 0;
  190. if (mSignal) {
  191. *aDestLength = 0;
  192. return NS_ERROR_UENC_NOMAPPING;
  193. }
  194. *out = '?';
  195. *aDestLength = 1;
  196. return NS_OK;
  197. }
  198. *aDestLength = 0;
  199. return NS_OK;
  200. }
  201. NS_IMETHODIMP
  202. nsUnicodeToBIG5::GetMaxLength(const char16_t* aSrc,
  203. int32_t aSrcLength,
  204. int32_t* aDestLength)
  205. {
  206. mozilla::CheckedInt32 length = aSrcLength;
  207. length *= 2;
  208. if (mPendingTrail) {
  209. length += 1;
  210. }
  211. // If the lead ends up being paired, the bytes produced
  212. // are already included above.
  213. // If not, it produces a single '?'.
  214. if (mUtf16Lead) {
  215. length += 1;
  216. }
  217. if (!length.isValid()) {
  218. return NS_ERROR_OUT_OF_MEMORY;
  219. }
  220. *aDestLength = length.value();
  221. return NS_OK;
  222. }
  223. NS_IMETHODIMP
  224. nsUnicodeToBIG5::Reset()
  225. {
  226. mUtf16Lead = 0;
  227. mPendingTrail = 0;
  228. return NS_OK;
  229. }
  230. NS_IMETHODIMP
  231. nsUnicodeToBIG5::SetOutputErrorBehavior(int32_t aBehavior,
  232. nsIUnicharEncoder* aEncoder,
  233. char16_t aChar)
  234. {
  235. switch (aBehavior) {
  236. case kOnError_Signal:
  237. mSignal = true;
  238. break;
  239. case kOnError_Replace:
  240. mSignal = false;
  241. MOZ_ASSERT(aChar == '?', "Unsupported replacement.");
  242. break;
  243. case kOnError_CallBack:
  244. MOZ_ASSERT_UNREACHABLE("kOnError_CallBack is supposed to be unused.");
  245. break;
  246. default:
  247. MOZ_ASSERT_UNREACHABLE("Non-existent enum item.");
  248. break;
  249. }
  250. return NS_OK;
  251. }