nsBIG5ToUnicode.cpp 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #include "nsBIG5ToUnicode.h"
  6. #include "mozilla/BinarySearch.h"
  7. #include "mozilla/ArrayUtils.h"
  8. #include "nsBIG5Data.h"
  9. nsBIG5ToUnicode::nsBIG5ToUnicode()
  10. : mPendingTrail(0)
  11. , mBig5Lead(0)
  12. {
  13. }
  14. NS_IMETHODIMP
  15. nsBIG5ToUnicode::Convert(const char* aSrc,
  16. int32_t* aSrcLength,
  17. char16_t* aDest,
  18. int32_t* aDestLength)
  19. {
  20. // We'll be doing comparisons as unsigned.
  21. const uint8_t* in = reinterpret_cast<const uint8_t*>(aSrc);
  22. const uint8_t* inEnd = in + *aSrcLength;
  23. char16_t* out = aDest;
  24. char16_t* outEnd = out + *aDestLength;
  25. if (mPendingTrail) {
  26. if (out == outEnd) {
  27. *aSrcLength = 0;
  28. *aDestLength = 0;
  29. return NS_OK_UDEC_MOREOUTPUT;
  30. }
  31. *out++ = mPendingTrail;
  32. mPendingTrail = 0;
  33. }
  34. for (;;) {
  35. if (in == inEnd) {
  36. *aSrcLength = in - reinterpret_cast<const uint8_t*>(aSrc);
  37. *aDestLength = out - aDest;
  38. return mBig5Lead ? NS_OK_UDEC_MOREINPUT : NS_OK;
  39. }
  40. if (out == outEnd) {
  41. *aSrcLength = in - reinterpret_cast<const uint8_t*>(aSrc);
  42. *aDestLength = out - aDest;
  43. return NS_OK_UDEC_MOREOUTPUT;
  44. }
  45. uint8_t b = *in++;
  46. if (!mBig5Lead) {
  47. if (b <= 0x7F) {
  48. *out++ = (char16_t)b;
  49. continue;
  50. }
  51. if (b >= 0x81 && b <= 0xFE) {
  52. mBig5Lead = b;
  53. continue;
  54. }
  55. if (mErrBehavior == kOnError_Signal) {
  56. --in;
  57. *aSrcLength = in - reinterpret_cast<const uint8_t*>(aSrc);
  58. *aDestLength = out - aDest;
  59. return NS_ERROR_ILLEGAL_INPUT;
  60. }
  61. *out++ = 0xFFFD;
  62. continue;
  63. }
  64. size_t lead = mBig5Lead;
  65. mBig5Lead = 0;
  66. size_t offset = (b < 0x7F) ? 0x40 : 0x62;
  67. if ((b >= 0x40 && b <= 0x7E) || (b >= 0xA1 && b <= 0xFE)) {
  68. size_t pointer = (lead - 0x81) * 157L + (b - offset);
  69. char16_t outTrail;
  70. switch (pointer) {
  71. case 1133:
  72. *out++ = 0x00CA;
  73. outTrail = 0x0304;
  74. break;
  75. case 1135:
  76. *out++ = 0x00CA;
  77. outTrail = 0x030C;
  78. break;
  79. case 1164:
  80. *out++ = 0x00EA;
  81. outTrail = 0x0304;
  82. break;
  83. case 1166:
  84. *out++ = 0x00EA;
  85. outTrail = 0x030C;
  86. break;
  87. default:
  88. char16_t lowBits = nsBIG5Data::LowBits(pointer);
  89. if (!lowBits) {
  90. if (b <= 0x7F) {
  91. // prepend byte to stream
  92. // Always legal, since we've always just read a byte
  93. // if we come here.
  94. --in;
  95. }
  96. if (mErrBehavior == kOnError_Signal) {
  97. --in;
  98. *aSrcLength = in - reinterpret_cast<const uint8_t*>(aSrc);
  99. *aDestLength = out - aDest;
  100. return NS_ERROR_ILLEGAL_INPUT;
  101. }
  102. *out++ = 0xFFFD;
  103. continue;
  104. }
  105. if (nsBIG5Data::IsAstral(pointer)) {
  106. uint32_t codePoint = uint32_t(lowBits) | 0x20000;
  107. *out++ = char16_t(0xD7C0 + (codePoint >> 10));
  108. outTrail = char16_t(0xDC00 + (codePoint & 0x3FF));
  109. break;
  110. }
  111. *out++ = lowBits;
  112. continue;
  113. }
  114. if (out == outEnd) {
  115. mPendingTrail = outTrail;
  116. *aSrcLength = in - reinterpret_cast<const uint8_t*>(aSrc);
  117. *aDestLength = out - aDest;
  118. return NS_OK_UDEC_MOREOUTPUT;
  119. }
  120. *out++ = outTrail;
  121. continue;
  122. }
  123. // pointer is null
  124. if (b <= 0x7F) {
  125. // prepend byte to stream
  126. // Always legal, since we've always just read a byte
  127. // if we come here.
  128. --in;
  129. }
  130. if (mErrBehavior == kOnError_Signal) {
  131. // Moving in one past the start of aSrc is actually OK per API contract,
  132. // since assigning -1 to aSrcLength means that we want the caller to
  133. // record one U+FFFD and repush the same input buffer.
  134. --in;
  135. *aSrcLength = in - reinterpret_cast<const uint8_t*>(aSrc);
  136. *aDestLength = out - aDest;
  137. return NS_ERROR_ILLEGAL_INPUT;
  138. }
  139. *out++ = 0xFFFD;
  140. continue;
  141. }
  142. }
  143. NS_IMETHODIMP
  144. nsBIG5ToUnicode::GetMaxLength(const char* aSrc,
  145. int32_t aSrcLength,
  146. int32_t* aDestLength)
  147. {
  148. // The length of the output in UTF-16 code units never exceeds the length
  149. // of the input in bytes.
  150. mozilla::CheckedInt32 length = aSrcLength;
  151. if (mPendingTrail) {
  152. length += 1;
  153. }
  154. if (mBig5Lead) {
  155. length += 1;
  156. }
  157. if (!length.isValid()) {
  158. return NS_ERROR_OUT_OF_MEMORY;
  159. }
  160. *aDestLength = length.value();
  161. return NS_OK;
  162. }
  163. NS_IMETHODIMP
  164. nsBIG5ToUnicode::Reset()
  165. {
  166. mPendingTrail = 0;
  167. mBig5Lead = 0;
  168. return NS_OK;
  169. }