nsConverterInputStream.cpp 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #include "nsConverterInputStream.h"
  6. #include "nsIInputStream.h"
  7. #include "nsReadLine.h"
  8. #include "nsStreamUtils.h"
  9. #include <algorithm>
  10. #include "mozilla/dom/EncodingUtils.h"
  11. using mozilla::dom::EncodingUtils;
// Buffer capacity used by Init() when the caller passes a non-positive
// aBufferSize. The same value is requested for both the byte buffer and the
// char16_t buffer.
#define CONVERTER_BUFFER_SIZE 8192
// Names the class followed by the three interfaces it is declared to
// implement.
// NOTE(review): NS_IMPL_ISUPPORTS is defined outside this file; presumably it
// generates the nsISupports boilerplate for the listed interfaces -- confirm.
NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream,
nsIUnicharInputStream, nsIUnicharLineInputStream)
  15. NS_IMETHODIMP
  16. nsConverterInputStream::Init(nsIInputStream* aStream,
  17. const char *aCharset,
  18. int32_t aBufferSize,
  19. char16_t aReplacementChar)
  20. {
  21. nsAutoCString label;
  22. if (!aCharset) {
  23. label.AssignLiteral("UTF-8");
  24. } else {
  25. label = aCharset;
  26. }
  27. if (aBufferSize <=0) aBufferSize=CONVERTER_BUFFER_SIZE;
  28. // get the decoder
  29. nsAutoCString encoding;
  30. if (label.EqualsLiteral("UTF-16")) {
  31. // Compat with old test cases. Unclear if any extensions really care.
  32. encoding.Assign(label);
  33. } else if (!EncodingUtils::FindEncodingForLabelNoReplacement(label,
  34. encoding)) {
  35. return NS_ERROR_UCONV_NOCONV;
  36. }
  37. mConverter = EncodingUtils::DecoderForEncoding(encoding);
  38. // set up our buffers
  39. if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible) ||
  40. !mUnicharData.SetCapacity(aBufferSize, mozilla::fallible)) {
  41. return NS_ERROR_OUT_OF_MEMORY;
  42. }
  43. mInput = aStream;
  44. mReplacementChar = aReplacementChar;
  45. if (!aReplacementChar ||
  46. aReplacementChar != mConverter->GetCharacterForUnMapped()) {
  47. mConverter->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
  48. }
  49. return NS_OK;
  50. }
  51. NS_IMETHODIMP
  52. nsConverterInputStream::Close()
  53. {
  54. nsresult rv = mInput ? mInput->Close() : NS_OK;
  55. mLineBuffer = nullptr;
  56. mInput = nullptr;
  57. mConverter = nullptr;
  58. mByteData.Clear();
  59. mUnicharData.Clear();
  60. return rv;
  61. }
  62. NS_IMETHODIMP
  63. nsConverterInputStream::Read(char16_t* aBuf,
  64. uint32_t aCount,
  65. uint32_t *aReadCount)
  66. {
  67. NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
  68. uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
  69. if (0 == readCount) {
  70. // Fill the unichar buffer
  71. readCount = Fill(&mLastErrorCode);
  72. if (readCount == 0) {
  73. *aReadCount = 0;
  74. return mLastErrorCode;
  75. }
  76. }
  77. if (readCount > aCount) {
  78. readCount = aCount;
  79. }
  80. memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset,
  81. readCount * sizeof(char16_t));
  82. mUnicharDataOffset += readCount;
  83. *aReadCount = readCount;
  84. return NS_OK;
  85. }
  86. NS_IMETHODIMP
  87. nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
  88. void* aClosure,
  89. uint32_t aCount, uint32_t *aReadCount)
  90. {
  91. NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
  92. uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset;
  93. nsresult rv;
  94. if (0 == bytesToWrite) {
  95. // Fill the unichar buffer
  96. bytesToWrite = Fill(&rv);
  97. if (bytesToWrite <= 0) {
  98. *aReadCount = 0;
  99. return rv;
  100. }
  101. }
  102. if (bytesToWrite > aCount)
  103. bytesToWrite = aCount;
  104. uint32_t bytesWritten;
  105. uint32_t totalBytesWritten = 0;
  106. while (bytesToWrite) {
  107. rv = aWriter(this, aClosure,
  108. mUnicharData.Elements() + mUnicharDataOffset,
  109. totalBytesWritten, bytesToWrite, &bytesWritten);
  110. if (NS_FAILED(rv)) {
  111. // don't propagate errors to the caller
  112. break;
  113. }
  114. bytesToWrite -= bytesWritten;
  115. totalBytesWritten += bytesWritten;
  116. mUnicharDataOffset += bytesWritten;
  117. }
  118. *aReadCount = totalBytesWritten;
  119. return NS_OK;
  120. }
  121. NS_IMETHODIMP
  122. nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString,
  123. uint32_t* aReadCount)
  124. {
  125. NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
  126. uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
  127. if (0 == readCount) {
  128. // Fill the unichar buffer
  129. readCount = Fill(&mLastErrorCode);
  130. if (readCount == 0) {
  131. *aReadCount = 0;
  132. return mLastErrorCode;
  133. }
  134. }
  135. if (readCount > aCount) {
  136. readCount = aCount;
  137. }
  138. const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset;
  139. aString.Assign(buf, readCount);
  140. mUnicharDataOffset += readCount;
  141. *aReadCount = readCount;
  142. return NS_OK;
  143. }
  144. uint32_t
  145. nsConverterInputStream::Fill(nsresult * aErrorCode)
  146. {
  147. if (nullptr == mInput) {
  148. // We already closed the stream!
  149. *aErrorCode = NS_BASE_STREAM_CLOSED;
  150. return 0;
  151. }
  152. if (NS_FAILED(mLastErrorCode)) {
  153. // We failed to completely convert last time, and error-recovery
  154. // is disabled. We will fare no better this time, so...
  155. *aErrorCode = mLastErrorCode;
  156. return 0;
  157. }
  158. // We assume a many to one conversion and are using equal sizes for
  159. // the two buffers. However if an error happens at the very start
  160. // of a byte buffer we may end up in a situation where n bytes lead
  161. // to n+1 unicode chars. Thus we need to keep track of the leftover
  162. // bytes as we convert.
  163. uint32_t nb;
  164. *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb);
  165. if (nb == 0 && mLeftOverBytes == 0) {
  166. // No more data
  167. *aErrorCode = NS_OK;
  168. return 0;
  169. }
  170. NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(),
  171. "mByteData is lying to us somewhere");
  172. // Now convert as much of the byte buffer to unicode as possible
  173. mUnicharDataOffset = 0;
  174. mUnicharDataLength = 0;
  175. uint32_t srcConsumed = 0;
  176. do {
  177. int32_t srcLen = mByteData.Length() - srcConsumed;
  178. int32_t dstLen = mUnicharData.Capacity() - mUnicharDataLength;
  179. *aErrorCode = mConverter->Convert(mByteData.Elements()+srcConsumed,
  180. &srcLen,
  181. mUnicharData.Elements()+mUnicharDataLength,
  182. &dstLen);
  183. mUnicharDataLength += dstLen;
  184. // XXX if srcLen is negative, we want to drop the _first_ byte in
  185. // the erroneous byte sequence and try again. This is not quite
  186. // possible right now -- see bug 160784
  187. srcConsumed += srcLen;
  188. if (NS_FAILED(*aErrorCode) && mReplacementChar) {
  189. NS_ASSERTION(0 < mUnicharData.Capacity() - mUnicharDataLength,
  190. "Decoder returned an error but filled the output buffer! "
  191. "Should not happen.");
  192. mUnicharData.Elements()[mUnicharDataLength++] = mReplacementChar;
  193. ++srcConsumed;
  194. // XXX this is needed to make sure we don't underrun our buffer;
  195. // bug 160784 again
  196. srcConsumed = std::max<uint32_t>(srcConsumed, 0);
  197. mConverter->Reset();
  198. }
  199. NS_ASSERTION(srcConsumed <= mByteData.Length(),
  200. "Whoa. The converter should have returned NS_OK_UDEC_MOREINPUT before this point!");
  201. } while (mReplacementChar &&
  202. NS_FAILED(*aErrorCode) &&
  203. mUnicharData.Capacity() > mUnicharDataLength);
  204. mLeftOverBytes = mByteData.Length() - srcConsumed;
  205. return mUnicharDataLength;
  206. }
  207. NS_IMETHODIMP
  208. nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult)
  209. {
  210. if (!mLineBuffer) {
  211. mLineBuffer = new nsLineBuffer<char16_t>;
  212. }
  213. return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult);
  214. }