123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246 |
- /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
- /* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
- #include "nsConverterInputStream.h"
- #include "nsIInputStream.h"
- #include "nsReadLine.h"
- #include "nsStreamUtils.h"
- #include <algorithm>
- #include "mozilla/dom/EncodingUtils.h"
- using mozilla::dom::EncodingUtils;
- #define CONVERTER_BUFFER_SIZE 8192 // fallback buffer capacity that Init() uses when its aBufferSize argument is <= 0
- NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream, // NOTE(review): XPCOM macro; presumably supplies the nsISupports plumbing for the interfaces listed here -- confirm against nsISupportsImpl.h
- nsIUnicharInputStream, nsIUnicharLineInputStream)
- NS_IMETHODIMP
- nsConverterInputStream::Init(nsIInputStream* aStream,
- const char *aCharset,
- int32_t aBufferSize,
- char16_t aReplacementChar)
- {
- nsAutoCString label;
- if (!aCharset) {
- label.AssignLiteral("UTF-8");
- } else {
- label = aCharset;
- }
- if (aBufferSize <=0) aBufferSize=CONVERTER_BUFFER_SIZE;
-
- // get the decoder
- nsAutoCString encoding;
- if (label.EqualsLiteral("UTF-16")) {
- // Compat with old test cases. Unclear if any extensions really care.
- encoding.Assign(label);
- } else if (!EncodingUtils::FindEncodingForLabelNoReplacement(label,
- encoding)) {
- return NS_ERROR_UCONV_NOCONV;
- }
- mConverter = EncodingUtils::DecoderForEncoding(encoding);
-
- // set up our buffers
- if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible) ||
- !mUnicharData.SetCapacity(aBufferSize, mozilla::fallible)) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
- mInput = aStream;
- mReplacementChar = aReplacementChar;
- if (!aReplacementChar ||
- aReplacementChar != mConverter->GetCharacterForUnMapped()) {
- mConverter->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
- }
- return NS_OK;
- }
- NS_IMETHODIMP
- nsConverterInputStream::Close()
- {
- nsresult rv = mInput ? mInput->Close() : NS_OK;
- mLineBuffer = nullptr;
- mInput = nullptr;
- mConverter = nullptr;
- mByteData.Clear();
- mUnicharData.Clear();
- return rv;
- }
- NS_IMETHODIMP
- nsConverterInputStream::Read(char16_t* aBuf,
- uint32_t aCount,
- uint32_t *aReadCount)
- {
- NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
- uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
- if (0 == readCount) {
- // Fill the unichar buffer
- readCount = Fill(&mLastErrorCode);
- if (readCount == 0) {
- *aReadCount = 0;
- return mLastErrorCode;
- }
- }
- if (readCount > aCount) {
- readCount = aCount;
- }
- memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset,
- readCount * sizeof(char16_t));
- mUnicharDataOffset += readCount;
- *aReadCount = readCount;
- return NS_OK;
- }
- NS_IMETHODIMP
- nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
- void* aClosure,
- uint32_t aCount, uint32_t *aReadCount)
- {
- NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
- uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset;
- nsresult rv;
- if (0 == bytesToWrite) {
- // Fill the unichar buffer
- bytesToWrite = Fill(&rv);
- if (bytesToWrite <= 0) {
- *aReadCount = 0;
- return rv;
- }
- }
-
- if (bytesToWrite > aCount)
- bytesToWrite = aCount;
-
- uint32_t bytesWritten;
- uint32_t totalBytesWritten = 0;
- while (bytesToWrite) {
- rv = aWriter(this, aClosure,
- mUnicharData.Elements() + mUnicharDataOffset,
- totalBytesWritten, bytesToWrite, &bytesWritten);
- if (NS_FAILED(rv)) {
- // don't propagate errors to the caller
- break;
- }
-
- bytesToWrite -= bytesWritten;
- totalBytesWritten += bytesWritten;
- mUnicharDataOffset += bytesWritten;
-
- }
- *aReadCount = totalBytesWritten;
- return NS_OK;
- }
- NS_IMETHODIMP
- nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString,
- uint32_t* aReadCount)
- {
- NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
- uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
- if (0 == readCount) {
- // Fill the unichar buffer
- readCount = Fill(&mLastErrorCode);
- if (readCount == 0) {
- *aReadCount = 0;
- return mLastErrorCode;
- }
- }
- if (readCount > aCount) {
- readCount = aCount;
- }
- const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset;
- aString.Assign(buf, readCount);
- mUnicharDataOffset += readCount;
- *aReadCount = readCount;
- return NS_OK;
- }
- uint32_t
- nsConverterInputStream::Fill(nsresult * aErrorCode)
- {
- if (nullptr == mInput) {
- // We already closed the stream!
- *aErrorCode = NS_BASE_STREAM_CLOSED;
- return 0;
- }
- if (NS_FAILED(mLastErrorCode)) {
- // We failed to completely convert last time, and error-recovery
- // is disabled. We will fare no better this time, so...
- *aErrorCode = mLastErrorCode;
- return 0;
- }
-
- // We assume a many to one conversion and are using equal sizes for
- // the two buffers. However if an error happens at the very start
- // of a byte buffer we may end up in a situation where n bytes lead
- // to n+1 unicode chars. Thus we need to keep track of the leftover
- // bytes as we convert.
-
- uint32_t nb;
- *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb);
- if (nb == 0 && mLeftOverBytes == 0) {
- // No more data
- *aErrorCode = NS_OK;
- return 0;
- }
- NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(),
- "mByteData is lying to us somewhere");
- // Now convert as much of the byte buffer to unicode as possible
- mUnicharDataOffset = 0;
- mUnicharDataLength = 0;
- uint32_t srcConsumed = 0;
- do {
- int32_t srcLen = mByteData.Length() - srcConsumed;
- int32_t dstLen = mUnicharData.Capacity() - mUnicharDataLength;
- *aErrorCode = mConverter->Convert(mByteData.Elements()+srcConsumed,
- &srcLen,
- mUnicharData.Elements()+mUnicharDataLength,
- &dstLen);
- mUnicharDataLength += dstLen;
- // XXX if srcLen is negative, we want to drop the _first_ byte in
- // the erroneous byte sequence and try again. This is not quite
- // possible right now -- see bug 160784
- srcConsumed += srcLen;
- if (NS_FAILED(*aErrorCode) && mReplacementChar) {
- NS_ASSERTION(0 < mUnicharData.Capacity() - mUnicharDataLength,
- "Decoder returned an error but filled the output buffer! "
- "Should not happen.");
- mUnicharData.Elements()[mUnicharDataLength++] = mReplacementChar;
- ++srcConsumed;
- // XXX this is needed to make sure we don't underrun our buffer;
- // bug 160784 again
- srcConsumed = std::max<uint32_t>(srcConsumed, 0);
- mConverter->Reset();
- }
- NS_ASSERTION(srcConsumed <= mByteData.Length(),
- "Whoa. The converter should have returned NS_OK_UDEC_MOREINPUT before this point!");
- } while (mReplacementChar &&
- NS_FAILED(*aErrorCode) &&
- mUnicharData.Capacity() > mUnicharDataLength);
- mLeftOverBytes = mByteData.Length() - srcConsumed;
- return mUnicharDataLength;
- }
- NS_IMETHODIMP
- nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult)
- {
- if (!mLineBuffer) {
- mLineBuffer = new nsLineBuffer<char16_t>;
- }
- return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult);
- }
|