123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166 |
- /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
- /* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
- //----------------------------------------------------------------------
- // Global functions and data [declaration]
- #include "nsUnicodeToUTF8.h"
- #include "mozilla/CheckedInt.h"
- NS_IMPL_ISUPPORTS(nsUnicodeToUTF8, nsIUnicodeEncoder)
- //----------------------------------------------------------------------
- // nsUnicodeToUTF8 class [implementation]
- NS_IMETHODIMP nsUnicodeToUTF8::GetMaxLength(const char16_t* aSrc,
- int32_t aSrcLength,
- int32_t* aDestLength)
- {
- MOZ_ASSERT(aDestLength);
- // aSrc is interpreted as UTF16, 3 is normally enough.
- // But when previous buffer only contains part of the surrogate pair, we
- // need to complete it here. If the first word in following buffer is not
- // in valid surrogate range, we need to convert the remaining of last buffer
- // to 3 bytes.
- mozilla::CheckedInt32 length = aSrcLength;
- length *= 3;
- length += 3;
- if (!length.isValid()) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
- *aDestLength = length.value();
- return NS_OK;
- }
- NS_IMETHODIMP nsUnicodeToUTF8::Convert(const char16_t* aSrc,
- int32_t* aSrcLength,
- char* aDest,
- int32_t* aDestLength)
- {
- const char16_t* src = aSrc;
- const char16_t* srcEnd = aSrc + *aSrcLength;
- char* dest = aDest;
- int32_t destLen = *aDestLength;
- uint32_t n;
- //complete remaining of last conversion
- if (mHighSurrogate) {
- if (src < srcEnd) {
- *aDestLength = 0;
- return NS_OK_UENC_MOREINPUT;
- }
- if (*aDestLength < 4) {
- *aSrcLength = 0;
- *aDestLength = 0;
- return NS_OK_UENC_MOREOUTPUT;
- }
- if (*src < (char16_t)0xdc00 || *src > (char16_t)0xdfff) { //not a pair
- *dest++ = (char)0xef; //replacement character
- *dest++ = (char)0xbf;
- *dest++ = (char)0xbd;
- destLen -= 3;
- } else {
- n = ((mHighSurrogate - (char16_t)0xd800) << 10) +
- (*src - (char16_t)0xdc00) + 0x10000;
- *dest++ = (char)0xf0 | (n >> 18);
- *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
- *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
- *dest++ = (char)0x80 | (n & 0x3f);
- ++src;
- destLen -= 4;
- }
- mHighSurrogate = 0;
- }
- while (src < srcEnd) {
- if ( *src <= 0x007f) {
- if (destLen < 1)
- goto error_more_output;
- *dest++ = (char)*src;
- --destLen;
- } else if (*src <= 0x07ff) {
- if (destLen < 2)
- goto error_more_output;
- *dest++ = (char)0xc0 | (*src >> 6);
- *dest++ = (char)0x80 | (*src & 0x003f);
- destLen -= 2;
- } else if (*src >= (char16_t)0xd800 && *src <= (char16_t)0xdfff) {
- if (*src >= (char16_t)0xdc00) { //not a pair
- if (destLen < 3)
- goto error_more_output;
- *dest++ = (char)0xef; //replacement character
- *dest++ = (char)0xbf;
- *dest++ = (char)0xbd;
- destLen -= 3;
- ++src;
- continue;
- }
- if ((src+1) >= srcEnd) {
- //we need another surrogate to complete this unicode char
- mHighSurrogate = *src;
- *aDestLength = dest - aDest;
- return NS_OK_UENC_MOREINPUT;
- }
- //handle surrogate
- if (destLen < 4)
- goto error_more_output;
- if (*(src+1) < (char16_t)0xdc00 || *(src+1) > 0xdfff) { //not a pair
- *dest++ = (char)0xef; //replacement character
- *dest++ = (char)0xbf;
- *dest++ = (char)0xbd;
- destLen -= 3;
- } else {
- n = ((*src - (char16_t)0xd800) << 10) + (*(src+1) - (char16_t)0xdc00) + (uint32_t)0x10000;
- *dest++ = (char)0xf0 | (n >> 18);
- *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
- *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
- *dest++ = (char)0x80 | (n & 0x3f);
- destLen -= 4;
- ++src;
- }
- } else {
- if (destLen < 3)
- goto error_more_output;
- //treat rest of the character as BMP
- *dest++ = (char)0xe0 | (*src >> 12);
- *dest++ = (char)0x80 | ((*src >> 6) & 0x003f);
- *dest++ = (char)0x80 | (*src & 0x003f);
- destLen -= 3;
- }
- ++src;
- }
- *aDestLength = dest - aDest;
- return NS_OK;
- error_more_output:
- *aSrcLength = src - aSrc;
- *aDestLength = dest - aDest;
- return NS_OK_UENC_MOREOUTPUT;
- }
- NS_IMETHODIMP nsUnicodeToUTF8::Finish(char * aDest, int32_t * aDestLength)
- {
- char * dest = aDest;
- if (mHighSurrogate) {
- if (*aDestLength < 3) {
- *aDestLength = 0;
- return NS_OK_UENC_MOREOUTPUT;
- }
- *dest++ = (char)0xef; //replacement character
- *dest++ = (char)0xbf;
- *dest++ = (char)0xbd;
- mHighSurrogate = 0;
- *aDestLength = 3;
- return NS_OK;
- }
- *aDestLength = 0;
- return NS_OK;
- }
|