123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446 |
- /* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
- #include "seccomon.h"
- #include "secport.h"
- /*
- * From RFC 2044:
- *
- * UCS-4 range (hex.) UTF-8 octet sequence (binary)
- * 0000 0000-0000 007F 0xxxxxxx
- * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
- * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
- * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx
- */
- /*
- * From http://www.imc.org/draft-hoffman-utf16
- *
- * For U on [0x00010000,0x0010FFFF]: Let U' = U - 0x00010000
- *
- * U' = yyyyyyyyyyxxxxxxxxxx
- * W1 = 110110yyyyyyyyyy
- * W2 = 110111xxxxxxxxxx
- */
- /*
- * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit
- * character values. If you wish to use this code for working with
- * host byte order values, define the following:
- *
- * #if IS_BIG_ENDIAN
- * #define L_0 0
- * #define L_1 1
- * #define L_2 2
- * #define L_3 3
- * #define H_0 0
- * #define H_1 1
- * #else / * not everyone has elif * /
- * #if IS_LITTLE_ENDIAN
- * #define L_0 3
- * #define L_1 2
- * #define L_2 1
- * #define L_3 0
- * #define H_0 1
- * #define H_1 0
- * #else
- * #error "PDP and NUXI support deferred"
- * #endif / * IS_LITTLE_ENDIAN * /
- * #endif / * IS_BIG_ENDIAN * /
- */
- #define L_0 0 /* offset of the most significant byte within a 4-byte UCS-4 unit */
- #define L_1 1 /* offset of the second most significant byte of a UCS-4 unit */
- #define L_2 2 /* offset of the third byte of a UCS-4 unit */
- #define L_3 3 /* offset of the least significant byte of a UCS-4 unit */
- #define H_0 0 /* offset of the high-order byte within a 2-byte UCS-2/UTF-16 unit */
- #define H_1 1 /* offset of the low-order byte within a 2-byte UCS-2/UTF-16 unit */
- #define BAD_UTF8 ((PRUint32)-1) /* sentinel returned by sec_port_read_utf8 for malformed input */
- /*
- * Parse a single UTF-8 character per the spec. in section 3.9 (D36)
- * of Unicode 4.0.0.
- *
- * Parameters:
- * index - Points to the byte offset in inBuf of character to read. On success,
- * updated to the offset of the following character.
- * inBuf - Input buffer, UTF-8 encoded
- * inbufLen - Length of input buffer, in bytes.
- *
- * Returns:
- * Success - The UCS4 encoded character
- * Failure - BAD_UTF8
- */
- static PRUint32
- sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBufLen)
- {
- PRUint32 result;
- unsigned int i = *index;
- int bytes_left;
- PRUint32 min_value;
- PORT_Assert(i < inBufLen);
- if ((inBuf[i] & 0x80) == 0x00) {
- result = inBuf[i++];
- bytes_left = 0;
- min_value = 0;
- } else if ((inBuf[i] & 0xE0) == 0xC0) {
- result = inBuf[i++] & 0x1F;
- bytes_left = 1;
- min_value = 0x80;
- } else if ((inBuf[i] & 0xF0) == 0xE0) {
- result = inBuf[i++] & 0x0F;
- bytes_left = 2;
- min_value = 0x800;
- } else if ((inBuf[i] & 0xF8) == 0xF0) {
- result = inBuf[i++] & 0x07;
- bytes_left = 3;
- min_value = 0x10000;
- } else {
- return BAD_UTF8;
- }
- while (bytes_left--) {
- if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80)
- return BAD_UTF8;
- result = (result << 6) | (inBuf[i++] & 0x3F);
- }
- /* Check for overlong sequences, surrogates, and outside unicode range */
- if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF) {
- return BAD_UTF8;
- }
- *index = i;
- return result;
- }
- PRBool
- sec_port_ucs4_utf8_conversion_function(
- PRBool toUnicode,
- unsigned char *inBuf,
- unsigned int inBufLen,
- unsigned char *outBuf,
- unsigned int maxOutBufLen,
- unsigned int *outBufLen)
- {
- PORT_Assert((unsigned int *)NULL != outBufLen);
- if (toUnicode) {
- unsigned int i, len = 0;
- for (i = 0; i < inBufLen;) {
- if ((inBuf[i] & 0x80) == 0x00)
- i += 1;
- else if ((inBuf[i] & 0xE0) == 0xC0)
- i += 2;
- else if ((inBuf[i] & 0xF0) == 0xE0)
- i += 3;
- else if ((inBuf[i] & 0xF8) == 0xF0)
- i += 4;
- else
- return PR_FALSE;
- len += 4;
- }
- if (len > maxOutBufLen) {
- *outBufLen = len;
- return PR_FALSE;
- }
- len = 0;
- for (i = 0; i < inBufLen;) {
- PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
- if (ucs4 == BAD_UTF8)
- return PR_FALSE;
- outBuf[len + L_0] = 0x00;
- outBuf[len + L_1] = (unsigned char)(ucs4 >> 16);
- outBuf[len + L_2] = (unsigned char)(ucs4 >> 8);
- outBuf[len + L_3] = (unsigned char)ucs4;
- len += 4;
- }
- *outBufLen = len;
- return PR_TRUE;
- } else {
- unsigned int i, len = 0;
- PORT_Assert((inBufLen % 4) == 0);
- if ((inBufLen % 4) != 0) {
- *outBufLen = 0;
- return PR_FALSE;
- }
- for (i = 0; i < inBufLen; i += 4) {
- if ((inBuf[i + L_0] > 0x00) || (inBuf[i + L_1] > 0x10)) {
- *outBufLen = 0;
- return PR_FALSE;
- } else if (inBuf[i + L_1] >= 0x01)
- len += 4;
- else if (inBuf[i + L_2] >= 0x08)
- len += 3;
- else if ((inBuf[i + L_2] > 0x00) || (inBuf[i + L_3] >= 0x80))
- len += 2;
- else
- len += 1;
- }
- if (len > maxOutBufLen) {
- *outBufLen = len;
- return PR_FALSE;
- }
- len = 0;
- for (i = 0; i < inBufLen; i += 4) {
- if (inBuf[i + L_1] >= 0x01) {
- /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
- /* 00000000 000abcde fghijklm nopqrstu ->
- 11110abc 10defghi 10jklmno 10pqrstu */
- outBuf[len + 0] = 0xF0 | ((inBuf[i + L_1] & 0x1C) >> 2);
- outBuf[len + 1] = 0x80 | ((inBuf[i + L_1] & 0x03) << 4) | ((inBuf[i + L_2] & 0xF0) >> 4);
- outBuf[len + 2] = 0x80 | ((inBuf[i + L_2] & 0x0F) << 2) | ((inBuf[i + L_3] & 0xC0) >> 6);
- outBuf[len + 3] = 0x80 | ((inBuf[i + L_3] & 0x3F) >> 0);
- len += 4;
- } else if (inBuf[i + L_2] >= 0x08) {
- /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
- /* 00000000 00000000 abcdefgh ijklmnop ->
- 1110abcd 10efghij 10klmnop */
- outBuf[len + 0] = 0xE0 | ((inBuf[i + L_2] & 0xF0) >> 4);
- outBuf[len + 1] = 0x80 | ((inBuf[i + L_2] & 0x0F) << 2) | ((inBuf[i + L_3] & 0xC0) >> 6);
- outBuf[len + 2] = 0x80 | ((inBuf[i + L_3] & 0x3F) >> 0);
- len += 3;
- } else if ((inBuf[i + L_2] > 0x00) || (inBuf[i + L_3] >= 0x80)) {
- /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */
- /* 00000000 00000000 00000abc defghijk ->
- 110abcde 10fghijk */
- outBuf[len + 0] = 0xC0 | ((inBuf[i + L_2] & 0x07) << 2) | ((inBuf[i + L_3] & 0xC0) >> 6);
- outBuf[len + 1] = 0x80 | ((inBuf[i + L_3] & 0x3F) >> 0);
- len += 2;
- } else {
- /* 0000 0000-0000 007F -> 0xxxxxx */
- /* 00000000 00000000 00000000 0abcdefg ->
- 0abcdefg */
- outBuf[len + 0] = (inBuf[i + L_3] & 0x7F);
- len += 1;
- }
- }
- *outBufLen = len;
- return PR_TRUE;
- }
- }
- PRBool
- sec_port_ucs2_utf8_conversion_function(
- PRBool toUnicode,
- unsigned char *inBuf,
- unsigned int inBufLen,
- unsigned char *outBuf,
- unsigned int maxOutBufLen,
- unsigned int *outBufLen)
- {
- PORT_Assert((unsigned int *)NULL != outBufLen);
- if (toUnicode) {
- unsigned int i, len = 0;
- for (i = 0; i < inBufLen;) {
- if ((inBuf[i] & 0x80) == 0x00) {
- i += 1;
- len += 2;
- } else if ((inBuf[i] & 0xE0) == 0xC0) {
- i += 2;
- len += 2;
- } else if ((inBuf[i] & 0xF0) == 0xE0) {
- i += 3;
- len += 2;
- } else if ((inBuf[i] & 0xF8) == 0xF0) {
- i += 4;
- len += 4;
- } else
- return PR_FALSE;
- }
- if (len > maxOutBufLen) {
- *outBufLen = len;
- return PR_FALSE;
- }
- len = 0;
- for (i = 0; i < inBufLen;) {
- PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
- if (ucs4 == BAD_UTF8)
- return PR_FALSE;
- if (ucs4 < 0x10000) {
- outBuf[len + H_0] = (unsigned char)(ucs4 >> 8);
- outBuf[len + H_1] = (unsigned char)ucs4;
- len += 2;
- } else {
- ucs4 -= 0x10000;
- outBuf[len + 0 + H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3));
- outBuf[len + 0 + H_1] = (unsigned char)(ucs4 >> 10);
- outBuf[len + 2 + H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3));
- outBuf[len + 2 + H_1] = (unsigned char)ucs4;
- len += 4;
- }
- }
- *outBufLen = len;
- return PR_TRUE;
- } else {
- unsigned int i, len = 0;
- PORT_Assert((inBufLen % 2) == 0);
- if ((inBufLen % 2) != 0) {
- *outBufLen = 0;
- return PR_FALSE;
- }
- for (i = 0; i < inBufLen; i += 2) {
- if ((inBuf[i + H_0] == 0x00) && ((inBuf[i + H_1] & 0x80) == 0x00))
- len += 1;
- else if (inBuf[i + H_0] < 0x08)
- len += 2;
- else if (((inBuf[i + H_0] & 0xFC) == 0xD8)) {
- if (((inBufLen - i) > 2) && ((inBuf[i + 2 + H_0] & 0xFC) == 0xDC)) {
- i += 2;
- len += 4;
- } else {
- return PR_FALSE;
- }
- } else if ((inBuf[i + H_0] & 0xFC) == 0xDC) {
- return PR_FALSE;
- } else {
- len += 3;
- }
- }
- if (len > maxOutBufLen) {
- *outBufLen = len;
- return PR_FALSE;
- }
- len = 0;
- for (i = 0; i < inBufLen; i += 2) {
- if ((inBuf[i + H_0] == 0x00) && ((inBuf[i + H_1] & 0x80) == 0x00)) {
- /* 0000-007F -> 0xxxxxx */
- /* 00000000 0abcdefg -> 0abcdefg */
- outBuf[len] = inBuf[i + H_1] & 0x7F;
- len += 1;
- } else if (inBuf[i + H_0] < 0x08) {
- /* 0080-07FF -> 110xxxxx 10xxxxxx */
- /* 00000abc defghijk -> 110abcde 10fghijk */
- outBuf[len + 0] = 0xC0 | ((inBuf[i + H_0] & 0x07) << 2) | ((inBuf[i + H_1] & 0xC0) >> 6);
- outBuf[len + 1] = 0x80 | ((inBuf[i + H_1] & 0x3F) >> 0);
- len += 2;
- } else if ((inBuf[i + H_0] & 0xFC) == 0xD8) {
- int abcde, BCDE;
- PORT_Assert(((inBufLen - i) > 2) && ((inBuf[i + 2 + H_0] & 0xFC) == 0xDC));
- /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
- /* 110110BC DEfghijk 110111lm nopqrstu ->
- { Let abcde = BCDE + 1 }
- 11110abc 10defghi 10jklmno 10pqrstu */
- BCDE = ((inBuf[i + H_0] & 0x03) << 2) | ((inBuf[i + H_1] & 0xC0) >> 6);
- abcde = BCDE + 1;
- outBuf[len + 0] = 0xF0 | ((abcde & 0x1C) >> 2);
- outBuf[len + 1] = 0x80 | ((abcde & 0x03) << 4) | ((inBuf[i + 0 + H_1] & 0x3C) >> 2);
- outBuf[len + 2] = 0x80 | ((inBuf[i + 0 + H_1] & 0x03) << 4) | ((inBuf[i + 2 + H_0] & 0x03) << 2) | ((inBuf[i + 2 + H_1] & 0xC0) >> 6);
- outBuf[len + 3] = 0x80 | ((inBuf[i + 2 + H_1] & 0x3F) >> 0);
- i += 2;
- len += 4;
- } else {
- /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
- /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */
- outBuf[len + 0] = 0xE0 | ((inBuf[i + H_0] & 0xF0) >> 4);
- outBuf[len + 1] = 0x80 | ((inBuf[i + H_0] & 0x0F) << 2) | ((inBuf[i + H_1] & 0xC0) >> 6);
- outBuf[len + 2] = 0x80 | ((inBuf[i + H_1] & 0x3F) >> 0);
- len += 3;
- }
- }
- *outBufLen = len;
- return PR_TRUE;
- }
- }
- PRBool
- sec_port_iso88591_utf8_conversion_function(
- const unsigned char *inBuf,
- unsigned int inBufLen,
- unsigned char *outBuf,
- unsigned int maxOutBufLen,
- unsigned int *outBufLen)
- {
- unsigned int i, len = 0;
- PORT_Assert((unsigned int *)NULL != outBufLen);
- for (i = 0; i < inBufLen; i++) {
- if ((inBuf[i] & 0x80) == 0x00)
- len += 1;
- else
- len += 2;
- }
- if (len > maxOutBufLen) {
- *outBufLen = len;
- return PR_FALSE;
- }
- len = 0;
- for (i = 0; i < inBufLen; i++) {
- if ((inBuf[i] & 0x80) == 0x00) {
- /* 00-7F -> 0xxxxxxx */
- /* 0abcdefg -> 0abcdefg */
- outBuf[len] = inBuf[i];
- len += 1;
- } else {
- /* 80-FF <- 110xxxxx 10xxxxxx */
- /* 00000000 abcdefgh -> 110000ab 10cdefgh */
- outBuf[len + 0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6);
- outBuf[len + 1] = 0x80 | ((inBuf[i] & 0x3F) >> 0);
- len += 2;
- }
- }
- *outBufLen = len;
- return PR_TRUE;
- }
|