12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346 |
- /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
- /* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
- #include <string.h>
- #include "prmem.h"
- #include "prprf.h"
- #include "plstr.h"
- #include "plbase64.h"
- #include "nsCRT.h"
- #include "nsMemory.h"
- #include "nsTArray.h"
- #include "nsCOMPtr.h"
- #include "nsEscape.h"
- #include "nsIUTF8ConverterService.h"
- #include "nsUConvCID.h"
- #include "nsIServiceManager.h"
- #include "nsMIMEHeaderParamImpl.h"
- #include "nsReadableUtils.h"
- #include "nsNativeCharsetUtils.h"
- #include "nsError.h"
- #include "nsIUnicodeDecoder.h"
- #include "mozilla/dom/EncodingUtils.h"
- using mozilla::dom::EncodingUtils;
- // static functions declared below are moved from mailnews/mime/src/comi18n.cpp
-
- static char *DecodeQ(const char *, uint32_t);
- static bool Is7bitNonAsciiString(const char *, uint32_t);
- static void CopyRawHeader(const char *, uint32_t, const char *, nsACString &);
- static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&);
- static nsresult internalDecodeParameter(const nsACString&, const char*,
- const char*, bool, bool, nsACString&);
- // XXX The chance of UTF-7 being used in the message header is really
- // low, but in theory it's possible.
- #define IS_7BIT_NON_ASCII_CHARSET(cset) \
- (!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \
- !nsCRT::strncasecmp((cset), "HZ-GB", 5) || \
- !nsCRT::strncasecmp((cset), "UTF-7", 5))
- NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam)
- NS_IMETHODIMP
- nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal,
- const char *aParamName,
- const nsACString& aFallbackCharset,
- bool aTryLocaleCharset,
- char **aLang, nsAString& aResult)
- {
- return DoGetParameter(aHeaderVal, aParamName, MIME_FIELD_ENCODING,
- aFallbackCharset, aTryLocaleCharset, aLang, aResult);
- }
- NS_IMETHODIMP
- nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal,
- const char *aParamName,
- const nsACString& aFallbackCharset,
- bool aTryLocaleCharset,
- char **aLang, nsAString& aResult)
- {
- return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING,
- aFallbackCharset, aTryLocaleCharset, aLang, aResult);
- }
- // XXX : aTryLocaleCharset is not yet effective.
- nsresult
- nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal,
- const char *aParamName,
- ParamDecoding aDecoding,
- const nsACString& aFallbackCharset,
- bool aTryLocaleCharset,
- char **aLang, nsAString& aResult)
- {
- aResult.Truncate();
- nsresult rv;
- // get parameter (decode RFC 2231/5987 when applicable, as specified by
- // aDecoding (5987 being a subset of 2231) and return charset.)
- nsXPIDLCString med;
- nsXPIDLCString charset;
- rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName,
- aDecoding, getter_Copies(charset), aLang,
- getter_Copies(med));
- if (NS_FAILED(rv))
- return rv;
- // convert to UTF-8 after charset conversion and RFC 2047 decoding
- // if necessary.
-
- nsAutoCString str1;
- rv = internalDecodeParameter(med, charset.get(), nullptr, false,
- // was aDecoding == MIME_FIELD_ENCODING
- // see bug 875615
- true,
- str1);
- NS_ENSURE_SUCCESS(rv, rv);
- if (!aFallbackCharset.IsEmpty())
- {
- nsAutoCString charset;
- EncodingUtils::FindEncodingForLabel(aFallbackCharset, charset);
- nsAutoCString str2;
- nsCOMPtr<nsIUTF8ConverterService>
- cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
- if (cvtUTF8 &&
- NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1,
- PromiseFlatCString(aFallbackCharset).get(), false,
- !charset.EqualsLiteral("UTF-8"),
- 1, str2))) {
- CopyUTF8toUTF16(str2, aResult);
- return NS_OK;
- }
- }
- if (IsUTF8(str1)) {
- CopyUTF8toUTF16(str1, aResult);
- return NS_OK;
- }
- if (aTryLocaleCharset && !NS_IsNativeUTF8())
- return NS_CopyNativeToUnicode(str1, aResult);
- CopyASCIItoUTF16(str1, aResult);
- return NS_OK;
- }
// Strips the backslash from every backslash-escaped pair in a quoted-string.
// The NUL-terminated buffer |src| is rewritten in place and can only become
// shorter. A backslash that is the very last character has nothing to
// escape and is kept as-is.
void RemoveQuotedStringEscapes(char *src)
{
  char *out = src;
  const char *in = src;
  while (*in != '\0') {
    if (*in == '\\' && in[1] != '\0') {
      // drop the backslash, keep the character it escapes
      ++in;
    }
    *out++ = *in++;
  }
  *out = '\0';
}
// Returns true if |aChar| is an ASCII hexadecimal digit (0-9, a-f, A-F).
bool IsHexDigit(char aChar)
{
  char c = aChar;
  return (c >= 'a' && c <= 'f') ||
         (c >= 'A' && c <= 'F') ||
         (c >= '0' && c <= '9');
}

// Validates that the first |len| characters of |aValue| are syntactically
// valid with respect to %-escapes: every '%' must be followed by two hex
// digits, and both digits must themselves lie inside the |len| range.
//
// Only the first |len| characters are ever inspected, so |aValue| need not
// be NUL-terminated. An escape that is cut off by the end of the range is
// rejected, even if the bytes following the range happen to be hex digits.
bool IsValidPercentEscaped(const char *aValue, int32_t len)
{
  for (int32_t i = 0; i < len; i++) {
    if (aValue[i] != '%') {
      continue;
    }
    // need two more characters inside the range (written as a subtraction
    // so that the comparison cannot overflow)
    if (len - i < 3) {
      return false;
    }
    if (!IsHexDigit(aValue[i + 1]) || !IsHexDigit(aValue[i + 2])) {
      return false;
    }
  }
  return true;
}
// Support for continuations (RFC 2231, Section 3).
// Only a sane number of segments is supported.
#define MAX_CONTINUATIONS 999

// One segment of a continued parameter value. |value| is a non-owning
// pointer to |length| characters; the two flags record which decoding steps
// the segment still needs when the segments are combined.
class Continuation {
  public:
    Continuation(const char *aValue, uint32_t aLength,
                 bool aNeedsPercentDecoding, bool aWasQuotedString)
      : value(aValue)
      , length(aLength)
      , needsPercentDecoding(aNeedsPercentDecoding)
      , wasQuotedString(aWasQuotedString)
    {
    }

    // Default constructor, needed for nsTArray; yields an empty segment
    // with a null |value|.
    Continuation()
      : value(nullptr)
      , length(0)
      , needsPercentDecoding(false)
      , wasQuotedString(false)
    {
    }

    ~Continuation() = default;

    const char *value;
    uint32_t length;
    bool needsPercentDecoding;
    bool wasQuotedString;
};
- // combine segments into a single string, returning the allocated string
- // (or nullptr) while emptying the list
- char *combineContinuations(nsTArray<Continuation>& aArray)
- {
- // Sanity check
- if (aArray.Length() == 0)
- return nullptr;
- // Get an upper bound for the length
- uint32_t length = 0;
- for (uint32_t i = 0; i < aArray.Length(); i++) {
- length += aArray[i].length;
- }
- // Allocate
- char *result = (char *) moz_xmalloc(length + 1);
- // Concatenate
- if (result) {
- *result = '\0';
- for (uint32_t i = 0; i < aArray.Length(); i++) {
- Continuation cont = aArray[i];
- if (! cont.value) break;
- char *c = result + strlen(result);
- strncat(result, cont.value, cont.length);
- if (cont.needsPercentDecoding) {
- nsUnescape(c);
- }
- if (cont.wasQuotedString) {
- RemoveQuotedStringEscapes(c);
- }
- }
- // return null if empty value
- if (*result == '\0') {
- free(result);
- result = nullptr;
- }
- } else {
- // Handle OOM
- NS_WARNING("Out of memory\n");
- }
- return result;
- }
- // add a continuation, return false on error if segment already has been seen
- bool addContinuation(nsTArray<Continuation>& aArray, uint32_t aIndex,
- const char *aValue, uint32_t aLength,
- bool aNeedsPercentDecoding, bool aWasQuotedString)
- {
- if (aIndex < aArray.Length() && aArray[aIndex].value) {
- NS_WARNING("duplicate RC2231 continuation segment #\n");
- return false;
- }
- if (aIndex > MAX_CONTINUATIONS) {
- NS_WARNING("RC2231 continuation segment # exceeds limit\n");
- return false;
- }
- if (aNeedsPercentDecoding && aWasQuotedString) {
- NS_WARNING("RC2231 continuation segment can't use percent encoding and quoted string form at the same time\n");
- return false;
- }
- Continuation cont(aValue, aLength, aNeedsPercentDecoding, aWasQuotedString);
- if (aArray.Length() <= aIndex) {
- aArray.SetLength(aIndex + 1);
- }
- aArray[aIndex] = cont;
- return true;
- }
- // parse a segment number; return -1 on error
- int32_t parseSegmentNumber(const char *aValue, int32_t aLen)
- {
- if (aLen < 1) {
- NS_WARNING("segment number missing\n");
- return -1;
- }
- if (aLen > 1 && aValue[0] == '0') {
- NS_WARNING("leading '0' not allowed in segment number\n");
- return -1;
- }
- int32_t segmentNumber = 0;
- for (int32_t i = 0; i < aLen; i++) {
- if (! (aValue[i] >= '0' && aValue[i] <= '9')) {
- NS_WARNING("invalid characters in segment number\n");
- return -1;
- }
- segmentNumber *= 10;
- segmentNumber += aValue[i] - '0';
- if (segmentNumber > MAX_CONTINUATIONS) {
- NS_WARNING("Segment number exceeds sane size\n");
- return -1;
- }
- }
- return segmentNumber;
- }
- // validate a given octet sequence for compliance with the specified
- // encoding
- bool IsValidOctetSequenceForCharset(nsACString& aCharset, const char *aOctets)
- {
- nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService
- (NS_UTF8CONVERTERSERVICE_CONTRACTID));
- if (!cvtUTF8) {
- NS_WARNING("Can't get UTF8ConverterService\n");
- return false;
- }
- nsAutoCString tmpRaw;
- tmpRaw.Assign(aOctets);
- nsAutoCString tmpDecoded;
- nsresult rv = cvtUTF8->ConvertStringToUTF8(tmpRaw,
- PromiseFlatCString(aCharset).get(),
- false, false, 1, tmpDecoded);
- if (rv != NS_OK) {
- // we can't decode; charset may be unsupported, or the octet sequence
- // is broken (illegal or incomplete octet sequence contained)
- NS_WARNING("RFC2231/5987 parameter value does not decode according to specified charset\n");
- return false;
- }
- return true;
- }
- // moved almost verbatim from mimehdrs.cpp
- // char *
- // MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
- // char **charset, char **language)
- //
- // The format of these header lines is
- // <token> [ ';' <token> '=' <token-or-quoted-string> ]*
- NS_IMETHODIMP
- nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
- const char *aParamName,
- char **aCharset,
- char **aLang,
- char **aResult)
- {
- return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING,
- aCharset, aLang, aResult);
- }
- nsresult
- nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue,
- const char *aParamName,
- ParamDecoding aDecoding,
- char **aCharset,
- char **aLang,
- char **aResult)
- {
- if (!aHeaderValue || !*aHeaderValue || !aResult)
- return NS_ERROR_INVALID_ARG;
- *aResult = nullptr;
- if (aCharset) *aCharset = nullptr;
- if (aLang) *aLang = nullptr;
- nsAutoCString charset;
- // change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable
- // them for HTTP header fields later on, see bug 776324
- bool acceptContinuations = true;
- const char *str = aHeaderValue;
- // skip leading white space.
- for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
- ;
- const char *start = str;
-
- // aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
- // For instance, return 'inline' in the following case:
- // Content-Disposition: inline; filename=.....
- if (!aParamName || !*aParamName)
- {
- for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str)
- ;
- if (str == start)
- return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY;
- *aResult = (char *) nsMemory::Clone(start, (str - start) + 1);
- NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY);
- (*aResult)[str - start] = '\0'; // null-terminate
- return NS_OK;
- }
- /* Skip forward to first ';' */
- for (; *str && *str != ';' && *str != ','; ++str)
- ;
- if (*str)
- str++;
- /* Skip over following whitespace */
- for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
- ;
- // Some broken http servers just specify parameters
- // like 'filename' without specifying disposition
- // method. Rewind to the first non-white-space
- // character.
-
- if (!*str)
- str = start;
- // RFC2231 - The legitimate parm format can be:
- // A. title=ThisIsTitle
- // B. title*=us-ascii'en-us'This%20is%20wierd.
- // C. title*0*=us-ascii'en'This%20is%20wierd.%20We
- // title*1*=have%20to%20support%20this.
- // title*2="Else..."
- // D. title*0="Hey, what you think you are doing?"
- // title*1="There is no charset and lang info."
- // RFC5987: only A and B
-
- // collect results for the different algorithms (plain filename,
- // RFC5987/2231-encoded filename, + continuations) separately and decide
- // which to use at the end
- char *caseAResult = nullptr;
- char *caseBResult = nullptr;
- char *caseCDResult = nullptr;
- // collect continuation segments
- nsTArray<Continuation> segments;
- // our copies of the charset parameter, kept separately as they might
- // differ for the two formats
- nsDependentCSubstring charsetB, charsetCD;
- nsDependentCSubstring lang;
- int32_t paramLen = strlen(aParamName);
- while (*str) {
- // find name/value
- const char *nameStart = str;
- const char *nameEnd = nullptr;
- const char *valueStart = str;
- const char *valueEnd = nullptr;
- bool isQuotedString = false;
- NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace.");
- // Skip forward to the end of this token.
- for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++)
- ;
- nameEnd = str;
- int32_t nameLen = nameEnd - nameStart;
- // Skip over whitespace, '=', and whitespace
- while (nsCRT::IsAsciiSpace(*str)) ++str;
- if (!*str) {
- break;
- }
- if (*str++ != '=') {
- // don't accept parameters without "="
- goto increment_str;
- }
- while (nsCRT::IsAsciiSpace(*str)) ++str;
- if (*str != '"') {
- // The value is a token, not a quoted string.
- valueStart = str;
- for (valueEnd = str;
- *valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';';
- valueEnd++)
- ;
- str = valueEnd;
- } else {
- isQuotedString = true;
-
- ++str;
- valueStart = str;
- for (valueEnd = str; *valueEnd; ++valueEnd) {
- if (*valueEnd == '\\' && *(valueEnd + 1))
- ++valueEnd;
- else if (*valueEnd == '"')
- break;
- }
- str = valueEnd;
- // *valueEnd != null means that *valueEnd is quote character.
- if (*valueEnd)
- str++;
- }
- // See if this is the simplest case (case A above),
- // a 'single' line value with no charset and lang.
- // If so, copy it and return.
- if (nameLen == paramLen &&
- !nsCRT::strncasecmp(nameStart, aParamName, paramLen)) {
- if (caseAResult) {
- // we already have one caseA result, ignore subsequent ones
- goto increment_str;
- }
- // if the parameter spans across multiple lines we have to strip out the
- // line continuation -- jht 4/29/98
- nsAutoCString tempStr(valueStart, valueEnd - valueStart);
- tempStr.StripChars("\r\n");
- char *res = ToNewCString(tempStr);
- NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY);
-
- if (isQuotedString)
- RemoveQuotedStringEscapes(res);
- caseAResult = res;
- // keep going, we may find a RFC 2231/5987 encoded alternative
- }
- // case B, C, and D
- else if (nameLen > paramLen &&
- !nsCRT::strncasecmp(nameStart, aParamName, paramLen) &&
- *(nameStart + paramLen) == '*') {
- // 1st char past '*'
- const char *cp = nameStart + paramLen + 1;
- // if param name ends in "*" we need do to RFC5987 "ext-value" decoding
- bool needExtDecoding = *(nameEnd - 1) == '*';
- bool caseB = nameLen == paramLen + 1;
- bool caseCStart = (*cp == '0') && needExtDecoding;
- // parse the segment number
- int32_t segmentNumber = -1;
- if (!caseB) {
- int32_t segLen = (nameEnd - cp) - (needExtDecoding ? 1 : 0);
- segmentNumber = parseSegmentNumber(cp, segLen);
- if (segmentNumber == -1) {
- acceptContinuations = false;
- goto increment_str;
- }
- }
- // CaseB and start of CaseC: requires charset and optional language
- // in quotes (quotes required even if lang is blank)
- if (caseB || (caseCStart && acceptContinuations)) {
- // look for single quotation mark(')
- const char *sQuote1 = PL_strchr(valueStart, 0x27);
- const char *sQuote2 = sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nullptr;
- // Two single quotation marks must be present even in
- // absence of charset and lang.
- if (!sQuote1 || !sQuote2) {
- NS_WARNING("Mandatory two single quotes are missing in header parameter\n");
- }
- const char *charsetStart = nullptr;
- int32_t charsetLength = 0;
- const char *langStart = nullptr;
- int32_t langLength = 0;
- const char *rawValStart = nullptr;
- int32_t rawValLength = 0;
- if (sQuote2 && sQuote1) {
- // both delimiters present: charSet'lang'rawVal
- rawValStart = sQuote2 + 1;
- rawValLength = valueEnd - rawValStart;
- langStart = sQuote1 + 1;
- langLength = sQuote2 - langStart;
- charsetStart = valueStart;
- charsetLength = sQuote1 - charsetStart;
- }
- else if (sQuote1) {
- // one delimiter; assume charset'rawVal
- rawValStart = sQuote1 + 1;
- rawValLength = valueEnd - rawValStart;
- charsetStart = valueStart;
- charsetLength = sQuote1 - valueStart;
- }
- else {
- // no delimiter: just rawVal
- rawValStart = valueStart;
- rawValLength = valueEnd - valueStart;
- }
- if (langLength != 0) {
- lang.Assign(langStart, langLength);
- }
- // keep the charset for later
- if (caseB) {
- charsetB.Assign(charsetStart, charsetLength);
- } else {
- // if caseCorD
- charsetCD.Assign(charsetStart, charsetLength);
- }
- // non-empty value part
- if (rawValLength > 0) {
- if (!caseBResult && caseB) {
- if (!IsValidPercentEscaped(rawValStart, rawValLength)) {
- goto increment_str;
- }
- // allocate buffer for the raw value
- char *tmpResult = (char *) nsMemory::Clone(rawValStart, rawValLength + 1);
- if (!tmpResult) {
- goto increment_str;
- }
- *(tmpResult + rawValLength) = 0;
- nsUnescape(tmpResult);
- caseBResult = tmpResult;
- } else {
- // caseC
- bool added = addContinuation(segments, 0, rawValStart,
- rawValLength, needExtDecoding,
- isQuotedString);
- if (!added) {
- // continuation not added, stop processing them
- acceptContinuations = false;
- }
- }
- }
- } // end of if-block : title*0*= or title*=
- // caseD: a line of multiline param with no need for unescaping : title*[0-9]=
- // or 2nd or later lines of a caseC param : title*[1-9]*=
- else if (acceptContinuations && segmentNumber != -1) {
- uint32_t valueLength = valueEnd - valueStart;
- bool added = addContinuation(segments, segmentNumber, valueStart,
- valueLength, needExtDecoding,
- isQuotedString);
- if (!added) {
- // continuation not added, stop processing them
- acceptContinuations = false;
- }
- } // end of if-block : title*[0-9]= or title*[1-9]*=
- }
- // str now points after the end of the value.
- // skip over whitespace, ';', whitespace.
- increment_str:
- while (nsCRT::IsAsciiSpace(*str)) ++str;
- if (*str == ';') {
- ++str;
- } else {
- // stop processing the header field; either we are done or the
- // separator was missing
- break;
- }
- while (nsCRT::IsAsciiSpace(*str)) ++str;
- }
- caseCDResult = combineContinuations(segments);
- if (caseBResult && !charsetB.IsEmpty()) {
- // check that the 2231/5987 result decodes properly given the
- // specified character set
- if (!IsValidOctetSequenceForCharset(charsetB, caseBResult))
- caseBResult = nullptr;
- }
- if (caseCDResult && !charsetCD.IsEmpty()) {
- // check that the 2231/5987 result decodes properly given the
- // specified character set
- if (!IsValidOctetSequenceForCharset(charsetCD, caseCDResult))
- caseCDResult = nullptr;
- }
- if (caseBResult) {
- // prefer simple 5987 format over 2231 with continuations
- *aResult = caseBResult;
- caseBResult = nullptr;
- charset.Assign(charsetB);
- }
- else if (caseCDResult) {
- // prefer 2231/5987 with or without continuations over plain format
- *aResult = caseCDResult;
- caseCDResult = nullptr;
- charset.Assign(charsetCD);
- }
- else if (caseAResult) {
- *aResult = caseAResult;
- caseAResult = nullptr;
- }
- // free unused stuff
- free(caseAResult);
- free(caseBResult);
- free(caseCDResult);
- // if we have a result
- if (*aResult) {
- // then return charset and lang as well
- if (aLang && !lang.IsEmpty()) {
- uint32_t len = lang.Length();
- *aLang = (char *) nsMemory::Clone(lang.BeginReading(), len + 1);
- if (*aLang) {
- *(*aLang + len) = 0;
- }
- }
- if (aCharset && !charset.IsEmpty()) {
- uint32_t len = charset.Length();
- *aCharset = (char *) nsMemory::Clone(charset.BeginReading(), len + 1);
- if (*aCharset) {
- *(*aCharset + len) = 0;
- }
- }
- }
- return *aResult ? NS_OK : NS_ERROR_INVALID_ARG;
- }
- nsresult
- internalDecodeRFC2047Header(const char* aHeaderVal, const char* aDefaultCharset,
- bool aOverrideCharset, bool aEatContinuations,
- nsACString& aResult)
- {
- aResult.Truncate();
- if (!aHeaderVal)
- return NS_ERROR_INVALID_ARG;
- if (!*aHeaderVal)
- return NS_OK;
- // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string but
- // aDefaultCharset is specified, decodes RFC 2047 encoding and converts
- // to UTF-8. Otherwise, just strips away CRLF.
- if (PL_strstr(aHeaderVal, "=?") ||
- (aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) ||
- Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal))))) {
- DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
- } else if (aEatContinuations &&
- (PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) {
- aResult = aHeaderVal;
- } else {
- aEatContinuations = false;
- aResult = aHeaderVal;
- }
- if (aEatContinuations) {
- nsAutoCString temp(aResult);
- temp.ReplaceSubstring("\n\t", " ");
- temp.ReplaceSubstring("\r\t", " ");
- temp.StripChars("\r\n");
- aResult = temp;
- }
- return NS_OK;
- }
- NS_IMETHODIMP
- nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal,
- const char* aDefaultCharset,
- bool aOverrideCharset,
- bool aEatContinuations,
- nsACString& aResult)
- {
- return internalDecodeRFC2047Header(aHeaderVal, aDefaultCharset,
- aOverrideCharset, aEatContinuations,
- aResult);
- }
// Returns true if |aChar| may appear unescaped in an RFC 5987 value, i.e.
// it is an ASCII letter, an ASCII digit or one of  ! # $ & + - . ^ _ ` | ~
// (see RFC 5987, Section 3.2.1, "attr-char").
bool IsRFC5987AttrChar(char aChar)
{
  if ((aChar >= 'a' && aChar <= 'z') ||
      (aChar >= 'A' && aChar <= 'Z') ||
      (aChar >= '0' && aChar <= '9')) {
    return true;
  }

  switch (aChar) {
    case '!': case '#': case '$': case '&':
    case '+': case '-': case '.': case '^':
    case '_': case '`': case '|': case '~':
      return true;
    default:
      return false;
  }
}
- // percent-decode a value
- // returns false on failure
- bool PercentDecode(nsACString& aValue)
- {
- char *c = (char *) moz_xmalloc(aValue.Length() + 1);
- if (!c) {
- return false;
- }
- strcpy(c, PromiseFlatCString(aValue).get());
- nsUnescape(c);
- aValue.Assign(c);
- free(c);
- return true;
- }
- // Decode a parameter value using the encoding defined in RFC 5987
- //
- // charset "'" [ language ] "'" value-chars
- NS_IMETHODIMP
- nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal,
- nsACString& aLang,
- nsAString& aResult)
- {
- nsAutoCString charset;
- nsAutoCString language;
- nsAutoCString value;
- uint32_t delimiters = 0;
- const nsCString& encoded = PromiseFlatCString(aParamVal);
- const char *c = encoded.get();
- while (*c) {
- char tc = *c++;
- if (tc == '\'') {
- // single quote
- delimiters++;
- } else if (((unsigned char)tc) >= 128) {
- // fail early, not ASCII
- NS_WARNING("non-US-ASCII character in RFC5987-encoded param");
- return NS_ERROR_INVALID_ARG;
- } else {
- if (delimiters == 0) {
- // valid characters are checked later implicitly
- charset.Append(tc);
- } else if (delimiters == 1) {
- // no value checking for now
- language.Append(tc);
- } else if (delimiters == 2) {
- if (IsRFC5987AttrChar(tc)) {
- value.Append(tc);
- } else if (tc == '%') {
- if (!IsHexDigit(c[0]) || !IsHexDigit(c[1])) {
- // we expect two more characters
- NS_WARNING("broken %-escape in RFC5987-encoded param");
- return NS_ERROR_INVALID_ARG;
- }
- value.Append(tc);
- // we consume two more
- value.Append(*c++);
- value.Append(*c++);
- } else {
- // character not allowed here
- NS_WARNING("invalid character in RFC5987-encoded param");
- return NS_ERROR_INVALID_ARG;
- }
- }
- }
- }
- if (delimiters != 2) {
- NS_WARNING("missing delimiters in RFC5987-encoded param");
- return NS_ERROR_INVALID_ARG;
- }
- // abort early for unsupported encodings
- if (!charset.LowerCaseEqualsLiteral("utf-8")) {
- NS_WARNING("unsupported charset in RFC5987-encoded param");
- return NS_ERROR_INVALID_ARG;
- }
- // percent-decode
- if (!PercentDecode(value)) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
- // return the encoding
- aLang.Assign(language);
- // finally convert octet sequence to UTF-8 and be done
- nsresult rv = NS_OK;
- nsCOMPtr<nsIUTF8ConverterService> cvtUTF8 =
- do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv);
- NS_ENSURE_SUCCESS(rv, rv);
- nsAutoCString utf8;
- rv = cvtUTF8->ConvertStringToUTF8(value, charset.get(), true, false, 1, utf8);
- NS_ENSURE_SUCCESS(rv, rv);
- CopyUTF8toUTF16(utf8, aResult);
- return NS_OK;
- }
- nsresult
- internalDecodeParameter(const nsACString& aParamValue, const char* aCharset,
- const char* aDefaultCharset, bool aOverrideCharset,
- bool aDecode2047, nsACString& aResult)
- {
- aResult.Truncate();
- // If aCharset is given, aParamValue was obtained from RFC2231/5987
- // encoding and we're pretty sure that it's in aCharset.
- if (aCharset && *aCharset)
- {
- nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
- if (cvtUTF8)
- return cvtUTF8->ConvertStringToUTF8(aParamValue, aCharset,
- true, true, 1, aResult);
- }
- const nsAFlatCString& param = PromiseFlatCString(aParamValue);
- nsAutoCString unQuoted;
- nsACString::const_iterator s, e;
- param.BeginReading(s);
- param.EndReading(e);
- // strip '\' when used to quote CR, LF, '"' and '\'
- for ( ; s != e; ++s) {
- if ((*s == '\\')) {
- if (++s == e) {
- --s; // '\' is at the end. move back and append '\'.
- }
- else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') {
- --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\'
- }
- // else : skip '\' and append the quoted character.
- }
- unQuoted.Append(*s);
- }
- aResult = unQuoted;
- nsresult rv = NS_OK;
-
- if (aDecode2047) {
- nsAutoCString decoded;
- // Try RFC 2047 encoding, instead.
- rv = internalDecodeRFC2047Header(unQuoted.get(), aDefaultCharset,
- aOverrideCharset, true, decoded);
- if (NS_SUCCEEDED(rv) && !decoded.IsEmpty())
- aResult = decoded;
- }
-
- return rv;
- }
- NS_IMETHODIMP
- nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
- const char* aCharset,
- const char* aDefaultCharset,
- bool aOverrideCharset,
- nsACString& aResult)
- {
- return internalDecodeParameter(aParamValue, aCharset, aDefaultCharset,
- aOverrideCharset, true, aResult);
- }
- #define ISHEXCHAR(c) \
- ((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39) || \
- (0x41 <= uint8_t(c) && uint8_t(c) <= 0x46) || \
- (0x61 <= uint8_t(c) && uint8_t(c) <= 0x66))
- // Decode Q encoding (RFC 2047).
- // static
- char *DecodeQ(const char *in, uint32_t length)
- {
- char *out, *dest = 0;
- out = dest = (char *)PR_Calloc(length + 1, sizeof(char));
- if (dest == nullptr)
- return nullptr;
- while (length > 0) {
- unsigned c = 0;
- switch (*in) {
- case '=':
- // check if |in| in the form of '=hh' where h is [0-9a-fA-F].
- if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2]))
- goto badsyntax;
- PR_sscanf(in + 1, "%2X", &c);
- *out++ = (char) c;
- in += 3;
- length -= 3;
- break;
- case '_':
- *out++ = ' ';
- in++;
- length--;
- break;
- default:
- if (*in & 0x80) goto badsyntax;
- *out++ = *in++;
- length--;
- }
- }
- *out++ = '\0';
- for (out = dest; *out ; ++out) {
- if (*out == '\t')
- *out = ' ';
- }
- return dest;
- badsyntax:
- PR_Free(dest);
- return nullptr;
- }
// Checks whether the first |len| octets of |input| look like text in a 7-bit
// non-ASCII encoding: HZ (a 7-bit encoding for simplified Chinese, RFC 1843)
// or, indicated by an ESC octet, one of the many ISO 2022 7-bit encodings
// (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554).
//
// Returns false as soon as an octet with the high bit set is found, true as
// soon as an ESC (0x1B) is found, and otherwise true only if the scan ends
// having seen at least one complete HZ sequence ("~{ ... ~}" or "~~").
//
// Only the first |len| octets are ever read, so |input| need not be
// NUL-terminated; a '~' that is the last octet of the range is treated as if
// it were followed by NUL.
// static
bool Is7bitNonAsciiString(const char *input, uint32_t len)
{
  enum { hz_initial, // No HZ seen yet
         hz_escaped, // Inside an HZ ~{ escape sequence
         hz_seen, // Have seen at least one complete HZ sequence
         hz_notpresent // Have seen something that is not legal HZ
  } hz_state;

  hz_state = hz_initial;
  while (len) {
    const int32_t c = uint8_t(*input++);
    len--;
    if (c & 0x80) return false;
    if (c == 0x1B) return true;
    if (c != '~') continue;

    // Peek at the octet after '~', but never beyond the caller's range:
    // without this guard the "~~" case below could decrement |len| past
    // zero and the loop would run off the end of the buffer.
    const char next = len ? *input : '\0';

    switch (hz_state) {
    case hz_initial:
    case hz_seen:
      if (next == '{') {
        hz_state = hz_escaped;
      } else if (next == '~') {
        // ~~ is the HZ encoding of ~.  Skip over second ~ as well
        // (next == '~' implies len > 0, so the decrement is safe)
        hz_state = hz_seen;
        input++;
        len--;
      } else {
        hz_state = hz_notpresent;
      }
      break;

    case hz_escaped:
      if (next == '}') hz_state = hz_seen;
      break;
    default:
      break;
    }
  }
  return hz_state == hz_seen;
}
- #define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD)
- // copy 'raw' sequences of octets in aInput to aOutput.
- // If aDefaultCharset is specified, the input is assumed to be in the
- // charset and converted to UTF-8. Otherwise, a blind copy is made.
- // If aDefaultCharset is specified, but the conversion to UTF-8
- // is not successful, each octet is replaced by Unicode replacement
- // chars. *aOutput is advanced by the number of output octets.
- // static
- void CopyRawHeader(const char *aInput, uint32_t aLen,
- const char *aDefaultCharset, nsACString &aOutput)
- {
- int32_t c;
- // If aDefaultCharset is not specified, make a blind copy.
- if (!aDefaultCharset || !*aDefaultCharset) {
- aOutput.Append(aInput, aLen);
- return;
- }
- // Copy as long as it's US-ASCII. An ESC may indicate ISO 2022
- // A ~ may indicate it is HZ
- while (aLen && (c = uint8_t(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) {
- aOutput.Append(char(c));
- aLen--;
- }
- if (!aLen) {
- return;
- }
- aInput--;
- // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii
- // string and aDefaultCharset is a 7bit non-ascii charset.
- bool skipCheck = (c == 0x1B || c == '~') &&
- IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset);
- // If not UTF-8, treat as default charset
- nsCOMPtr<nsIUTF8ConverterService>
- cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
- nsAutoCString utf8Text;
- if (cvtUTF8 &&
- NS_SUCCEEDED(
- cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen),
- aDefaultCharset, skipCheck, true, 1,
- utf8Text))) {
- aOutput.Append(utf8Text);
- } else { // replace each octet with Unicode replacement char in UTF-8.
- for (uint32_t i = 0; i < aLen; i++) {
- c = uint8_t(*aInput++);
- if (c & 0x80)
- aOutput.Append(REPLACEMENT_CHAR);
- else
- aOutput.Append(char(c));
- }
- }
- }
- nsresult DecodeQOrBase64Str(const char *aEncoded, size_t aLen, char aQOrBase64,
- const char *aCharset, nsACString &aResult)
- {
- char *decodedText;
- NS_ASSERTION(aQOrBase64 == 'Q' || aQOrBase64 == 'B', "Should be 'Q' or 'B'");
- if(aQOrBase64 == 'Q')
- decodedText = DecodeQ(aEncoded, aLen);
- else if (aQOrBase64 == 'B') {
- decodedText = PL_Base64Decode(aEncoded, aLen, nullptr);
- } else {
- return NS_ERROR_INVALID_ARG;
- }
- if (!decodedText) {
- return NS_ERROR_INVALID_ARG;
- }
- nsresult rv;
- nsCOMPtr<nsIUTF8ConverterService>
- cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv));
- nsAutoCString utf8Text;
- if (NS_SUCCEEDED(rv)) {
- // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
- rv = cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText),
- aCharset,
- IS_7BIT_NON_ASCII_CHARSET(aCharset),
- true, 1, utf8Text);
- }
- PR_Free(decodedText);
- if (NS_FAILED(rv)) {
- return rv;
- }
- aResult.Append(utf8Text);
- return NS_OK;
- }
- static const char especials[] = R"(()<>@,;:\"/[]?.=)";
- // |decode_mime_part2_str| taken from comi18n.c
- // Decode RFC2047-encoded words in the input and convert the result to UTF-8.
- // If aOverrideCharset is true, charset in RFC2047-encoded words is
- // ignored and aDefaultCharset is assumed, instead. aDefaultCharset
- // is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
- //static
- nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset,
- bool aOverrideCharset, nsACString &aResult)
- {
- const char *p, *q = nullptr, *r;
- const char *begin; // tracking pointer for where we are in the input buffer
- int32_t isLastEncodedWord = 0;
- const char *charsetStart, *charsetEnd;
- nsAutoCString prevCharset, curCharset;
- nsAutoCString encodedText;
- char prevEncoding = '\0', curEncoding;
- nsresult rv;
- begin = aHeader;
- // To avoid buffer realloc, if possible, set capacity in advance. No
- // matter what, more than 3x expansion can never happen for all charsets
- // supported by Mozilla. SCSU/BCSU with the sliding window set to a
- // non-BMP block may be exceptions, but Mozilla does not support them.
- // Neither any known mail/news program use them. Even if there's, we're
- // safe because we don't use a raw *char any more.
- aResult.SetCapacity(3 * strlen(aHeader));
- while ((p = PL_strstr(begin, "=?")) != 0) {
- if (isLastEncodedWord) {
- // See if it's all whitespace.
- for (q = begin; q < p; ++q) {
- if (!PL_strchr(" \t\r\n", *q)) break;
- }
- }
- if (!isLastEncodedWord || q < p) {
- if (!encodedText.IsEmpty()) {
- rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
- prevEncoding, prevCharset.get(), aResult);
- if (NS_FAILED(rv)) {
- aResult.Append(encodedText);
- }
- encodedText.Truncate();
- prevCharset.Truncate();
- prevEncoding = '\0';
- }
- // copy the part before the encoded-word
- CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
- begin = p;
- }
- p += 2;
- // Get charset info
- charsetStart = p;
- charsetEnd = 0;
- for (q = p; *q != '?'; q++) {
- if (*q <= ' ' || PL_strchr(especials, *q)) {
- goto badsyntax;
- }
- // RFC 2231 section 5
- if (!charsetEnd && *q == '*') {
- charsetEnd = q;
- }
- }
- if (!charsetEnd) {
- charsetEnd = q;
- }
- q++;
- curEncoding = nsCRT::ToUpper(*q);
- if (curEncoding != 'Q' && curEncoding != 'B')
- goto badsyntax;
- if (q[1] != '?')
- goto badsyntax;
- // loop-wise, keep going until we hit "?=". the inner check handles the
- // nul terminator should the string terminate before we hit the right
- // marker. (And the r[1] will never reach beyond the end of the string
- // because *r != '?' is true if r is the nul character.)
- for (r = q + 2; *r != '?' || r[1] != '='; r++) {
- if (*r < ' ') goto badsyntax;
- }
- if (r == q + 2) {
- // it's empty, skip
- begin = r + 2;
- isLastEncodedWord = 1;
- continue;
- }
- curCharset.Assign(charsetStart, charsetEnd - charsetStart);
- // Override charset if requested. Never override labeled UTF-8.
- // Use default charset instead of UNKNOWN-8BIT
- if ((aOverrideCharset && 0 != nsCRT::strcasecmp(curCharset.get(), "UTF-8"))
- || (aDefaultCharset && 0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN-8BIT"))
- ) {
- curCharset = aDefaultCharset;
- }
- const char *R;
- R = r;
- if (curEncoding == 'B') {
- // bug 227290. ignore an extraneous '=' at the end.
- // (# of characters in B-encoded part has to be a multiple of 4)
- int32_t n = r - (q + 2);
- R -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0;
- }
- // Bug 493544. Don't decode the encoded text until it ends
- if (R[-1] != '='
- && (prevCharset.IsEmpty()
- || (curCharset == prevCharset && curEncoding == prevEncoding))
- ) {
- encodedText.Append(q + 2, R - (q + 2));
- prevCharset = curCharset;
- prevEncoding = curEncoding;
- begin = r + 2;
- isLastEncodedWord = 1;
- continue;
- }
- bool bDecoded; // If the current line has been decoded.
- bDecoded = false;
- if (!encodedText.IsEmpty()) {
- if (curCharset == prevCharset && curEncoding == prevEncoding) {
- encodedText.Append(q + 2, R - (q + 2));
- bDecoded = true;
- }
- rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
- prevEncoding, prevCharset.get(), aResult);
- if (NS_FAILED(rv)) {
- aResult.Append(encodedText);
- }
- encodedText.Truncate();
- prevCharset.Truncate();
- prevEncoding = '\0';
- }
- if (!bDecoded) {
- rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding,
- curCharset.get(), aResult);
- if (NS_FAILED(rv)) {
- aResult.Append(encodedText);
- }
- }
- begin = r + 2;
- isLastEncodedWord = 1;
- continue;
- badsyntax:
- if (!encodedText.IsEmpty()) {
- rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
- prevEncoding, prevCharset.get(), aResult);
- if (NS_FAILED(rv)) {
- aResult.Append(encodedText);
- }
- encodedText.Truncate();
- prevCharset.Truncate();
- }
- // copy the part before the encoded-word
- aResult.Append(begin, p - begin);
- begin = p;
- isLastEncodedWord = 0;
- }
- if (!encodedText.IsEmpty()) {
- rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
- prevEncoding, prevCharset.get(), aResult);
- if (NS_FAILED(rv)) {
- aResult.Append(encodedText);
- }
- }
- // put the tail back
- CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);
- nsAutoCString tempStr(aResult);
- tempStr.ReplaceChar('\t', ' ');
- aResult = tempStr;
- return NS_OK;
- }
|