12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169 |
- /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
- /* vim:set ts=4 sw=4 sts=4 et cindent: */
- /* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
- #include "mozilla/RangedPtr.h"
- #include <algorithm>
- #include <iterator>
- #include "nsURLHelper.h"
- #include "nsIFile.h"
- #include "nsIURLParser.h"
- #include "nsCOMPtr.h"
- #include "nsCRT.h"
- #include "nsNetCID.h"
- #include "mozilla/Preferences.h"
- #include "prnetdb.h"
- #include "mozilla/Tokenizer.h"
- using namespace mozilla;
- //----------------------------------------------------------------------------
- // Init/Shutdown
- //----------------------------------------------------------------------------
// Set to true by InitGlobals() and back to false by net_ShutdownURLHelper().
static bool gInitialized = false;
// Cached URL parser services. InitGlobals() takes a reference on each parser
// it manages to obtain; net_ShutdownURLHelper() releases them again.
static nsIURLParser *gNoAuthURLParser = nullptr;
static nsIURLParser *gAuthURLParser = nullptr;
static nsIURLParser *gStdURLParser = nullptr;
// Maximum URL length. InitGlobals() registers this variable as a cache of the
// "network.standard-url.max-length" preference.
static int32_t gMaxLength = 1048576; // Default: 1MB
- static void
- InitGlobals()
- {
- nsCOMPtr<nsIURLParser> parser;
- parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID);
- NS_ASSERTION(parser, "failed getting 'noauth' url parser");
- if (parser) {
- gNoAuthURLParser = parser.get();
- NS_ADDREF(gNoAuthURLParser);
- }
- parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID);
- NS_ASSERTION(parser, "failed getting 'auth' url parser");
- if (parser) {
- gAuthURLParser = parser.get();
- NS_ADDREF(gAuthURLParser);
- }
- parser = do_GetService(NS_STDURLPARSER_CONTRACTID);
- NS_ASSERTION(parser, "failed getting 'std' url parser");
- if (parser) {
- gStdURLParser = parser.get();
- NS_ADDREF(gStdURLParser);
- }
- gInitialized = true;
- Preferences::AddIntVarCache(&gMaxLength,
- "network.standard-url.max-length", 1048576);
- }
- void
- net_ShutdownURLHelper()
- {
- if (gInitialized) {
- NS_IF_RELEASE(gNoAuthURLParser);
- NS_IF_RELEASE(gAuthURLParser);
- NS_IF_RELEASE(gStdURLParser);
- gInitialized = false;
- }
- }
// Returns the current value of gMaxLength (1048576 unless something has
// changed it). This accessor does not trigger InitGlobals().
int32_t net_GetURLMaxLength()
{
    return gMaxLength;
}
- //----------------------------------------------------------------------------
- // nsIURLParser getters
- //----------------------------------------------------------------------------
- nsIURLParser *
- net_GetAuthURLParser()
- {
- if (!gInitialized)
- InitGlobals();
- return gAuthURLParser;
- }
- nsIURLParser *
- net_GetNoAuthURLParser()
- {
- if (!gInitialized)
- InitGlobals();
- return gNoAuthURLParser;
- }
- nsIURLParser *
- net_GetStdURLParser()
- {
- if (!gInitialized)
- InitGlobals();
- return gStdURLParser;
- }
- //---------------------------------------------------------------------------
- // GetFileFromURLSpec implementations
- //---------------------------------------------------------------------------
- nsresult
- net_GetURLSpecFromDir(nsIFile *aFile, nsACString &result)
- {
- nsAutoCString escPath;
- nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);
- if (NS_FAILED(rv))
- return rv;
- if (escPath.Last() != '/') {
- escPath += '/';
- }
- result = escPath;
- return NS_OK;
- }
- nsresult
- net_GetURLSpecFromFile(nsIFile *aFile, nsACString &result)
- {
- nsAutoCString escPath;
- nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);
- if (NS_FAILED(rv))
- return rv;
- // if this file references a directory, then we need to ensure that the
- // URL ends with a slash. this is important since it affects the rules
- // for relative URL resolution when this URL is used as a base URL.
- // if the file does not exist, then we make no assumption about its type,
- // and simply leave the URL unmodified.
- if (escPath.Last() != '/') {
- bool dir;
- rv = aFile->IsDirectory(&dir);
- if (NS_SUCCEEDED(rv) && dir)
- escPath += '/';
- }
- result = escPath;
- return NS_OK;
- }
- //----------------------------------------------------------------------------
- // file:// URL parsing
- //----------------------------------------------------------------------------
- nsresult
- net_ParseFileURL(const nsACString &inURL,
- nsACString &outDirectory,
- nsACString &outFileBaseName,
- nsACString &outFileExtension)
- {
- nsresult rv;
- if (inURL.Length() > (uint32_t) gMaxLength) {
- return NS_ERROR_MALFORMED_URI;
- }
- outDirectory.Truncate();
- outFileBaseName.Truncate();
- outFileExtension.Truncate();
- const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
- const char *url = flatURL.get();
- nsAutoCString scheme;
- rv = net_ExtractURLScheme(flatURL, scheme);
- if (NS_FAILED(rv)) return rv;
- if (!scheme.EqualsLiteral("file")) {
- NS_ERROR("must be a file:// url");
- return NS_ERROR_UNEXPECTED;
- }
- nsIURLParser *parser = net_GetNoAuthURLParser();
- NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);
- uint32_t pathPos, filepathPos, directoryPos, basenamePos, extensionPos;
- int32_t pathLen, filepathLen, directoryLen, basenameLen, extensionLen;
- // invoke the parser to extract the URL path
- rv = parser->ParseURL(url, flatURL.Length(),
- nullptr, nullptr, // don't care about scheme
- nullptr, nullptr, // don't care about authority
- &pathPos, &pathLen);
- if (NS_FAILED(rv)) return rv;
- // invoke the parser to extract filepath from the path
- rv = parser->ParsePath(url + pathPos, pathLen,
- &filepathPos, &filepathLen,
- nullptr, nullptr, // don't care about query
- nullptr, nullptr); // don't care about ref
- if (NS_FAILED(rv)) return rv;
- filepathPos += pathPos;
- // invoke the parser to extract the directory and filename from filepath
- rv = parser->ParseFilePath(url + filepathPos, filepathLen,
- &directoryPos, &directoryLen,
- &basenamePos, &basenameLen,
- &extensionPos, &extensionLen);
- if (NS_FAILED(rv)) return rv;
- if (directoryLen > 0)
- outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen);
- if (basenameLen > 0)
- outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen);
- if (extensionLen > 0)
- outFileExtension = Substring(inURL, filepathPos + extensionPos, extensionLen);
- // since we are using a no-auth url parser, there will never be a host
- // XXX not strictly true... file://localhost/foo/bar.html is a valid URL
- return NS_OK;
- }
- //----------------------------------------------------------------------------
- // path manipulation functions
- //----------------------------------------------------------------------------
// Collapses "/./" and "/../" segments of |path| in place.
//
// Only the part of |path| before the first '?' or '#' is rewritten; whatever
// follows is copied through unchanged. The result is never longer than the
// input, which is why the rewrite can happen inside the caller's buffer.
//
// flags:
//   NET_COALESCE_DOUBLE_SLASH_IS_ROOT - a leading "//" or "/%2F" (compared
//       case-insensitively) is remembered as a special root marker.
//   NET_COALESCE_ALLOW_RELATIVE_ROOT  - "/.." segments that would climb above
//       the counted hierarchy are copied to the output instead of dropped.
// path: NUL-terminated, writable buffer holding the path to rewrite.
void
net_CoalesceDirs(netCoalesceFlags flags, char* path)
{
    /* Stolen from the old netlib's mkparse.c.
     *
     * modifies a url of the form   /foo/../foo1  ->  /foo1
     *                       and    /foo/./foo1   ->  /foo/foo1
     *                       and    /foo/foo1/..  ->  /foo/
     */
    char *fwdPtr = path;      // read cursor
    char *urlPtr = path;      // write cursor (never ahead of fwdPtr)
    char *lastslash = path;   // last '/' before '?' / '#', set below
    uint32_t traversal = 0;   // number of directory levels copied so far
    uint32_t special_ftp_len = 0; // length of the special root marker, if any

    /* Remember if this url is a special ftp one: */
    if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT)
    {
        /* some schemes (for example ftp) have the speciality that
           the path can begin // or /%2F to mark the root of the
           server's filesystem, a simple / only marks the root relative
           to the user logging in. We remember the length of the marker */
        if (nsCRT::strncasecmp(path,"/%2F",4) == 0)
            special_ftp_len = 4;
        else if (nsCRT::strncmp(path,"//",2) == 0 )
            special_ftp_len = 2;
    }

    /* find the last slash before # or ? */
    for(; (*fwdPtr != '\0') &&
            (*fwdPtr != '?') &&
            (*fwdPtr != '#'); ++fwdPtr)
    {
    }

    /* found nothing, but go back one only */
    /* if there is something to go back to */
    if (fwdPtr != path && *fwdPtr == '\0')
    {
        --fwdPtr;
    }

    /* search the slash */
    for(; (fwdPtr != path) &&
            (*fwdPtr != '/'); --fwdPtr)
    {
    }
    lastslash = fwdPtr;
    fwdPtr = path;

    /* replace all %2E or %2e with . in the path */
    /* but stop at lastslash unless it points at the terminating NUL */
    for(; (*fwdPtr != '\0') &&
            (*fwdPtr != '?') &&
            (*fwdPtr != '#') &&
            (*lastslash == '\0' || fwdPtr != lastslash); ++fwdPtr)
    {
        if (*fwdPtr == '%' && *(fwdPtr+1) == '2' &&
            (*(fwdPtr+2) == 'E' || *(fwdPtr+2) == 'e'))
        {
            // Three input chars collapse into one output char.
            *urlPtr++ = '.';
            ++fwdPtr;
            ++fwdPtr;
        }
        else
        {
            *urlPtr++ = *fwdPtr;
        }
    }
    // Copy remaining stuff past the #?;
    for (; *fwdPtr != '\0'; ++fwdPtr)
    {
        *urlPtr++ = *fwdPtr;
    }
    *urlPtr = '\0';  // terminate the url

    // start again, this time for real
    fwdPtr = path;
    urlPtr = path;

    for(; (*fwdPtr != '\0') &&
            (*fwdPtr != '?') &&
            (*fwdPtr != '#'); ++fwdPtr)
    {
        if (*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '/' )
        {
            // remove . followed by slash
            // (the loop increment then skips the '.', so the next
            // iteration sees the slash after it)
            ++fwdPtr;
        }
        else if(*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '.' &&
                (*(fwdPtr+3) == '/' ||
                    *(fwdPtr+3) == '\0' || // This will take care of
                    *(fwdPtr+3) == '?' ||  // something like foo/bar/..#sometag
                    *(fwdPtr+3) == '#'))
        {
            // remove foo/..
            // reverse the urlPtr to the previous slash if possible
            // if url does not allow relative root then drop .. above root
            // otherwise retain them in the path
            if(traversal > 0 || !(flags &
                                  NET_COALESCE_ALLOW_RELATIVE_ROOT))
            {
                if (urlPtr != path)
                    urlPtr--; // we must be going back at least by one
                for(;*urlPtr != '/' && urlPtr != path; urlPtr--)
                    ;  // null body
                // count back. NOTE(review): traversal is unsigned and this
                // can wrap when it is already 0 (flag not set); the value is
                // only tested together with the flag above.
                --traversal;
                // forward the fwdPtr past the ../
                fwdPtr += 2;
                // if we have reached the beginning of the path
                // while searching for the previous / and we remember
                // that it is an url that begins with /%2F then
                // advance urlPtr again by 3 chars because /%2F already
                // marks the root of the path
                if (urlPtr == path && special_ftp_len > 3)
                {
                    ++urlPtr;
                    ++urlPtr;
                    ++urlPtr;
                }
                // special case if we have reached the end
                // to preserve the last /
                if (*fwdPtr == '.' && *(fwdPtr+1) == '\0')
                    ++urlPtr;
            }
            else
            {
                // there are too many /.. in this path, just copy them instead.
                // forward the urlPtr past the /.. and copying it

                // However if we remember it is an url that starts with
                // /%2F and urlPtr just points at the "F" of "/%2F" then do
                // not overwrite it with the /, just copy .. and move forward
                // urlPtr.
                if (special_ftp_len > 3 && urlPtr == path+special_ftp_len-1)
                    ++urlPtr;
                else
                    *urlPtr++ = *fwdPtr;
                ++fwdPtr;
                *urlPtr++ = *fwdPtr;
                ++fwdPtr;
                *urlPtr++ = *fwdPtr;
            }
        }
        else
        {
            // count the hierarchy, but only if we have not reached
            // the root of some special urls with a special root marker
            if (*fwdPtr == '/' &&  *(fwdPtr+1) != '.' &&
               (special_ftp_len != 2 || *(fwdPtr+1) != '/'))
                traversal++;
            // copy the url incrementally
            *urlPtr++ = *fwdPtr;
        }
    }

    /*
     *  Now lets remove trailing . case
     *     /foo/foo1/.   ->  /foo/foo1/
     */

    if ((urlPtr > (path+1)) && (*(urlPtr-1) == '.') && (*(urlPtr-2) == '/'))
        urlPtr--;

    // Copy remaining stuff past the #?;
    for (; *fwdPtr != '\0'; ++fwdPtr)
    {
        *urlPtr++ = *fwdPtr;
    }
    *urlPtr = '\0';  // terminate the url
}
- nsresult
- net_ResolveRelativePath(const nsACString &relativePath,
- const nsACString &basePath,
- nsACString &result)
- {
- nsAutoCString name;
- nsAutoCString path(basePath);
- bool needsDelim = false;
- if ( !path.IsEmpty() ) {
- char16_t last = path.Last();
- needsDelim = !(last == '/');
- }
- nsACString::const_iterator beg, end;
- relativePath.BeginReading(beg);
- relativePath.EndReading(end);
- bool stop = false;
- char c;
- for (; !stop; ++beg) {
- c = (beg == end) ? '\0' : *beg;
- //printf("%c [name=%s] [path=%s]\n", c, name.get(), path.get());
- switch (c) {
- case '\0':
- case '#':
- case '?':
- stop = true;
- MOZ_FALLTHROUGH;
- case '/':
- // delimiter found
- if (name.EqualsLiteral("..")) {
- // pop path
- // If we already have the delim at end, then
- // skip over that when searching for next one to the left
- int32_t offset = path.Length() - (needsDelim ? 1 : 2);
- // First check for errors
- if (offset < 0 )
- return NS_ERROR_MALFORMED_URI;
- int32_t pos = path.RFind("/", false, offset);
- if (pos >= 0)
- path.Truncate(pos + 1);
- else
- path.Truncate();
- }
- else if (name.IsEmpty() || name.EqualsLiteral(".")) {
- // do nothing
- }
- else {
- // append name to path
- if (needsDelim)
- path += '/';
- path += name;
- needsDelim = true;
- }
- name.Truncate();
- break;
- default:
- // append char to name
- name += c;
- }
- }
- // append anything left on relativePath (e.g. #..., ;..., ?...)
- if (c != '\0')
- path += Substring(--beg, end);
- result = path;
- return NS_OK;
- }
- //----------------------------------------------------------------------------
- // scheme fu
- //----------------------------------------------------------------------------
- static bool isAsciiAlpha(char c) {
- return nsCRT::IsAsciiAlpha(c);
- }
- static bool
- net_IsValidSchemeChar(const char aChar)
- {
- if (nsCRT::IsAsciiAlpha(aChar) || nsCRT::IsAsciiDigit(aChar) ||
- aChar == '+' || aChar == '.' || aChar == '-') {
- return true;
- }
- return false;
- }
- /* Extract URI-Scheme if possible */
- nsresult
- net_ExtractURLScheme(const nsACString &inURI,
- nsACString& scheme)
- {
- nsACString::const_iterator start, end;
- inURI.BeginReading(start);
- inURI.EndReading(end);
- // Strip C0 and space from begining
- while (start != end) {
- if ((uint8_t) *start > 0x20) {
- break;
- }
- start++;
- }
- Tokenizer p(Substring(start, end), "\r\n\t");
- p.Record();
- if (!p.CheckChar(isAsciiAlpha)) {
- // First char must be alpha
- return NS_ERROR_MALFORMED_URI;
- }
- while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
- // Skip valid scheme characters or \r\n\t
- }
- if (!p.CheckChar(':')) {
- return NS_ERROR_MALFORMED_URI;
- }
- p.Claim(scheme);
- scheme.StripChars("\r\n\t");
- return NS_OK;
- }
- bool
- net_IsValidScheme(const char *scheme, uint32_t schemeLen)
- {
- // first char must be alpha
- if (!nsCRT::IsAsciiAlpha(*scheme))
- return false;
- // nsCStrings may have embedded nulls -- reject those too
- for (; schemeLen; ++scheme, --schemeLen) {
- if (!(nsCRT::IsAsciiAlpha(*scheme) ||
- nsCRT::IsAsciiDigit(*scheme) ||
- *scheme == '+' ||
- *scheme == '.' ||
- *scheme == '-'))
- return false;
- }
- return true;
- }
- bool
- net_IsAbsoluteURL(const nsACString& uri)
- {
- nsACString::const_iterator start, end;
- uri.BeginReading(start);
- uri.EndReading(end);
- // Strip C0 and space from begining
- while (start != end) {
- if ((uint8_t) *start > 0x20) {
- break;
- }
- start++;
- }
- Tokenizer p(Substring(start, end), "\r\n\t");
- // First char must be alpha
- if (!p.CheckChar(isAsciiAlpha)) {
- return false;
- }
- while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
- // Skip valid scheme characters or \r\n\t
- }
- if (!p.CheckChar(':')) {
- return false;
- }
- p.SkipWhites();
- if (!p.CheckChar('/')) {
- return false;
- }
- p.SkipWhites();
- if (p.CheckChar('/')) {
- // aSpec is really absolute. Ignore aBaseURI in this case
- return true;
- }
- return false;
- }
- void
- net_FilterURIString(const nsACString& input, nsACString& result)
- {
- const char kCharsToStrip[] = "\r\n\t";
- result.Truncate();
- auto start = input.BeginReading();
- auto end = input.EndReading();
- // Trim off leading and trailing invalid chars.
- auto charFilter = [](char c) { return static_cast<uint8_t>(c) > 0x20; };
- auto newStart = std::find_if(start, end, charFilter);
- auto newEnd = std::find_if(
- std::reverse_iterator<decltype(end)>(end),
- std::reverse_iterator<decltype(newStart)>(newStart),
- charFilter).base();
- // Check if chars need to be stripped.
- auto itr = std::find_first_of(
- newStart, newEnd, std::begin(kCharsToStrip), std::end(kCharsToStrip));
- const bool needsStrip = itr != newEnd;
- // Just use the passed in string rather than creating new copies if no
- // changes are necessary.
- if (newStart == start && newEnd == end && !needsStrip) {
- result = input;
- return;
- }
- result.Assign(Substring(newStart, newEnd));
- if (needsStrip) {
- result.StripChars(kCharsToStrip);
- }
- }
- #if defined(XP_WIN)
- bool
- net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
- {
- bool writing = false;
- nsACString::const_iterator beginIter, endIter;
- aURL.BeginReading(beginIter);
- aURL.EndReading(endIter);
- const char *s, *begin = beginIter.get();
- for (s = begin; s != endIter.get(); ++s)
- {
- if (*s == '\\')
- {
- writing = true;
- if (s > begin)
- aResultBuf.Append(begin, s - begin);
- aResultBuf += '/';
- begin = s + 1;
- }
- }
- if (writing && s > begin)
- aResultBuf.Append(begin, s - begin);
- return writing;
- }
- #endif
- //----------------------------------------------------------------------------
- // miscellaneous (i.e., stuff that should really be elsewhere)
- //----------------------------------------------------------------------------
// Lower-cases |c| in place when it is an ASCII upper-case letter; any other
// value is left untouched.
static inline
void ToLower(char &c)
{
    // The unsigned subtraction maps everything outside 'A'..'Z' to a value
    // larger than the width of that range, so one comparison suffices.
    const unsigned distanceFromA = (unsigned)(c - 'A');
    const unsigned alphabetSpan = (unsigned)('Z' - 'A');
    if (distanceFromA > alphabetSpan) {
        return;
    }
    c += 'a' - 'A';
}
- void
- net_ToLowerCase(char *str, uint32_t length)
- {
- for (char *end = str + length; str < end; ++str)
- ToLower(*str);
- }
- void
- net_ToLowerCase(char *str)
- {
- for (; *str; ++str)
- ToLower(*str);
- }
// Returns a pointer to the first character in [iter, stop) that occurs in
// the NUL-terminated |set|. The scan also ends at the first NUL byte in the
// input. When nothing matches, the position where the scan ended (|stop| or
// the NUL byte) is returned.
char *
net_FindCharInSet(const char *iter, const char *stop, const char *set)
{
    const char *cursor = iter;
    while (cursor != stop && *cursor != '\0') {
        const char *candidate = set;
        while (*candidate != '\0' && *candidate != *cursor) {
            ++candidate;
        }
        if (*candidate != '\0') {
            // |candidate| stopped on a set member equal to *cursor.
            return (char *) cursor;
        }
        ++cursor;
    }
    return (char *) cursor;
}
// Returns a pointer to the first character in [iter, stop) that does NOT
// occur in the NUL-terminated |set|, or |stop| when every character of the
// range is in the set.
//
// The range bound is checked before each read, so an empty range
// (iter == stop) is returned as-is without touching *iter and the scan can
// never run past |stop|.
char *
net_FindCharNotInSet(const char *iter, const char *stop, const char *set)
{
    for (; iter != stop; ++iter) {
        bool inSet = false;
        for (const char *s = set; *s; ++s) {
            if (*iter == *s) {
                inSet = true;
                break;
            }
        }
        if (!inSet)
            break;
    }
    return (char *) iter;
}
// Scans the range [stop, iter) backwards and returns a pointer to the last
// character that does NOT occur in the NUL-terminated |set|. When every
// character is in the set (or the range is empty) the result is |stop| - 1.
//
// Note the argument order: |stop| is the beginning of the range and |iter|
// is one past its end.
char *
net_RFindCharNotInSet(const char *stop, const char *iter, const char *set)
{
    const char *cursor = iter - 1;
    const char *const limit = stop - 1;

    while (cursor != limit) {
        bool member = false;
        for (const char *s = set; *s; ++s) {
            if (*cursor == *s) {
                member = true;
                break;
            }
        }
        if (!member) {
            break;
        }
        --cursor;
    }
    return (char *) cursor;
}
- #define HTTP_LWS " \t"
- // Return the index of the closing quote of the string, if any
- static uint32_t
- net_FindStringEnd(const nsCString& flatStr,
- uint32_t stringStart,
- char stringDelim)
- {
- NS_ASSERTION(stringStart < flatStr.Length() &&
- flatStr.CharAt(stringStart) == stringDelim &&
- (stringDelim == '"' || stringDelim == '\''),
- "Invalid stringStart");
- const char set[] = { stringDelim, '\\', '\0' };
- do {
- // stringStart points to either the start quote or the last
- // escaped char (the char following a '\\')
-
- // Write to searchStart here, so that when we get back to the
- // top of the loop right outside this one we search from the
- // right place.
- uint32_t stringEnd = flatStr.FindCharInSet(set, stringStart + 1);
- if (stringEnd == uint32_t(kNotFound))
- return flatStr.Length();
- if (flatStr.CharAt(stringEnd) == '\\') {
- // Hit a backslash-escaped char. Need to skip over it.
- stringStart = stringEnd + 1;
- if (stringStart == flatStr.Length())
- return stringStart;
- // Go back to looking for the next escape or the string end
- continue;
- }
- return stringEnd;
- } while (true);
- NS_NOTREACHED("How did we get here?");
- return flatStr.Length();
- }
-
- static uint32_t
- net_FindMediaDelimiter(const nsCString& flatStr,
- uint32_t searchStart,
- char delimiter)
- {
- do {
- // searchStart points to the spot from which we should start looking
- // for the delimiter.
- const char delimStr[] = { delimiter, '"', '\0' };
- uint32_t curDelimPos = flatStr.FindCharInSet(delimStr, searchStart);
- if (curDelimPos == uint32_t(kNotFound))
- return flatStr.Length();
-
- char ch = flatStr.CharAt(curDelimPos);
- if (ch == delimiter) {
- // Found delimiter
- return curDelimPos;
- }
- // We hit the start of a quoted string. Look for its end.
- searchStart = net_FindStringEnd(flatStr, curDelimPos, ch);
- if (searchStart == flatStr.Length())
- return searchStart;
- ++searchStart;
- // searchStart now points to the first char after the end of the
- // string, so just go back to the top of the loop and look for
- // |delimiter| again.
- } while (true);
- NS_NOTREACHED("How did we get here?");
- return flatStr.Length();
- }
// Parses a single media-type ("type/subtype; param=value; ...") and merges
// the outcome into the caller's running content-type / charset state.
//
// aMediaTypeStr   - the media-type text to parse.
// aContentType    - in/out: replaced (lower-cased) when the parsed type is
//                   accepted and differs case-insensitively from it.
// aContentCharset - in/out: replaced when this media-type carries a charset
//                   parameter, or when the type changed while a charset had
//                   been seen before.
// aOffset         - added to aCharsetStart and aCharsetEnd if this
//                   function sets them.
// aHadCharset     - in/out: set to true when aContentCharset is assigned.
// aCharsetStart / aCharsetEnd - out: position of the charset parameter
//                   (from its leading ';' up to the end of the parameter),
//                   or the position where one would be inserted.
// aStrict         - true: accept only if nothing but LWS follows the parsed
//                   part; false: accept anything except the type "*/*".
//
// A type without a '/' is always rejected, in which case no out-parameter is
// touched.
static void
net_ParseMediaType(const nsACString &aMediaTypeStr,
                   nsACString       &aContentType,
                   nsACString       &aContentCharset,
                   int32_t          aOffset,
                   bool             *aHadCharset,
                   int32_t          *aCharsetStart,
                   int32_t          *aCharsetEnd,
                   bool             aStrict)
{
    const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);
    const char* start = flatStr.get();
    const char* end = start + flatStr.Length();

    // Trim LWS leading and trailing whitespace from type.  We include '(' in
    // the trailing trim set to catch media-type comments, which are not at all
    // standard, but may occur in rare cases.
    const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);
    const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";(");

    const char* charset = "";
    const char* charsetEnd = charset;
    int32_t charsetParamStart = 0;
    int32_t charsetParamEnd = 0;

    // NOTE(review): this is the length of the type, but it is later used as
    // an offset from |start| (see the aStrict check). The two differ when the
    // input has leading LWS -- confirm whether that is intended.
    uint32_t consumed = typeEnd - type;

    // Iterate over parameters
    bool typeHasCharset = false;
    uint32_t paramStart = flatStr.FindChar(';', typeEnd - start);
    if (paramStart != uint32_t(kNotFound)) {
        // We have parameters.  Iterate over them.
        uint32_t curParamStart = paramStart + 1;
        do {
            uint32_t curParamEnd =
                net_FindMediaDelimiter(flatStr, curParamStart, ';');

            const char* paramName = net_FindCharNotInSet(start + curParamStart,
                                                         start + curParamEnd,
                                                         HTTP_LWS);
            static const char charsetStr[] = "charset=";
            if (PL_strncasecmp(paramName, charsetStr,
                               sizeof(charsetStr) - 1) == 0) {
                // Remember the value of this charset parameter; a later
                // charset parameter overwrites an earlier one.
                charset = paramName + sizeof(charsetStr) - 1;
                charsetEnd = start + curParamEnd;
                typeHasCharset = true;
                // -1 so that the recorded range includes the leading ';'.
                charsetParamStart = curParamStart - 1;
                charsetParamEnd = curParamEnd;
            }

            consumed = curParamEnd;
            curParamStart = curParamEnd + 1;
        } while (curParamStart < flatStr.Length());
    }

    bool charsetNeedsQuotedStringUnescaping = false;
    if (typeHasCharset) {
        // Trim LWS leading and trailing whitespace from charset.  We include
        // '(' in the trailing trim set to catch media-type comments, which are
        // not at all standard, but may occur in rare cases.
        charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);
        if (*charset == '"') {
            // Quoted value: it runs up to the matching closing quote (or the
            // end of the string) and needs unescaping further down.
            charsetNeedsQuotedStringUnescaping = true;
            charsetEnd =
                start + net_FindStringEnd(flatStr, charset - start, *charset);
            charset++;
            NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");
        } else {
            charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";(");
        }
    }

    // if the server sent "*/*", it is meaningless, so do not store it.
    // also, if type is the same as aContentType, then just update the
    // charset.  however, if charset is empty and aContentType hasn't
    // changed, then don't wipe-out an existing aContentCharset.  We
    // also want to reject a mime-type if it does not include a slash.
    // some servers give junk after the charset parameter, which may
    // include a comma, so this check makes us a bit more tolerant.

    if (type != typeEnd &&
        memchr(type, '/', typeEnd - type) != nullptr &&
        (aStrict ? (net_FindCharNotInSet(start + consumed, end, HTTP_LWS) == end) :
                   (strncmp(type, "*/*", typeEnd - type) != 0))) {
        // Common case here is that aContentType is empty
        bool eq = !aContentType.IsEmpty() &&
            aContentType.Equals(Substring(type, typeEnd),
                                nsCaseInsensitiveCStringComparator());
        if (!eq) {
            aContentType.Assign(type, typeEnd - type);
            ToLowerCase(aContentType);
        }

        if ((!eq && *aHadCharset) || typeHasCharset) {
            *aHadCharset = true;
            if (charsetNeedsQuotedStringUnescaping) {
                // parameters using the "quoted-string" syntax need
                // backslash-escapes to be unescaped (see RFC 2616 Section 2.2)
                aContentCharset.Truncate();
                for (const char *c = charset; c != charsetEnd; c++) {
                    if (*c == '\\' && c + 1 != charsetEnd) {
                        // eat escape
                        c++;
                    }
                    aContentCharset.Append(*c);
                }
            }
            else {
                aContentCharset.Assign(charset, charsetEnd - charset);
            }
            if (typeHasCharset) {
                *aCharsetStart = charsetParamStart + aOffset;
                *aCharsetEnd = charsetParamEnd + aOffset;
            }
        }
        // Only set a new charset position if this is a different type
        // from the last one we had and it doesn't already have a
        // charset param.  If this is the same type, we probably want
        // to leave the charset position on its first occurrence.
        if (!eq && !typeHasCharset) {
            // Point at the first ';' if there is one, otherwise at the end
            // of the media-type; start and end coincide (empty range).
            int32_t charsetStart = int32_t(paramStart);
            if (charsetStart == kNotFound)
                charsetStart = flatStr.Length();

            *aCharsetEnd = *aCharsetStart = charsetStart + aOffset;
        }
    }
}
- #undef HTTP_LWS
- void
- net_ParseContentType(const nsACString &aHeaderStr,
- nsACString &aContentType,
- nsACString &aContentCharset,
- bool *aHadCharset)
- {
- int32_t dummy1, dummy2;
- net_ParseContentType(aHeaderStr, aContentType, aContentCharset,
- aHadCharset, &dummy1, &dummy2);
- }
- void
- net_ParseContentType(const nsACString &aHeaderStr,
- nsACString &aContentType,
- nsACString &aContentCharset,
- bool *aHadCharset,
- int32_t *aCharsetStart,
- int32_t *aCharsetEnd)
- {
- //
- // Augmented BNF (from RFC 2616 section 3.7):
- //
- // header-value = media-type *( LWS "," LWS media-type )
- // media-type = type "/" subtype *( LWS ";" LWS parameter )
- // type = token
- // subtype = token
- // parameter = attribute "=" value
- // attribute = token
- // value = token | quoted-string
- //
- //
- // Examples:
- //
- // text/html
- // text/html, text/html
- // text/html,text/html; charset=ISO-8859-1
- // text/html,text/html; charset="ISO-8859-1"
- // text/html;charset=ISO-8859-1, text/html
- // text/html;charset='ISO-8859-1', text/html
- // application/octet-stream
- //
- *aHadCharset = false;
- const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
-
- // iterate over media-types. Note that ',' characters can happen
- // inside quoted strings, so we need to watch out for that.
- uint32_t curTypeStart = 0;
- do {
- // curTypeStart points to the start of the current media-type. We want
- // to look for its end.
- uint32_t curTypeEnd =
- net_FindMediaDelimiter(flatStr, curTypeStart, ',');
-
- // At this point curTypeEnd points to the spot where the media-type
- // starting at curTypeEnd ends. Time to parse that!
- net_ParseMediaType(Substring(flatStr, curTypeStart,
- curTypeEnd - curTypeStart),
- aContentType, aContentCharset, curTypeStart,
- aHadCharset, aCharsetStart, aCharsetEnd, false);
- // And let's move on to the next media-type
- curTypeStart = curTypeEnd + 1;
- } while (curTypeStart < flatStr.Length());
- }
- void
- net_ParseRequestContentType(const nsACString &aHeaderStr,
- nsACString &aContentType,
- nsACString &aContentCharset,
- bool *aHadCharset)
- {
- //
- // Augmented BNF (from RFC 7231 section 3.1.1.1):
- //
- // media-type = type "/" subtype *( OWS ";" OWS parameter )
- // type = token
- // subtype = token
- // parameter = token "=" ( token / quoted-string )
- //
- // Examples:
- //
- // text/html
- // text/html; charset=ISO-8859-1
- // text/html; charset="ISO-8859-1"
- // application/octet-stream
- //
- aContentType.Truncate();
- aContentCharset.Truncate();
- *aHadCharset = false;
- const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
- // At this point curTypeEnd points to the spot where the media-type
- // starting at curTypeEnd ends. Time to parse that!
- nsAutoCString contentType, contentCharset;
- bool hadCharset = false;
- int32_t dummy1, dummy2;
- uint32_t typeEnd = net_FindMediaDelimiter(flatStr, 0, ',');
- if (typeEnd != flatStr.Length()) {
- // We have some stuff left at the end, so this is not a valid
- // request Content-Type header.
- return;
- }
- net_ParseMediaType(flatStr, contentType, contentCharset, 0,
- &hadCharset, &dummy1, &dummy2, true);
- aContentType = contentType;
- aContentCharset = contentCharset;
- *aHadCharset = hadCharset;
- }
- bool
- net_IsValidHostName(const nsCSubstring &host)
- {
- const char *end = host.EndReading();
- // Use explicit whitelists to select which characters we are
- // willing to send to lower-level DNS logic. This is more
- // self-documenting, and can also be slightly faster than the
- // blacklist approach, since DNS names are the common case, and
- // the commonest characters will tend to be near the start of
- // the list.
- // Whitelist for DNS names (RFC 1035) with extra characters added
- // for pragmatic reasons "$+_"
- // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2
- if (net_FindCharNotInSet(host.BeginReading(), end,
- "abcdefghijklmnopqrstuvwxyz"
- ".-0123456789"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end)
- return true;
- // Might be a valid IPv6 link-local address containing a percent sign
- nsAutoCString strhost(host);
- PRNetAddr addr;
- return PR_StringToNetAddr(strhost.get(), &addr) == PR_SUCCESS;
- }
- bool
- net_IsValidIPv4Addr(const char *addr, int32_t addrLen)
- {
- RangedPtr<const char> p(addr, addrLen);
- int32_t octet = -1; // means no digit yet
- int32_t dotCount = 0; // number of dots in the address
- for (; addrLen; ++p, --addrLen) {
- if (*p == '.') {
- dotCount++;
- if (octet == -1) {
- // invalid octet
- return false;
- }
- octet = -1;
- } else if (*p >= '0' && *p <='9') {
- if (octet == 0) {
- // leading 0 is not allowed
- return false;
- } else if (octet == -1) {
- octet = *p - '0';
- } else {
- octet *= 10;
- octet += *p - '0';
- if (octet > 255)
- return false;
- }
- } else {
- // invalid character
- return false;
- }
- }
- return (dotCount == 3 && octet != -1);
- }
- bool
- net_IsValidIPv6Addr(const char *addr, int32_t addrLen)
- {
- RangedPtr<const char> p(addr, addrLen);
- int32_t digits = 0; // number of digits in current block
- int32_t colons = 0; // number of colons in a row during parsing
- int32_t blocks = 0; // number of hexadecimal blocks
- bool haveZeros = false; // true if double colon is present in the address
- for (; addrLen; ++p, --addrLen) {
- if (*p == ':') {
- if (colons == 0) {
- if (digits != 0) {
- digits = 0;
- blocks++;
- }
- } else if (colons == 1) {
- if (haveZeros)
- return false; // only one occurrence is allowed
- haveZeros = true;
- } else {
- // too many colons in a row
- return false;
- }
- colons++;
- } else if ((*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') ||
- (*p >= 'A' && *p <= 'F')) {
- if (colons == 1 && blocks == 0) // starts with a single colon
- return false;
- if (digits == 4) // too many digits
- return false;
- colons = 0;
- digits++;
- } else if (*p == '.') {
- // check valid IPv4 from the beginning of the last block
- if (!net_IsValidIPv4Addr(p.get() - digits, addrLen + digits))
- return false;
- return (haveZeros && blocks < 6) || (!haveZeros && blocks == 6);
- } else {
- // invalid character
- return false;
- }
- }
- if (colons == 1) // ends with a single colon
- return false;
- if (digits) // there is a block at the end
- blocks++;
- return (haveZeros && blocks < 8) || (!haveZeros && blocks == 8);
- }
|