nsURLHelper.cpp 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169
  1. /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
  2. /* vim:set ts=4 sw=4 sts=4 et cindent: */
  3. /* This Source Code Form is subject to the terms of the Mozilla Public
  4. * License, v. 2.0. If a copy of the MPL was not distributed with this
  5. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6. #include "mozilla/RangedPtr.h"
  7. #include <algorithm>
  8. #include <iterator>
  9. #include "nsURLHelper.h"
  10. #include "nsIFile.h"
  11. #include "nsIURLParser.h"
  12. #include "nsCOMPtr.h"
  13. #include "nsCRT.h"
  14. #include "nsNetCID.h"
  15. #include "mozilla/Preferences.h"
  16. #include "prnetdb.h"
  17. #include "mozilla/Tokenizer.h"
  18. using namespace mozilla;
  19. //----------------------------------------------------------------------------
  20. // Init/Shutdown
  21. //----------------------------------------------------------------------------
// Set to true at the end of the parser lookups in InitGlobals() and reset to
// false by net_ShutdownURLHelper().
static bool gInitialized = false;
// Cached URL parser services. InitGlobals() stores an AddRef'ed pointer in
// each one (left null if the service lookup failed) and
// net_ShutdownURLHelper() releases them.
static nsIURLParser *gNoAuthURLParser = nullptr;
static nsIURLParser *gAuthURLParser = nullptr;
static nsIURLParser *gStdURLParser = nullptr;
// URL length limit returned by net_GetURLMaxLength(). InitGlobals() registers
// it as the var cache for the "network.standard-url.max-length" preference.
static int32_t gMaxLength = 1048576; // Default: 1MB
  27. static void
  28. InitGlobals()
  29. {
  30. nsCOMPtr<nsIURLParser> parser;
  31. parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID);
  32. NS_ASSERTION(parser, "failed getting 'noauth' url parser");
  33. if (parser) {
  34. gNoAuthURLParser = parser.get();
  35. NS_ADDREF(gNoAuthURLParser);
  36. }
  37. parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID);
  38. NS_ASSERTION(parser, "failed getting 'auth' url parser");
  39. if (parser) {
  40. gAuthURLParser = parser.get();
  41. NS_ADDREF(gAuthURLParser);
  42. }
  43. parser = do_GetService(NS_STDURLPARSER_CONTRACTID);
  44. NS_ASSERTION(parser, "failed getting 'std' url parser");
  45. if (parser) {
  46. gStdURLParser = parser.get();
  47. NS_ADDREF(gStdURLParser);
  48. }
  49. gInitialized = true;
  50. Preferences::AddIntVarCache(&gMaxLength,
  51. "network.standard-url.max-length", 1048576);
  52. }
  53. void
  54. net_ShutdownURLHelper()
  55. {
  56. if (gInitialized) {
  57. NS_IF_RELEASE(gNoAuthURLParser);
  58. NS_IF_RELEASE(gAuthURLParser);
  59. NS_IF_RELEASE(gStdURLParser);
  60. gInitialized = false;
  61. }
  62. }
  63. int32_t net_GetURLMaxLength()
  64. {
  65. return gMaxLength;
  66. }
  67. //----------------------------------------------------------------------------
  68. // nsIURLParser getters
  69. //----------------------------------------------------------------------------
  70. nsIURLParser *
  71. net_GetAuthURLParser()
  72. {
  73. if (!gInitialized)
  74. InitGlobals();
  75. return gAuthURLParser;
  76. }
  77. nsIURLParser *
  78. net_GetNoAuthURLParser()
  79. {
  80. if (!gInitialized)
  81. InitGlobals();
  82. return gNoAuthURLParser;
  83. }
  84. nsIURLParser *
  85. net_GetStdURLParser()
  86. {
  87. if (!gInitialized)
  88. InitGlobals();
  89. return gStdURLParser;
  90. }
  91. //---------------------------------------------------------------------------
  92. // GetFileFromURLSpec implementations
  93. //---------------------------------------------------------------------------
  94. nsresult
  95. net_GetURLSpecFromDir(nsIFile *aFile, nsACString &result)
  96. {
  97. nsAutoCString escPath;
  98. nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);
  99. if (NS_FAILED(rv))
  100. return rv;
  101. if (escPath.Last() != '/') {
  102. escPath += '/';
  103. }
  104. result = escPath;
  105. return NS_OK;
  106. }
  107. nsresult
  108. net_GetURLSpecFromFile(nsIFile *aFile, nsACString &result)
  109. {
  110. nsAutoCString escPath;
  111. nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);
  112. if (NS_FAILED(rv))
  113. return rv;
  114. // if this file references a directory, then we need to ensure that the
  115. // URL ends with a slash. this is important since it affects the rules
  116. // for relative URL resolution when this URL is used as a base URL.
  117. // if the file does not exist, then we make no assumption about its type,
  118. // and simply leave the URL unmodified.
  119. if (escPath.Last() != '/') {
  120. bool dir;
  121. rv = aFile->IsDirectory(&dir);
  122. if (NS_SUCCEEDED(rv) && dir)
  123. escPath += '/';
  124. }
  125. result = escPath;
  126. return NS_OK;
  127. }
  128. //----------------------------------------------------------------------------
  129. // file:// URL parsing
  130. //----------------------------------------------------------------------------
  131. nsresult
  132. net_ParseFileURL(const nsACString &inURL,
  133. nsACString &outDirectory,
  134. nsACString &outFileBaseName,
  135. nsACString &outFileExtension)
  136. {
  137. nsresult rv;
  138. if (inURL.Length() > (uint32_t) gMaxLength) {
  139. return NS_ERROR_MALFORMED_URI;
  140. }
  141. outDirectory.Truncate();
  142. outFileBaseName.Truncate();
  143. outFileExtension.Truncate();
  144. const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
  145. const char *url = flatURL.get();
  146. nsAutoCString scheme;
  147. rv = net_ExtractURLScheme(flatURL, scheme);
  148. if (NS_FAILED(rv)) return rv;
  149. if (!scheme.EqualsLiteral("file")) {
  150. NS_ERROR("must be a file:// url");
  151. return NS_ERROR_UNEXPECTED;
  152. }
  153. nsIURLParser *parser = net_GetNoAuthURLParser();
  154. NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);
  155. uint32_t pathPos, filepathPos, directoryPos, basenamePos, extensionPos;
  156. int32_t pathLen, filepathLen, directoryLen, basenameLen, extensionLen;
  157. // invoke the parser to extract the URL path
  158. rv = parser->ParseURL(url, flatURL.Length(),
  159. nullptr, nullptr, // don't care about scheme
  160. nullptr, nullptr, // don't care about authority
  161. &pathPos, &pathLen);
  162. if (NS_FAILED(rv)) return rv;
  163. // invoke the parser to extract filepath from the path
  164. rv = parser->ParsePath(url + pathPos, pathLen,
  165. &filepathPos, &filepathLen,
  166. nullptr, nullptr, // don't care about query
  167. nullptr, nullptr); // don't care about ref
  168. if (NS_FAILED(rv)) return rv;
  169. filepathPos += pathPos;
  170. // invoke the parser to extract the directory and filename from filepath
  171. rv = parser->ParseFilePath(url + filepathPos, filepathLen,
  172. &directoryPos, &directoryLen,
  173. &basenamePos, &basenameLen,
  174. &extensionPos, &extensionLen);
  175. if (NS_FAILED(rv)) return rv;
  176. if (directoryLen > 0)
  177. outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen);
  178. if (basenameLen > 0)
  179. outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen);
  180. if (extensionLen > 0)
  181. outFileExtension = Substring(inURL, filepathPos + extensionPos, extensionLen);
  182. // since we are using a no-auth url parser, there will never be a host
  183. // XXX not strictly true... file://localhost/foo/bar.html is a valid URL
  184. return NS_OK;
  185. }
  186. //----------------------------------------------------------------------------
  187. // path manipulation functions
  188. //----------------------------------------------------------------------------
// Normalizes the directory part of |path| in place: "%2E"/"%2e" is decoded
// to ".", "/./" segments are dropped and "/seg/.." pairs are collapsed.
// Only the part of the path before the first '?' or '#' is examined; the
// rest is copied through unchanged.
//
// flags (as tested below):
//   NET_COALESCE_DOUBLE_SLASH_IS_ROOT - a leading "//" or "/%2F" is treated
//       as a root marker (used by schemes such as ftp).
//   NET_COALESCE_ALLOW_RELATIVE_ROOT  - ".." segments that would climb above
//       the root are kept in the path instead of being dropped.
// path: NUL-terminated, writable buffer; rewritten in place.
void
net_CoalesceDirs(netCoalesceFlags flags, char* path)
{
    /* Stolen from the old netlib's mkparse.c.
     *
     * modifies a url of the form   /foo/../foo1  ->  /foo1
     *                       and    /foo/./foo1   ->  /foo/foo1
     *                       and    /foo/foo1/..  ->  /foo/
     */
    char *fwdPtr = path;       // read cursor
    char *urlPtr = path;       // write cursor (output is built over the input)
    char *lastslash = path;    // last '/' before any '?' or '#'
    uint32_t traversal = 0;    // number of directory levels copied so far
    uint32_t special_ftp_len = 0; // length of the root marker: 0, 2 or 4

    /* Remember if this url is a special ftp one: */
    if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT)
    {
       /* some schemes (for example ftp) have the speciality that
          the path can begin // or /%2F to mark the root of the
          servers filesystem, a simple / only marks the root relative
          to the user logging in. We remember the length of the marker */
        if (nsCRT::strncasecmp(path,"/%2F",4) == 0)
            special_ftp_len = 4;
        else if (nsCRT::strncmp(path,"//",2) == 0 )
            special_ftp_len = 2;
    }

    /* find the last slash before # or ? */
    /* (first advance to the '#', '?' or terminating NUL) */
    for(; (*fwdPtr != '\0') &&
            (*fwdPtr != '?') &&
            (*fwdPtr != '#'); ++fwdPtr)
    {
    }

    /* found nothing, but go back one only */
    /* if there is something to go back to */
    if (fwdPtr != path && *fwdPtr == '\0')
    {
        --fwdPtr;
    }

    /* search the slash */
    for(; (fwdPtr != path) &&
            (*fwdPtr != '/'); --fwdPtr)
    {
    }
    lastslash = fwdPtr;
    fwdPtr = path;

    /* replace all %2E or %2e with . in the path */
    /* but stop at lastslash if it does not point at the NUL terminator */
    for(; (*fwdPtr != '\0') &&
            (*fwdPtr != '?') &&
            (*fwdPtr != '#') &&
            (*lastslash == '\0' || fwdPtr != lastslash); ++fwdPtr)
    {
        // The && chain short-circuits, so fwdPtr+1 / fwdPtr+2 are only read
        // when the preceding character was not the terminator.
        if (*fwdPtr == '%' && *(fwdPtr+1) == '2' &&
            (*(fwdPtr+2) == 'E' || *(fwdPtr+2) == 'e'))
        {
            *urlPtr++ = '.';
            ++fwdPtr;
            ++fwdPtr;
        }
        else
        {
            *urlPtr++ = *fwdPtr;
        }
    }
    // Copy remaining stuff past the #?;
    for (; *fwdPtr != '\0'; ++fwdPtr)
    {
        *urlPtr++ = *fwdPtr;
    }
    *urlPtr = '\0';  // terminate the url

    // start again, this time for real
    fwdPtr = path;
    urlPtr = path;

    for(; (*fwdPtr != '\0') &&
            (*fwdPtr != '?') &&
            (*fwdPtr != '#'); ++fwdPtr)
    {
        if (*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '/' )
        {
            // remove . followed by slash
            ++fwdPtr;
        }
        else if(*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '.' &&
                (*(fwdPtr+3) == '/' ||
                    *(fwdPtr+3) == '\0' || // This will take care of
                    *(fwdPtr+3) == '?' ||  // something like foo/bar/..#sometag
                    *(fwdPtr+3) == '#'))
        {
            // remove foo/..
            // reverse the urlPtr to the previous slash if possible
            // if url does not allow relative root then drop .. above root
            // otherwise retain them in the path
            if(traversal > 0 || !(flags &
                                  NET_COALESCE_ALLOW_RELATIVE_ROOT))
            {
                if (urlPtr != path)
                    urlPtr--; // we must be going back at least by one
                for(;*urlPtr != '/' && urlPtr != path; urlPtr--)
                    ;  // null body
                // NOTE(review): traversal is unsigned, so this wraps around
                // when it is already 0. That only happens when
                // NET_COALESCE_ALLOW_RELATIVE_ROOT is not set, in which case
                // the condition above is true regardless of traversal.
                --traversal; // count back
                // forward the fwdPtr past the ../
                fwdPtr += 2;
                // if we have reached the beginning of the path
                // while searching for the previous / and we remember
                // that it is an url that begins with /%2F then
                // advance urlPtr again by 3 chars because /%2F already
                // marks the root of the path
                if (urlPtr == path && special_ftp_len > 3)
                {
                    ++urlPtr;
                    ++urlPtr;
                    ++urlPtr;
                }
                // special case if we have reached the end
                // to preserve the last /
                if (*fwdPtr == '.' && *(fwdPtr+1) == '\0')
                    ++urlPtr;
            }
            else
            {
                // there are too many /.. in this path, just copy them instead.
                // forward the urlPtr past the /.. and copying it

                // However if we remember it is an url that starts with
                // /%2F and urlPtr just points at the "F" of "/%2F" then do
                // not overwrite it with the /, just copy .. and move forward
                // urlPtr.
                if (special_ftp_len > 3 && urlPtr == path+special_ftp_len-1)
                    ++urlPtr;
                else
                    *urlPtr++ = *fwdPtr;
                ++fwdPtr;
                *urlPtr++ = *fwdPtr;
                ++fwdPtr;
                *urlPtr++ = *fwdPtr;
            }
        }
        else
        {
            // count the hierarchy, but only if we have not reached
            // the root of some special urls with a special root marker
            if (*fwdPtr == '/' &&  *(fwdPtr+1) != '.' &&
               (special_ftp_len != 2 || *(fwdPtr+1) != '/'))
                traversal++;
            // copy the url incrementally
            *urlPtr++ = *fwdPtr;
        }
    }

    /*
     *  Now lets remove trailing . case
     *     /foo/foo1/.   ->  /foo/foo1/
     */

    if ((urlPtr > (path+1)) && (*(urlPtr-1) == '.') && (*(urlPtr-2) == '/'))
        urlPtr--;

    // Copy remaining stuff past the #?;
    for (; *fwdPtr != '\0'; ++fwdPtr)
    {
        *urlPtr++ = *fwdPtr;
    }
    *urlPtr = '\0';  // terminate the url
}
  351. nsresult
  352. net_ResolveRelativePath(const nsACString &relativePath,
  353. const nsACString &basePath,
  354. nsACString &result)
  355. {
  356. nsAutoCString name;
  357. nsAutoCString path(basePath);
  358. bool needsDelim = false;
  359. if ( !path.IsEmpty() ) {
  360. char16_t last = path.Last();
  361. needsDelim = !(last == '/');
  362. }
  363. nsACString::const_iterator beg, end;
  364. relativePath.BeginReading(beg);
  365. relativePath.EndReading(end);
  366. bool stop = false;
  367. char c;
  368. for (; !stop; ++beg) {
  369. c = (beg == end) ? '\0' : *beg;
  370. //printf("%c [name=%s] [path=%s]\n", c, name.get(), path.get());
  371. switch (c) {
  372. case '\0':
  373. case '#':
  374. case '?':
  375. stop = true;
  376. MOZ_FALLTHROUGH;
  377. case '/':
  378. // delimiter found
  379. if (name.EqualsLiteral("..")) {
  380. // pop path
  381. // If we already have the delim at end, then
  382. // skip over that when searching for next one to the left
  383. int32_t offset = path.Length() - (needsDelim ? 1 : 2);
  384. // First check for errors
  385. if (offset < 0 )
  386. return NS_ERROR_MALFORMED_URI;
  387. int32_t pos = path.RFind("/", false, offset);
  388. if (pos >= 0)
  389. path.Truncate(pos + 1);
  390. else
  391. path.Truncate();
  392. }
  393. else if (name.IsEmpty() || name.EqualsLiteral(".")) {
  394. // do nothing
  395. }
  396. else {
  397. // append name to path
  398. if (needsDelim)
  399. path += '/';
  400. path += name;
  401. needsDelim = true;
  402. }
  403. name.Truncate();
  404. break;
  405. default:
  406. // append char to name
  407. name += c;
  408. }
  409. }
  410. // append anything left on relativePath (e.g. #..., ;..., ?...)
  411. if (c != '\0')
  412. path += Substring(--beg, end);
  413. result = path;
  414. return NS_OK;
  415. }
  416. //----------------------------------------------------------------------------
  417. // scheme fu
  418. //----------------------------------------------------------------------------
  419. static bool isAsciiAlpha(char c) {
  420. return nsCRT::IsAsciiAlpha(c);
  421. }
  422. static bool
  423. net_IsValidSchemeChar(const char aChar)
  424. {
  425. if (nsCRT::IsAsciiAlpha(aChar) || nsCRT::IsAsciiDigit(aChar) ||
  426. aChar == '+' || aChar == '.' || aChar == '-') {
  427. return true;
  428. }
  429. return false;
  430. }
  431. /* Extract URI-Scheme if possible */
  432. nsresult
  433. net_ExtractURLScheme(const nsACString &inURI,
  434. nsACString& scheme)
  435. {
  436. nsACString::const_iterator start, end;
  437. inURI.BeginReading(start);
  438. inURI.EndReading(end);
  439. // Strip C0 and space from begining
  440. while (start != end) {
  441. if ((uint8_t) *start > 0x20) {
  442. break;
  443. }
  444. start++;
  445. }
  446. Tokenizer p(Substring(start, end), "\r\n\t");
  447. p.Record();
  448. if (!p.CheckChar(isAsciiAlpha)) {
  449. // First char must be alpha
  450. return NS_ERROR_MALFORMED_URI;
  451. }
  452. while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
  453. // Skip valid scheme characters or \r\n\t
  454. }
  455. if (!p.CheckChar(':')) {
  456. return NS_ERROR_MALFORMED_URI;
  457. }
  458. p.Claim(scheme);
  459. scheme.StripChars("\r\n\t");
  460. return NS_OK;
  461. }
  462. bool
  463. net_IsValidScheme(const char *scheme, uint32_t schemeLen)
  464. {
  465. // first char must be alpha
  466. if (!nsCRT::IsAsciiAlpha(*scheme))
  467. return false;
  468. // nsCStrings may have embedded nulls -- reject those too
  469. for (; schemeLen; ++scheme, --schemeLen) {
  470. if (!(nsCRT::IsAsciiAlpha(*scheme) ||
  471. nsCRT::IsAsciiDigit(*scheme) ||
  472. *scheme == '+' ||
  473. *scheme == '.' ||
  474. *scheme == '-'))
  475. return false;
  476. }
  477. return true;
  478. }
  479. bool
  480. net_IsAbsoluteURL(const nsACString& uri)
  481. {
  482. nsACString::const_iterator start, end;
  483. uri.BeginReading(start);
  484. uri.EndReading(end);
  485. // Strip C0 and space from begining
  486. while (start != end) {
  487. if ((uint8_t) *start > 0x20) {
  488. break;
  489. }
  490. start++;
  491. }
  492. Tokenizer p(Substring(start, end), "\r\n\t");
  493. // First char must be alpha
  494. if (!p.CheckChar(isAsciiAlpha)) {
  495. return false;
  496. }
  497. while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
  498. // Skip valid scheme characters or \r\n\t
  499. }
  500. if (!p.CheckChar(':')) {
  501. return false;
  502. }
  503. p.SkipWhites();
  504. if (!p.CheckChar('/')) {
  505. return false;
  506. }
  507. p.SkipWhites();
  508. if (p.CheckChar('/')) {
  509. // aSpec is really absolute. Ignore aBaseURI in this case
  510. return true;
  511. }
  512. return false;
  513. }
  514. void
  515. net_FilterURIString(const nsACString& input, nsACString& result)
  516. {
  517. const char kCharsToStrip[] = "\r\n\t";
  518. result.Truncate();
  519. auto start = input.BeginReading();
  520. auto end = input.EndReading();
  521. // Trim off leading and trailing invalid chars.
  522. auto charFilter = [](char c) { return static_cast<uint8_t>(c) > 0x20; };
  523. auto newStart = std::find_if(start, end, charFilter);
  524. auto newEnd = std::find_if(
  525. std::reverse_iterator<decltype(end)>(end),
  526. std::reverse_iterator<decltype(newStart)>(newStart),
  527. charFilter).base();
  528. // Check if chars need to be stripped.
  529. auto itr = std::find_first_of(
  530. newStart, newEnd, std::begin(kCharsToStrip), std::end(kCharsToStrip));
  531. const bool needsStrip = itr != newEnd;
  532. // Just use the passed in string rather than creating new copies if no
  533. // changes are necessary.
  534. if (newStart == start && newEnd == end && !needsStrip) {
  535. result = input;
  536. return;
  537. }
  538. result.Assign(Substring(newStart, newEnd));
  539. if (needsStrip) {
  540. result.StripChars(kCharsToStrip);
  541. }
  542. }
  543. #if defined(XP_WIN)
  544. bool
  545. net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
  546. {
  547. bool writing = false;
  548. nsACString::const_iterator beginIter, endIter;
  549. aURL.BeginReading(beginIter);
  550. aURL.EndReading(endIter);
  551. const char *s, *begin = beginIter.get();
  552. for (s = begin; s != endIter.get(); ++s)
  553. {
  554. if (*s == '\\')
  555. {
  556. writing = true;
  557. if (s > begin)
  558. aResultBuf.Append(begin, s - begin);
  559. aResultBuf += '/';
  560. begin = s + 1;
  561. }
  562. }
  563. if (writing && s > begin)
  564. aResultBuf.Append(begin, s - begin);
  565. return writing;
  566. }
  567. #endif
  568. //----------------------------------------------------------------------------
  569. // miscellaneous (i.e., stuff that should really be elsewhere)
  570. //----------------------------------------------------------------------------
  571. static inline
  572. void ToLower(char &c)
  573. {
  574. if ((unsigned)(c - 'A') <= (unsigned)('Z' - 'A'))
  575. c += 'a' - 'A';
  576. }
  577. void
  578. net_ToLowerCase(char *str, uint32_t length)
  579. {
  580. for (char *end = str + length; str < end; ++str)
  581. ToLower(*str);
  582. }
  583. void
  584. net_ToLowerCase(char *str)
  585. {
  586. for (; *str; ++str)
  587. ToLower(*str);
  588. }
  589. char *
  590. net_FindCharInSet(const char *iter, const char *stop, const char *set)
  591. {
  592. for (; iter != stop && *iter; ++iter) {
  593. for (const char *s = set; *s; ++s) {
  594. if (*iter == *s)
  595. return (char *) iter;
  596. }
  597. }
  598. return (char *) iter;
  599. }
  600. char *
  601. net_FindCharNotInSet(const char *iter, const char *stop, const char *set)
  602. {
  603. repeat:
  604. for (const char *s = set; *s; ++s) {
  605. if (*iter == *s) {
  606. if (++iter == stop)
  607. break;
  608. goto repeat;
  609. }
  610. }
  611. return (char *) iter;
  612. }
  613. char *
  614. net_RFindCharNotInSet(const char *stop, const char *iter, const char *set)
  615. {
  616. --iter;
  617. --stop;
  618. if (iter == stop)
  619. return (char *) iter;
  620. repeat:
  621. for (const char *s = set; *s; ++s) {
  622. if (*iter == *s) {
  623. if (--iter == stop)
  624. break;
  625. goto repeat;
  626. }
  627. }
  628. return (char *) iter;
  629. }
  630. #define HTTP_LWS " \t"
  631. // Return the index of the closing quote of the string, if any
  632. static uint32_t
  633. net_FindStringEnd(const nsCString& flatStr,
  634. uint32_t stringStart,
  635. char stringDelim)
  636. {
  637. NS_ASSERTION(stringStart < flatStr.Length() &&
  638. flatStr.CharAt(stringStart) == stringDelim &&
  639. (stringDelim == '"' || stringDelim == '\''),
  640. "Invalid stringStart");
  641. const char set[] = { stringDelim, '\\', '\0' };
  642. do {
  643. // stringStart points to either the start quote or the last
  644. // escaped char (the char following a '\\')
  645. // Write to searchStart here, so that when we get back to the
  646. // top of the loop right outside this one we search from the
  647. // right place.
  648. uint32_t stringEnd = flatStr.FindCharInSet(set, stringStart + 1);
  649. if (stringEnd == uint32_t(kNotFound))
  650. return flatStr.Length();
  651. if (flatStr.CharAt(stringEnd) == '\\') {
  652. // Hit a backslash-escaped char. Need to skip over it.
  653. stringStart = stringEnd + 1;
  654. if (stringStart == flatStr.Length())
  655. return stringStart;
  656. // Go back to looking for the next escape or the string end
  657. continue;
  658. }
  659. return stringEnd;
  660. } while (true);
  661. NS_NOTREACHED("How did we get here?");
  662. return flatStr.Length();
  663. }
  664. static uint32_t
  665. net_FindMediaDelimiter(const nsCString& flatStr,
  666. uint32_t searchStart,
  667. char delimiter)
  668. {
  669. do {
  670. // searchStart points to the spot from which we should start looking
  671. // for the delimiter.
  672. const char delimStr[] = { delimiter, '"', '\0' };
  673. uint32_t curDelimPos = flatStr.FindCharInSet(delimStr, searchStart);
  674. if (curDelimPos == uint32_t(kNotFound))
  675. return flatStr.Length();
  676. char ch = flatStr.CharAt(curDelimPos);
  677. if (ch == delimiter) {
  678. // Found delimiter
  679. return curDelimPos;
  680. }
  681. // We hit the start of a quoted string. Look for its end.
  682. searchStart = net_FindStringEnd(flatStr, curDelimPos, ch);
  683. if (searchStart == flatStr.Length())
  684. return searchStart;
  685. ++searchStart;
  686. // searchStart now points to the first char after the end of the
  687. // string, so just go back to the top of the loop and look for
  688. // |delimiter| again.
  689. } while (true);
  690. NS_NOTREACHED("How did we get here?");
  691. return flatStr.Length();
  692. }
// Parses a single media-type (e.g. "text/html; charset=utf-8") and merges it
// into the running content type / charset state kept by the caller.
//
// aMediaTypeStr   - the media-type text to parse.
// aContentType    - in/out: replaced (lower-cased) when the parsed type is
//                   accepted and differs, case-insensitively, from its
//                   current value.
// aContentCharset - in/out: replaced when the parsed type carries a charset
//                   parameter, or when the type changed and a charset had
//                   been seen before.
// aOffset         - added to the positions written to aCharsetStart and
//                   aCharsetEnd.
// aHadCharset     - in/out: set to true whenever aContentCharset is updated.
// aCharsetStart,
// aCharsetEnd     - out: position of the charset parameter (starting at its
//                   ';'), or the position where one would be inserted.
// aStrict         - when true, the type is only accepted if nothing but LWS
//                   follows the last parameter; when false, the type is only
//                   accepted if the "*/*" comparison below does not match.
static void
net_ParseMediaType(const nsACString &aMediaTypeStr,
                   nsACString       &aContentType,
                   nsACString       &aContentCharset,
                   int32_t          aOffset,
                   bool             *aHadCharset,
                   int32_t          *aCharsetStart,
                   int32_t          *aCharsetEnd,
                   bool             aStrict)
{
    const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);
    const char* start = flatStr.get();
    const char* end = start + flatStr.Length();

    // Trim LWS leading and trailing whitespace from type.  We include '(' in
    // the trailing trim set to catch media-type comments, which are not at all
    // standard, but may occur in rare cases.
    const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);
    const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";(");

    const char* charset = "";
    const char* charsetEnd = charset;
    int32_t charsetParamStart = 0;
    int32_t charsetParamEnd = 0;

    // NOTE(review): this initial value is a length measured from |type|, but
    // |consumed| is later used as an offset from |start| (see the aStrict
    // check below); the two differ when there is leading LWS -- confirm
    // whether that is intended.
    uint32_t consumed = typeEnd - type;

    // Iterate over parameters
    bool typeHasCharset = false;
    uint32_t paramStart = flatStr.FindChar(';', typeEnd - start);
    if (paramStart != uint32_t(kNotFound)) {
        // We have parameters.  Iterate over them.
        uint32_t curParamStart = paramStart + 1;
        do {
            uint32_t curParamEnd =
                net_FindMediaDelimiter(flatStr, curParamStart, ';');

            const char* paramName = net_FindCharNotInSet(start + curParamStart,
                                                         start + curParamEnd,
                                                         HTTP_LWS);
            static const char charsetStr[] = "charset=";
            if (PL_strncasecmp(paramName, charsetStr,
                               sizeof(charsetStr) - 1) == 0) {
                // Remember the value span; a later charset parameter
                // overwrites an earlier one.
                charset = paramName + sizeof(charsetStr) - 1;
                charsetEnd = start + curParamEnd;
                typeHasCharset = true;
                // -1 so that the span starts at the ';' introducing the
                // parameter.
                charsetParamStart = curParamStart - 1;
                charsetParamEnd = curParamEnd;
            }

            consumed = curParamEnd;
            curParamStart = curParamEnd + 1;
        } while (curParamStart < flatStr.Length());
    }

    bool charsetNeedsQuotedStringUnescaping = false;
    if (typeHasCharset) {
        // Trim LWS leading and trailing whitespace from charset.  We include
        // '(' in the trailing trim set to catch media-type comments, which are
        // not at all standard, but may occur in rare cases.
        charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);
        if (*charset == '"') {
            charsetNeedsQuotedStringUnescaping = true;
            charsetEnd =
                start + net_FindStringEnd(flatStr, charset - start, *charset);
            // Step past the opening quote.
            charset++;
            NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");
        } else {
            charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";(");
        }
    }

    // if the server sent "*/*", it is meaningless, so do not store it.
    // also, if type is the same as aContentType, then just update the
    // charset.  however, if charset is empty and aContentType hasn't
    // changed, then don't wipe-out an existing aContentCharset.  We
    // also want to reject a mime-type if it does not include a slash.
    // some servers give junk after the charset parameter, which may
    // include a comma, so this check makes us a bit more tolerant.

    if (type != typeEnd &&
        memchr(type, '/', typeEnd - type) != nullptr &&
        (aStrict ? (net_FindCharNotInSet(start + consumed, end, HTTP_LWS) == end) :
                   (strncmp(type, "*/*", typeEnd - type) != 0))) {
        // Common case here is that aContentType is empty
        bool eq = !aContentType.IsEmpty() &&
            aContentType.Equals(Substring(type, typeEnd),
                                nsCaseInsensitiveCStringComparator());
        if (!eq) {
            aContentType.Assign(type, typeEnd - type);
            ToLowerCase(aContentType);
        }

        if ((!eq && *aHadCharset) || typeHasCharset) {
            *aHadCharset = true;
            if (charsetNeedsQuotedStringUnescaping) {
                // parameters using the "quoted-string" syntax need
                // backslash-escapes to be unescaped (see RFC 2616 Section 2.2)
                aContentCharset.Truncate();
                for (const char *c = charset; c != charsetEnd; c++) {
                    if (*c == '\\' && c + 1 != charsetEnd) {
                        // eat escape
                        c++;
                    }
                    aContentCharset.Append(*c);
                }
            }
            else {
                aContentCharset.Assign(charset, charsetEnd - charset);
            }
            if (typeHasCharset) {
                *aCharsetStart = charsetParamStart + aOffset;
                *aCharsetEnd = charsetParamEnd + aOffset;
            }
        }
        // Only set a new charset position if this is a different type
        // from the last one we had and it doesn't already have a
        // charset param.  If this is the same type, we probably want
        // to leave the charset position on its first occurrence.
        if (!eq && !typeHasCharset) {
            int32_t charsetStart = int32_t(paramStart);
            if (charsetStart == kNotFound)
                charsetStart = flatStr.Length();

            *aCharsetEnd = *aCharsetStart = charsetStart + aOffset;
        }
    }
}
  812. #undef HTTP_LWS
  813. void
  814. net_ParseContentType(const nsACString &aHeaderStr,
  815. nsACString &aContentType,
  816. nsACString &aContentCharset,
  817. bool *aHadCharset)
  818. {
  819. int32_t dummy1, dummy2;
  820. net_ParseContentType(aHeaderStr, aContentType, aContentCharset,
  821. aHadCharset, &dummy1, &dummy2);
  822. }
  823. void
  824. net_ParseContentType(const nsACString &aHeaderStr,
  825. nsACString &aContentType,
  826. nsACString &aContentCharset,
  827. bool *aHadCharset,
  828. int32_t *aCharsetStart,
  829. int32_t *aCharsetEnd)
  830. {
  831. //
  832. // Augmented BNF (from RFC 2616 section 3.7):
  833. //
  834. // header-value = media-type *( LWS "," LWS media-type )
  835. // media-type = type "/" subtype *( LWS ";" LWS parameter )
  836. // type = token
  837. // subtype = token
  838. // parameter = attribute "=" value
  839. // attribute = token
  840. // value = token | quoted-string
  841. //
  842. //
  843. // Examples:
  844. //
  845. // text/html
  846. // text/html, text/html
  847. // text/html,text/html; charset=ISO-8859-1
  848. // text/html,text/html; charset="ISO-8859-1"
  849. // text/html;charset=ISO-8859-1, text/html
  850. // text/html;charset='ISO-8859-1', text/html
  851. // application/octet-stream
  852. //
  853. *aHadCharset = false;
  854. const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
  855. // iterate over media-types. Note that ',' characters can happen
  856. // inside quoted strings, so we need to watch out for that.
  857. uint32_t curTypeStart = 0;
  858. do {
  859. // curTypeStart points to the start of the current media-type. We want
  860. // to look for its end.
  861. uint32_t curTypeEnd =
  862. net_FindMediaDelimiter(flatStr, curTypeStart, ',');
  863. // At this point curTypeEnd points to the spot where the media-type
  864. // starting at curTypeEnd ends. Time to parse that!
  865. net_ParseMediaType(Substring(flatStr, curTypeStart,
  866. curTypeEnd - curTypeStart),
  867. aContentType, aContentCharset, curTypeStart,
  868. aHadCharset, aCharsetStart, aCharsetEnd, false);
  869. // And let's move on to the next media-type
  870. curTypeStart = curTypeEnd + 1;
  871. } while (curTypeStart < flatStr.Length());
  872. }
  873. void
  874. net_ParseRequestContentType(const nsACString &aHeaderStr,
  875. nsACString &aContentType,
  876. nsACString &aContentCharset,
  877. bool *aHadCharset)
  878. {
  879. //
  880. // Augmented BNF (from RFC 7231 section 3.1.1.1):
  881. //
  882. // media-type = type "/" subtype *( OWS ";" OWS parameter )
  883. // type = token
  884. // subtype = token
  885. // parameter = token "=" ( token / quoted-string )
  886. //
  887. // Examples:
  888. //
  889. // text/html
  890. // text/html; charset=ISO-8859-1
  891. // text/html; charset="ISO-8859-1"
  892. // application/octet-stream
  893. //
  894. aContentType.Truncate();
  895. aContentCharset.Truncate();
  896. *aHadCharset = false;
  897. const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
  898. // At this point curTypeEnd points to the spot where the media-type
  899. // starting at curTypeEnd ends. Time to parse that!
  900. nsAutoCString contentType, contentCharset;
  901. bool hadCharset = false;
  902. int32_t dummy1, dummy2;
  903. uint32_t typeEnd = net_FindMediaDelimiter(flatStr, 0, ',');
  904. if (typeEnd != flatStr.Length()) {
  905. // We have some stuff left at the end, so this is not a valid
  906. // request Content-Type header.
  907. return;
  908. }
  909. net_ParseMediaType(flatStr, contentType, contentCharset, 0,
  910. &hadCharset, &dummy1, &dummy2, true);
  911. aContentType = contentType;
  912. aContentCharset = contentCharset;
  913. *aHadCharset = hadCharset;
  914. }
  915. bool
  916. net_IsValidHostName(const nsCSubstring &host)
  917. {
  918. const char *end = host.EndReading();
  919. // Use explicit whitelists to select which characters we are
  920. // willing to send to lower-level DNS logic. This is more
  921. // self-documenting, and can also be slightly faster than the
  922. // blacklist approach, since DNS names are the common case, and
  923. // the commonest characters will tend to be near the start of
  924. // the list.
  925. // Whitelist for DNS names (RFC 1035) with extra characters added
  926. // for pragmatic reasons "$+_"
  927. // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2
  928. if (net_FindCharNotInSet(host.BeginReading(), end,
  929. "abcdefghijklmnopqrstuvwxyz"
  930. ".-0123456789"
  931. "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end)
  932. return true;
  933. // Might be a valid IPv6 link-local address containing a percent sign
  934. nsAutoCString strhost(host);
  935. PRNetAddr addr;
  936. return PR_StringToNetAddr(strhost.get(), &addr) == PR_SUCCESS;
  937. }
  938. bool
  939. net_IsValidIPv4Addr(const char *addr, int32_t addrLen)
  940. {
  941. RangedPtr<const char> p(addr, addrLen);
  942. int32_t octet = -1; // means no digit yet
  943. int32_t dotCount = 0; // number of dots in the address
  944. for (; addrLen; ++p, --addrLen) {
  945. if (*p == '.') {
  946. dotCount++;
  947. if (octet == -1) {
  948. // invalid octet
  949. return false;
  950. }
  951. octet = -1;
  952. } else if (*p >= '0' && *p <='9') {
  953. if (octet == 0) {
  954. // leading 0 is not allowed
  955. return false;
  956. } else if (octet == -1) {
  957. octet = *p - '0';
  958. } else {
  959. octet *= 10;
  960. octet += *p - '0';
  961. if (octet > 255)
  962. return false;
  963. }
  964. } else {
  965. // invalid character
  966. return false;
  967. }
  968. }
  969. return (dotCount == 3 && octet != -1);
  970. }
  971. bool
  972. net_IsValidIPv6Addr(const char *addr, int32_t addrLen)
  973. {
  974. RangedPtr<const char> p(addr, addrLen);
  975. int32_t digits = 0; // number of digits in current block
  976. int32_t colons = 0; // number of colons in a row during parsing
  977. int32_t blocks = 0; // number of hexadecimal blocks
  978. bool haveZeros = false; // true if double colon is present in the address
  979. for (; addrLen; ++p, --addrLen) {
  980. if (*p == ':') {
  981. if (colons == 0) {
  982. if (digits != 0) {
  983. digits = 0;
  984. blocks++;
  985. }
  986. } else if (colons == 1) {
  987. if (haveZeros)
  988. return false; // only one occurrence is allowed
  989. haveZeros = true;
  990. } else {
  991. // too many colons in a row
  992. return false;
  993. }
  994. colons++;
  995. } else if ((*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') ||
  996. (*p >= 'A' && *p <= 'F')) {
  997. if (colons == 1 && blocks == 0) // starts with a single colon
  998. return false;
  999. if (digits == 4) // too many digits
  1000. return false;
  1001. colons = 0;
  1002. digits++;
  1003. } else if (*p == '.') {
  1004. // check valid IPv4 from the beginning of the last block
  1005. if (!net_IsValidIPv4Addr(p.get() - digits, addrLen + digits))
  1006. return false;
  1007. return (haveZeros && blocks < 6) || (!haveZeros && blocks == 6);
  1008. } else {
  1009. // invalid character
  1010. return false;
  1011. }
  1012. }
  1013. if (colons == 1) // ends with a single colon
  1014. return false;
  1015. if (digits) // there is a block at the end
  1016. blocks++;
  1017. return (haveZeros && blocks < 8) || (!haveZeros && blocks == 8);
  1018. }