nsDirIndexParser.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. /* This parsing code originally lived in xpfe/components/directory/ - bbaetz */
  6. #include "mozilla/ArrayUtils.h"
  7. #include "prprf.h"
  8. #include "nsDirIndexParser.h"
  9. #include "nsEscape.h"
  10. #include "nsIInputStream.h"
  11. #include "nsCRT.h"
  12. #include "mozilla/dom/FallbackEncoding.h"
  13. #include "nsITextToSubURI.h"
  14. #include "nsIDirIndex.h"
  15. #include "nsServiceManagerUtils.h"
  16. using namespace mozilla;
  17. NS_IMPL_ISUPPORTS(nsDirIndexParser,
  18. nsIRequestObserver,
  19. nsIStreamListener,
  20. nsIDirIndexParser)
  21. nsDirIndexParser::nsDirIndexParser() {
  22. }
  23. nsresult
  24. nsDirIndexParser::Init() {
  25. mLineStart = 0;
  26. mHasDescription = false;
  27. mFormat[0] = -1;
  28. mozilla::dom::FallbackEncoding::FromLocale(mEncoding);
  29. nsresult rv;
  30. // XXX not threadsafe
  31. if (gRefCntParser++ == 0)
  32. rv = CallGetService(NS_ITEXTTOSUBURI_CONTRACTID, &gTextToSubURI);
  33. else
  34. rv = NS_OK;
  35. return rv;
  36. }
  37. nsDirIndexParser::~nsDirIndexParser() {
  38. // XXX not threadsafe
  39. if (--gRefCntParser == 0) {
  40. NS_IF_RELEASE(gTextToSubURI);
  41. }
  42. }
  43. NS_IMETHODIMP
  44. nsDirIndexParser::SetListener(nsIDirIndexListener* aListener) {
  45. mListener = aListener;
  46. return NS_OK;
  47. }
  48. NS_IMETHODIMP
  49. nsDirIndexParser::GetListener(nsIDirIndexListener** aListener) {
  50. NS_IF_ADDREF(*aListener = mListener.get());
  51. return NS_OK;
  52. }
  53. NS_IMETHODIMP
  54. nsDirIndexParser::GetComment(char** aComment) {
  55. *aComment = ToNewCString(mComment);
  56. if (!*aComment)
  57. return NS_ERROR_OUT_OF_MEMORY;
  58. return NS_OK;
  59. }
  60. NS_IMETHODIMP
  61. nsDirIndexParser::SetEncoding(const char* aEncoding) {
  62. mEncoding.Assign(aEncoding);
  63. return NS_OK;
  64. }
  65. NS_IMETHODIMP
  66. nsDirIndexParser::GetEncoding(char** aEncoding) {
  67. *aEncoding = ToNewCString(mEncoding);
  68. if (!*aEncoding)
  69. return NS_ERROR_OUT_OF_MEMORY;
  70. return NS_OK;
  71. }
  72. NS_IMETHODIMP
  73. nsDirIndexParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aCtxt) {
  74. return NS_OK;
  75. }
  76. NS_IMETHODIMP
  77. nsDirIndexParser::OnStopRequest(nsIRequest *aRequest, nsISupports *aCtxt,
  78. nsresult aStatusCode) {
  79. // Finish up
  80. if (mBuf.Length() > (uint32_t) mLineStart) {
  81. ProcessData(aRequest, aCtxt);
  82. }
  83. return NS_OK;
  84. }
  85. nsDirIndexParser::Field
  86. nsDirIndexParser::gFieldTable[] = {
  87. { "Filename", FIELD_FILENAME },
  88. { "Description", FIELD_DESCRIPTION },
  89. { "Content-Length", FIELD_CONTENTLENGTH },
  90. { "Last-Modified", FIELD_LASTMODIFIED },
  91. { "Content-Type", FIELD_CONTENTTYPE },
  92. { "File-Type", FIELD_FILETYPE },
  93. { nullptr, FIELD_UNKNOWN }
  94. };
  95. nsrefcnt nsDirIndexParser::gRefCntParser = 0;
  96. nsITextToSubURI *nsDirIndexParser::gTextToSubURI;
  97. nsresult
  98. nsDirIndexParser::ParseFormat(const char* aFormatStr)
  99. {
  100. // Parse a "200" format line, and remember the fields and their
  101. // ordering in mFormat. Multiple 200 lines stomp on each other.
  102. unsigned int formatNum = 0;
  103. mFormat[0] = -1;
  104. do {
  105. while (*aFormatStr && nsCRT::IsAsciiSpace(char16_t(*aFormatStr)))
  106. ++aFormatStr;
  107. if (! *aFormatStr)
  108. break;
  109. nsAutoCString name;
  110. int32_t len = 0;
  111. while (aFormatStr[len] && !nsCRT::IsAsciiSpace(char16_t(aFormatStr[len])))
  112. ++len;
  113. name.SetCapacity(len + 1);
  114. name.Append(aFormatStr, len);
  115. aFormatStr += len;
  116. // Okay, we're gonna monkey with the nsStr. Bold!
  117. name.SetLength(nsUnescapeCount(name.BeginWriting()));
  118. // All tokens are case-insensitive - http://www.mozilla.org/projects/netlib/dirindexformat.html
  119. if (name.LowerCaseEqualsLiteral("description"))
  120. mHasDescription = true;
  121. for (Field* i = gFieldTable; i->mName; ++i) {
  122. if (name.EqualsIgnoreCase(i->mName)) {
  123. mFormat[formatNum] = i->mType;
  124. mFormat[++formatNum] = -1;
  125. break;
  126. }
  127. }
  128. } while (*aFormatStr && (formatNum < (ArrayLength(mFormat)-1)));
  129. return NS_OK;
  130. }
  131. nsresult
  132. nsDirIndexParser::ParseData(nsIDirIndex *aIdx, char* aDataStr, int32_t aLineLen)
  133. {
  134. // Parse a "201" data line, using the field ordering specified in
  135. // mFormat.
  136. if(mFormat[0] == -1) {
  137. // Ignore if we haven't seen a format yet.
  138. return NS_OK;
  139. }
  140. nsresult rv = NS_OK;
  141. nsAutoCString filename;
  142. int32_t lineLen = aLineLen;
  143. for (int32_t i = 0; mFormat[i] != -1; ++i) {
  144. // If we've exhausted the data before we run out of fields, just bail.
  145. if (!*aDataStr || (lineLen < 1)) {
  146. return NS_OK;
  147. }
  148. while ((lineLen > 0) && nsCRT::IsAsciiSpace(*aDataStr)) {
  149. ++aDataStr;
  150. --lineLen;
  151. }
  152. if (lineLen < 1) {
  153. // invalid format, bail
  154. return NS_OK;
  155. }
  156. char *value = aDataStr;
  157. if (*aDataStr == '"' || *aDataStr == '\'') {
  158. // it's a quoted string. snarf everything up to the next quote character
  159. const char quotechar = *(aDataStr++);
  160. lineLen--;
  161. ++value;
  162. while ((lineLen > 0) && *aDataStr != quotechar) {
  163. ++aDataStr;
  164. --lineLen;
  165. }
  166. if (lineLen > 0) {
  167. *aDataStr++ = '\0';
  168. --lineLen;
  169. }
  170. if (!lineLen) {
  171. // invalid format, bail
  172. return NS_OK;
  173. }
  174. } else {
  175. // it's unquoted. snarf until we see whitespace.
  176. value = aDataStr;
  177. while ((lineLen > 0) && (!nsCRT::IsAsciiSpace(*aDataStr))) {
  178. ++aDataStr;
  179. --lineLen;
  180. }
  181. if (lineLen > 0) {
  182. *aDataStr++ = '\0';
  183. --lineLen;
  184. }
  185. // even if we ran out of line length here, there's still a trailing zero
  186. // byte afterwards
  187. }
  188. fieldType t = fieldType(mFormat[i]);
  189. switch (t) {
  190. case FIELD_FILENAME: {
  191. // don't unescape at this point, so that UnEscapeAndConvert() can
  192. filename = value;
  193. bool success = false;
  194. nsAutoString entryuri;
  195. if (gTextToSubURI) {
  196. char16_t *result = nullptr;
  197. if (NS_SUCCEEDED(rv = gTextToSubURI->UnEscapeAndConvert(mEncoding.get(), filename.get(),
  198. &result)) && (result)) {
  199. if (*result) {
  200. aIdx->SetLocation(filename.get());
  201. if (!mHasDescription)
  202. aIdx->SetDescription(result);
  203. success = true;
  204. }
  205. free(result);
  206. } else {
  207. NS_WARNING("UnEscapeAndConvert error");
  208. }
  209. }
  210. if (!success) {
  211. // if unsuccessfully at charset conversion, then
  212. // just fallback to unescape'ing in-place
  213. // XXX - this shouldn't be using UTF8, should it?
  214. // when can we fail to get the service, anyway? - bbaetz
  215. aIdx->SetLocation(filename.get());
  216. if (!mHasDescription) {
  217. aIdx->SetDescription(NS_ConvertUTF8toUTF16(value).get());
  218. }
  219. }
  220. }
  221. break;
  222. case FIELD_DESCRIPTION:
  223. nsUnescape(value);
  224. aIdx->SetDescription(NS_ConvertUTF8toUTF16(value).get());
  225. break;
  226. case FIELD_CONTENTLENGTH:
  227. {
  228. int64_t len;
  229. int32_t status = PR_sscanf(value, "%lld", &len);
  230. if (status == 1)
  231. aIdx->SetSize(len);
  232. else
  233. aIdx->SetSize(UINT64_MAX); // UINT64_MAX means unknown
  234. }
  235. break;
  236. case FIELD_LASTMODIFIED:
  237. {
  238. PRTime tm;
  239. nsUnescape(value);
  240. if (PR_ParseTimeString(value, false, &tm) == PR_SUCCESS) {
  241. aIdx->SetLastModified(tm);
  242. }
  243. }
  244. break;
  245. case FIELD_CONTENTTYPE:
  246. aIdx->SetContentType(value);
  247. break;
  248. case FIELD_FILETYPE:
  249. // unescape in-place
  250. nsUnescape(value);
  251. if (!nsCRT::strcasecmp(value, "directory")) {
  252. aIdx->SetType(nsIDirIndex::TYPE_DIRECTORY);
  253. } else if (!nsCRT::strcasecmp(value, "file")) {
  254. aIdx->SetType(nsIDirIndex::TYPE_FILE);
  255. } else if (!nsCRT::strcasecmp(value, "symbolic-link")) {
  256. aIdx->SetType(nsIDirIndex::TYPE_SYMLINK);
  257. } else {
  258. aIdx->SetType(nsIDirIndex::TYPE_UNKNOWN);
  259. }
  260. break;
  261. case FIELD_UNKNOWN:
  262. // ignore
  263. break;
  264. }
  265. }
  266. return NS_OK;
  267. }
  268. NS_IMETHODIMP
  269. nsDirIndexParser::OnDataAvailable(nsIRequest *aRequest, nsISupports *aCtxt,
  270. nsIInputStream *aStream,
  271. uint64_t aSourceOffset,
  272. uint32_t aCount) {
  273. if (aCount < 1)
  274. return NS_OK;
  275. int32_t len = mBuf.Length();
  276. // Ensure that our mBuf has capacity to hold the data we're about to
  277. // read.
  278. if (!mBuf.SetLength(len + aCount, fallible))
  279. return NS_ERROR_OUT_OF_MEMORY;
  280. // Now read the data into our buffer.
  281. nsresult rv;
  282. uint32_t count;
  283. rv = aStream->Read(mBuf.BeginWriting() + len, aCount, &count);
  284. if (NS_FAILED(rv)) return rv;
  285. // Set the string's length according to the amount of data we've read.
  286. // Note: we know this to work on nsCString. This isn't guaranteed to
  287. // work on other strings.
  288. mBuf.SetLength(len + count);
  289. return ProcessData(aRequest, aCtxt);
  290. }
  291. nsresult
  292. nsDirIndexParser::ProcessData(nsIRequest *aRequest, nsISupports *aCtxt) {
  293. if (!mListener)
  294. return NS_ERROR_FAILURE;
  295. int32_t numItems = 0;
  296. while(true) {
  297. ++numItems;
  298. int32_t eol = mBuf.FindCharInSet("\n\r", mLineStart);
  299. if (eol < 0) break;
  300. mBuf.SetCharAt(char16_t('\0'), eol);
  301. const char *line = mBuf.get() + mLineStart;
  302. int32_t lineLen = eol - mLineStart;
  303. mLineStart = eol + 1;
  304. if (lineLen >= 4) {
  305. nsresult rv;
  306. const char *buf = line;
  307. if (buf[0] == '1') {
  308. if (buf[1] == '0') {
  309. if (buf[2] == '0' && buf[3] == ':') {
  310. // 100. Human-readable comment line. Ignore
  311. } else if (buf[2] == '1' && buf[3] == ':') {
  312. // 101. Human-readable information line.
  313. mComment.Append(buf + 4);
  314. char *value = ((char *)buf) + 4;
  315. nsUnescape(value);
  316. mListener->OnInformationAvailable(aRequest, aCtxt, NS_ConvertUTF8toUTF16(value));
  317. } else if (buf[2] == '2' && buf[3] == ':') {
  318. // 102. Human-readable information line, HTML.
  319. mComment.Append(buf + 4);
  320. }
  321. }
  322. } else if (buf[0] == '2') {
  323. if (buf[1] == '0') {
  324. if (buf[2] == '0' && buf[3] == ':') {
  325. // 200. Define field names
  326. rv = ParseFormat(buf + 4);
  327. if (NS_FAILED(rv)) {
  328. return rv;
  329. }
  330. } else if (buf[2] == '1' && buf[3] == ':') {
  331. // 201. Field data
  332. nsCOMPtr<nsIDirIndex> idx = do_CreateInstance("@mozilla.org/dirIndex;1",&rv);
  333. if (NS_FAILED(rv))
  334. return rv;
  335. rv = ParseData(idx, ((char *)buf) + 4, lineLen - 4);
  336. if (NS_FAILED(rv)) {
  337. return rv;
  338. }
  339. mListener->OnIndexAvailable(aRequest, aCtxt, idx);
  340. }
  341. }
  342. } else if (buf[0] == '3') {
  343. if (buf[1] == '0') {
  344. if (buf[2] == '0' && buf[3] == ':') {
  345. // 300. Self-referring URL
  346. } else if (buf[2] == '1' && buf[3] == ':') {
  347. // 301. OUR EXTENSION - encoding
  348. int i = 4;
  349. while (buf[i] && nsCRT::IsAsciiSpace(buf[i]))
  350. ++i;
  351. if (buf[i])
  352. SetEncoding(buf+i);
  353. }
  354. }
  355. }
  356. }
  357. }
  358. return NS_OK;
  359. }