WebNSDataExtras.m 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
  1. /*
  2. * Copyright (C) 2005 Apple Computer, Inc. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions
  6. * are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
  14. * its contributors may be used to endorse or promote products derived
  15. * from this software without specific prior written permission.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
  18. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  19. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  20. * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
  21. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  22. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  23. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  24. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  26. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. */
  28. #import <WebKit/WebNSDataExtras.h>
  29. #import <WebKit/WebNSDataExtrasPrivate.h>
  30. #import <wtf/Assertions.h>
  @interface NSString (WebNSDataExtrasInternal)
  - (NSString *)_web_capitalizeRFC822HeaderFieldName;
  @end

  @implementation NSString (WebNSDataExtrasInternal)

  // Returns the receiver rewritten in conventional header-field casing: an
  // ASCII letter at the start of the string or directly after a '-' is
  // upper-cased, and every other ASCII letter is lower-cased (for example
  // "content-type" becomes "Content-Type"). All other characters are left
  // untouched. When no character needs to change, the receiver itself is
  // returned (retained and autoreleased) and no copy is made.
  - (NSString *)_web_capitalizeRFC822HeaderFieldName
  {
      CFStringRef fieldName = (CFStringRef)self;
      CFIndex length = CFStringGetLength(fieldName);

      // Scratch copies of the string. Exactly one of them is allocated, lazily,
      // the first time a character has to be rewritten.
      char *latin1Buffer = NULL;
      UniChar *unicodeBuffer = NULL;
      Boolean usingUnicodeBuffer = FALSE;
      Boolean madeChange = FALSE;

      // TRUE while the next letter begins a word (string start or after '-').
      Boolean atWordStart = TRUE;

      CFIndex index;
      for (index = 0; index < length; index++) {
          UniChar original = CFStringGetCharacterAtIndex(fieldName, index);
          UniChar adjusted = original;

          if (atWordStart) {
              if (original >= 'a' && original <= 'z')
                  adjusted = original + 'A' - 'a';
          } else {
              if (original >= 'A' && original <= 'Z')
                  adjusted = original + 'a' - 'A';
          }

          if (adjusted != original) {
              if (!madeChange) {
                  madeChange = TRUE;

                  // Asking CFStringGetBytes for a conversion without a buffer
                  // tells us how many characters fit in ISO Latin-1. If all of
                  // them do, the cheaper byte buffer is used.
                  CFIndex convertible = CFStringGetBytes(fieldName, CFRangeMake(0, length), kCFStringEncodingISOLatin1, 0, FALSE, NULL, 0, NULL);
                  usingUnicodeBuffer = (convertible != length);

                  if (usingUnicodeBuffer) {
                      unicodeBuffer = CFAllocatorAllocate(NULL, length * sizeof(UniChar), 0);
                      CFStringGetCharacters(fieldName, CFRangeMake(0, length), unicodeBuffer);
                  } else {
                      latin1Buffer = CFAllocatorAllocate(NULL, length + 1, 0);
                      CFStringGetCString(fieldName, latin1Buffer, length + 1, kCFStringEncodingISOLatin1);
                  }
              }

              if (usingUnicodeBuffer)
                  unicodeBuffer[index] = adjusted;
              else
                  latin1Buffer[index] = adjusted;
          }

          // Only a hyphen makes the following letter a word start.
          atWordStart = (adjusted == '-');
      }

      if (!madeChange)
          return [[self retain] autorelease];

      // The new string adopts the scratch buffer (no copy); passing NULL as the
      // contents deallocator lets CF free it with the default allocator.
      CFStringRef rebuilt;
      if (usingUnicodeBuffer)
          rebuilt = CFStringCreateWithCharactersNoCopy(NULL, unicodeBuffer, length, NULL);
      else
          rebuilt = CFStringCreateWithCStringNoCopy(NULL, latin1Buffer, kCFStringEncodingISOLatin1, NULL);

      return [(NSString *)CFMakeCollectable(rebuilt) autorelease];
  }

  @end
  @implementation NSData (WebKitExtras)

  // Sniffs the first WEB_GUESS_MIME_TYPE_PEEK_LENGTH bytes of the receiver for
  // the root element of an RSS or Atom feed.
  //
  // Returns @"application/rss+xml" for <rss>, or for <rdf> followed by a
  // <channel> element; @"application/atom+xml" for <feed>; nil otherwise.
  // Only tags starting with "<?" or "<!" may precede the root element.
  - (NSString *)_webkit_guessedMIMETypeForXML
  {
      int length = [self length];
      const UInt8 *bytes = [self bytes];

  // Number of bytes kept in reserve after the last position searched for '<',
  // so the tag comparisons below stay inside the data. Note that "<channel" is
  // 8 bytes, one more than this guarantees; that comparison is bounded
  // separately.
  #define CHANNEL_TAG_LENGTH 7

      const char *p = (const char *)bytes;
      int remaining = MIN(length, WEB_GUESS_MIME_TYPE_PEEK_LENGTH) - (CHANNEL_TAG_LENGTH - 1);

      BOOL foundRDF = NO;

      while (remaining > 0) {
          // Look for a "<".
          const char *hit = memchr(p, '<', remaining);
          if (!hit) {
              break;
          }

          // We are trying to identify RSS or Atom. RSS has a top-level
          // element of either <rss> or <rdf>. However, there are
          // non-RSS RDF files, so in the case of <rdf> we further look
          // for a <channel> element. In the case of an Atom file, a
          // top-level <feed> element is all we need to see. Only tags
          // starting with <? or <! can precede the root element. We
          // bail if we don't find an <rss>, <feed> or <rdf> element
          // right after those.
          if (foundRDF) {
              // The search window only guarantees CHANNEL_TAG_LENGTH bytes from
              // "hit" to the end of the data, so make sure all of "<channel"
              // fits before comparing; otherwise strncasecmp could read one
              // byte past the end of the buffer.
              size_t available = (size_t)(length - (int)(hit - (const char *)bytes));
              if (available >= strlen("<channel") && strncasecmp(hit, "<channel", strlen("<channel")) == 0) {
                  return @"application/rss+xml";
              }
          } else if (strncasecmp(hit, "<rdf", strlen("<rdf")) == 0) {
              foundRDF = YES;
          } else if (strncasecmp(hit, "<rss", strlen("<rss")) == 0) {
              return @"application/rss+xml";
          } else if (strncasecmp(hit, "<feed", strlen("<feed")) == 0) {
              return @"application/atom+xml";
          } else if (strncasecmp(hit, "<?", strlen("<?")) != 0 && strncasecmp(hit, "<!", strlen("<!")) != 0) {
              return nil;
          }

          // Skip the "<" and continue.
          remaining -= (hit + 1) - p;
          p = hit + 1;
      }

  #undef CHANNEL_TAG_LENGTH

      return nil;
  }

  // Guesses a MIME type from the content of the receiver.
  //
  // The checks run in this order and the first match wins:
  //   1. RSS/Atom feed (see -_webkit_guessedMIMETypeForXML)
  //   2. HTML markers ("<html>", "<a ", "<script", "<title>") -> text/html
  //   3. a literal "text/html" inside the content             -> text/html
  //   4. "BEGIN:VCARD" / "BEGIN:VCALENDAR" prefix  -> text/vcard / text/calendar
  //   5. only printable ASCII plus tab/CR/LF                  -> text/plain
  //   6. JPEG (JFIF) magic number                             -> image/jpeg
  // Returns nil when nothing matches. Checks 1-3 only look at the first
  // WEB_GUESS_MIME_TYPE_PEEK_LENGTH bytes; check 5 scans the whole data.
  - (NSString *)_webkit_guessedMIMEType
  {
  #define JPEG_MAGIC_NUMBER_LENGTH 4
  #define SCRIPT_TAG_LENGTH 7
  #define TEXT_HTML_LENGTH 9
  #define VCARD_HEADER_LENGTH 11
  #define VCAL_HEADER_LENGTH 15

      NSString *MIMEType = [self _webkit_guessedMIMETypeForXML];
      if ([MIMEType length])
          return MIMEType;

      int length = [self length];
      const char *bytes = [self bytes];

      const char *p = bytes;
      // Stop searching SCRIPT_TAG_LENGTH - 1 bytes before the end of the peek
      // window so that the longest tag compared below (7 bytes) always fits.
      int remaining = MIN(length, WEB_GUESS_MIME_TYPE_PEEK_LENGTH) - (SCRIPT_TAG_LENGTH - 1);
      while (remaining > 0) {
          // Look for a "<".
          const char *hit = memchr(p, '<', remaining);
          if (!hit) {
              break;
          }

          // If we found a "<", look for "<html>" or "<a " or "<script".
          if (strncasecmp(hit, "<html>", strlen("<html>")) == 0 ||
              strncasecmp(hit, "<a ", strlen("<a ")) == 0 ||
              strncasecmp(hit, "<script", strlen("<script")) == 0 ||
              strncasecmp(hit, "<title>", strlen("<title>")) == 0) {
              return @"text/html";
          }

          // Skip the "<" and continue.
          remaining -= (hit + 1) - p;
          p = hit + 1;
      }

      // Test for a broken server which has sent the content type as part of the content.
      // This code could be improved to look for other mime types.
      p = bytes;
      remaining = MIN(length, WEB_GUESS_MIME_TYPE_PEEK_LENGTH) - (TEXT_HTML_LENGTH - 1);
      while (remaining > 0) {
          // Look for a "t" or "T", whichever comes first.
          const char *hit = NULL;
          const char *lowerhit = memchr(p, 't', remaining);
          const char *upperhit = memchr(p, 'T', remaining);
          if (!lowerhit && !upperhit) {
              break;
          }
          if (!lowerhit) {
              hit = upperhit;
          } else if (!upperhit) {
              hit = lowerhit;
          } else {
              hit = MIN(lowerhit, upperhit);
          }

          // If we found a "t/T", look for "text/html".
          if (strncasecmp(hit, "text/html", TEXT_HTML_LENGTH) == 0) {
              return @"text/html";
          }

          // Skip the "t/T" and continue.
          remaining -= (hit + 1) - p;
          p = hit + 1;
      }

      if ((length >= VCARD_HEADER_LENGTH) && strncmp(bytes, "BEGIN:VCARD", VCARD_HEADER_LENGTH) == 0) {
          return @"text/vcard";
      }
      if ((length >= VCAL_HEADER_LENGTH) && strncmp(bytes, "BEGIN:VCALENDAR", VCAL_HEADER_LENGTH) == 0) {
          return @"text/calendar";
      }

      // Test for plain text: every byte must be printable ASCII or one of
      // tab, carriage return, line feed.
      int i;
      for (i = 0; i < length; i++) {
          char c = bytes[i];
          if ((c < 0x20 || c > 0x7E) && (c != '\t' && c != '\r' && c != '\n')) {
              break;
          }
      }
      if (i == length) {
          // Didn't encounter any bad characters, looks like plain text.
          // (This is also the result for empty data.)
          return @"text/plain";
      }

      // Looks like this is a binary file.

      // Sniff for the JPEG magic number.
      if ((length >= JPEG_MAGIC_NUMBER_LENGTH) && strncmp(bytes, "\xFF\xD8\xFF\xE0", JPEG_MAGIC_NUMBER_LENGTH) == 0) {
          return @"image/jpeg";
      }

  #undef JPEG_MAGIC_NUMBER_LENGTH
  #undef SCRIPT_TAG_LENGTH
  #undef TEXT_HTML_LENGTH
  #undef VCARD_HEADER_LENGTH
  #undef VCAL_HEADER_LENGTH

      return nil;
  }

  @end
  @implementation NSData (WebNSDataExtras)

  // Returns YES when the receiver's bytes are, ignoring ASCII case, exactly the
  // characters of the NUL-terminated C string `string` (the terminator itself
  // is not part of the comparison). `string` must not be NULL.
  - (BOOL)_web_isCaseInsensitiveEqualToCString:(const char *)string
  {
      ASSERT(string);

      // The receiver is not NUL-terminated, so the comparison has to be bounded
      // by its length. Bounding alone would make any data that is merely a
      // prefix of `string` (including empty data) compare equal, so the lengths
      // must match as well.
      NSUInteger length = [self length];
      if (strlen(string) != length)
          return NO;
      if (length == 0)
          return YES;

      const char *bytes = [self bytes];
      return strncasecmp(bytes, string, length) == 0;
  }

  // Finds the first end-of-line character in the `len` bytes at `bytes`.
  //
  // According to the HTTP specification EOL is defined as
  // a CRLF pair. Unfortunately, some servers will use LF
  // instead. Worse yet, some servers will use a combination
  // of both (e.g. <header>CRLFLF<body>), so this function needs
  // to be forgiving. It accepts CRLF, LF, or CR.
  //
  // Returns a pointer to the first terminating character (the CR
  // of a CRLF pair), or NULL if no EOL is found. A CR that is the
  // very last byte of the buffer is not reported, because its LF
  // may only arrive with the next read.
  static const UInt8 *_findEOL(const UInt8 *bytes, CFIndex len) {
      CFIndex i;
      for (i = 0; i < len; i++) {
          UInt8 c = bytes[i];
          if ('\n' == c)
              return bytes + i;
          if ('\r' == c) {
              // Check to see if spanning buffer bounds
              // (CRLF is across reads). If so, wait for
              // next read.
              if (i + 1 == len)
                  break;
              return bytes + i;
          }
      }

      return NULL;
  }

  // Parses the receiver as a block of RFC 822 style header lines and returns a
  // mutable dictionary mapping field names to values.
  //
  //  - Field names are normalized with -_web_capitalizeRFC822HeaderFieldName.
  //  - Whitespace (space/tab) between the colon and the value is dropped.
  //  - A line starting with a space or tab is a continuation and is appended
  //    verbatim to the value of the preceding field.
  //  - A repeated field has its values joined with ", ".
  //  - Lines without a colon, and continuation lines that precede any field,
  //    are ignored.
  // Parsing stops at the first blank line or when no further end-of-line can
  // be found. Bytes are decoded as ISO Latin-1.
  - (NSMutableDictionary *)_webkit_parseRFC822HeaderFields
  {
      NSMutableDictionary *headerFields = [NSMutableDictionary dictionary];

      const UInt8 *bytes = [self bytes];
      unsigned length = [self length];
      NSString *lastKey = nil;
      const UInt8 *eol;

      // Loop over lines until we're past the header, or we can't find any more end-of-lines
      while ((eol = _findEOL(bytes, length))) {
          const UInt8 *line = bytes;
          SInt32 lineLength = eol - bytes;

          // Move bytes to the character after the terminator as returned by _findEOL.
          bytes = eol + 1;
          if (('\r' == *eol) && ('\n' == *bytes)) {
              bytes++; // Safe since _findEOL won't return a spanning CRLF.
          }

          length -= (bytes - line);
          if (lineLength == 0) {
              // Blank line; we're at the end of the header
              break;
          } else if (*line == ' ' || *line == '\t') {
              // Continuation of the previous header
              if (!lastKey) {
                  // malformed header; ignore it and continue
                  continue;
              } else {
                  // Merge the continuation of the previous header
                  NSString *currentValue = [headerFields objectForKey:lastKey];
                  NSString *newValue = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, line, lineLength, kCFStringEncodingISOLatin1, FALSE));
                  ASSERT(currentValue);
                  ASSERT(newValue);
                  NSString *mergedValue = [[NSString alloc] initWithFormat:@"%@%@", currentValue, newValue];
                  [headerFields setObject:(NSString *)mergedValue forKey:lastKey];
                  [newValue release];
                  [mergedValue release];
                  // Note: currentValue is autoreleased
              }
          } else {
              // Brand new header: locate the colon separating name and value.
              const UInt8 *colon;
              for (colon = line; *colon != ':' && colon != eol; colon++) {
                  // empty loop
              }
              if (colon == eol) {
                  // malformed header; ignore it and continue
                  continue;
              } else {
                  lastKey = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, line, colon - line, kCFStringEncodingISOLatin1, FALSE));
                  [lastKey autorelease];
                  NSString *value = [lastKey _web_capitalizeRFC822HeaderFieldName];
                  lastKey = value;

                  // Skip the colon and any whitespace that follows it.
                  for (colon++; colon != eol; colon++) {
                      if (*colon != ' ' && *colon != '\t') {
                          break;
                      }
                  }
                  if (colon == eol) {
                      // Nothing but whitespace after the colon: empty value.
                      value = [[NSString alloc] initWithString:@""];
                      [value autorelease];
                  } else {
                      value = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, colon, eol - colon, kCFStringEncodingISOLatin1, FALSE));
                      [value autorelease];
                  }

                  // A field seen before gets the new value appended.
                  NSString *oldValue = [headerFields objectForKey:lastKey];
                  if (oldValue) {
                      NSString *newValue = [[NSString alloc] initWithFormat:@"%@, %@", oldValue, value];
                      value = newValue;
                      [newValue autorelease];
                  }

                  [headerFields setObject:(NSString *)value forKey:lastKey];
              }
          }
      }

      return headerFields;
  }

  // Returns YES when the receiver is non-empty and its first byte is a line feed.
  - (BOOL)_web_startsWithBlankLine
  {
      return [self length] > 0 && ((const char *)[self bytes])[0] == '\n';
  }

  // Returns the offset of the first byte after the first blank line in the
  // receiver, or NSNotFound if there is none. Recognized blank-line forms are
  // "\n\n", "\r\n\n", "\r\n\r\n", and a "\r\n" at the very start of the data.
  // The last four bytes of the data are never used as a starting position, so
  // data shorter than five bytes always yields NSNotFound.
  - (NSInteger)_web_locationAfterFirstBlankLine
  {
      const char *bytes = (const char *)[self bytes];
      unsigned length = [self length];

      // "length - 4" below is unsigned arithmetic: for fewer than four bytes it
      // would wrap around to a huge value and the loop would read far past the
      // end of the buffer.
      if (length < 4)
          return NSNotFound;

      unsigned i;
      for (i = 0; i < length - 4; i++) {

          // Support for Acrobat. It sends "\n\n".
          if (bytes[i] == '\n' && bytes[i+1] == '\n') {
              return i+2;
          }

          // Returns the position after 2 CRLF's or 1 CRLF if it is the first line.
          if (bytes[i] == '\r' && bytes[i+1] == '\n') {
              i += 2;
              if (i == 2) {
                  return i;
              } else if (bytes[i] == '\n') {
                  // Support for Director. It sends "\r\n\n" (3880387).
                  return i+1;
              } else if (bytes[i] == '\r' && bytes[i+1] == '\n') {
                  // Support for Flash. It sends "\r\n\r\n" (3758113).
                  return i+2;
              }
          }
      }
      return NSNotFound;
  }

  @end