WebArchiveDumpSupport.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. /*
  2. * Copyright (C) 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions
  6. * are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
  15. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  16. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  17. * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
  18. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  19. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  20. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  21. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. #include "config.h"
  26. #include "WebArchiveDumpSupport.h"
  27. #include <CFNetwork/CFNetwork.h>
  28. #include <CoreFoundation/CoreFoundation.h>
  29. #include <wtf/RetainPtr.h>
  30. extern "C" {
  31. CFURLRef CFURLResponseGetURL(CFURLResponseRef);
  32. CFStringRef CFURLResponseGetMIMEType(CFURLResponseRef);
  33. CFStringRef CFURLResponseGetTextEncodingName(CFURLResponseRef);
  34. SInt64 CFURLResponseGetExpectedContentLength(CFURLResponseRef);
  35. CFHTTPMessageRef CFURLResponseGetHTTPResponse(CFURLResponseRef);
  36. CFTypeID CFURLResponseGetTypeID(void);
  37. }
  38. static void convertMIMEType(CFMutableStringRef mimeType)
  39. {
  40. // Workaround for <rdar://problem/6234318> with Dashcode 2.0
  41. if (CFStringCompare(mimeType, CFSTR("application/x-javascript"), kCFCompareAnchored | kCFCompareCaseInsensitive) == kCFCompareEqualTo)
  42. CFStringReplaceAll(mimeType, CFSTR("text/javascript"));
  43. }
  44. static void convertWebResourceDataToString(CFMutableDictionaryRef resource)
  45. {
  46. CFMutableStringRef mimeType = (CFMutableStringRef)CFDictionaryGetValue(resource, CFSTR("WebResourceMIMEType"));
  47. CFStringLowercase(mimeType, CFLocaleGetSystem());
  48. convertMIMEType(mimeType);
  49. CFArrayRef supportedMIMETypes = supportedNonImageMIMETypes();
  50. if (CFStringHasPrefix(mimeType, CFSTR("text/")) || CFArrayContainsValue(supportedMIMETypes, CFRangeMake(0, CFArrayGetCount(supportedMIMETypes)), mimeType)) {
  51. CFStringRef textEncodingName = static_cast<CFStringRef>(CFDictionaryGetValue(resource, CFSTR("WebResourceTextEncodingName")));
  52. CFStringEncoding stringEncoding;
  53. if (textEncodingName && CFStringGetLength(textEncodingName))
  54. stringEncoding = CFStringConvertIANACharSetNameToEncoding(textEncodingName);
  55. else
  56. stringEncoding = kCFStringEncodingUTF8;
  57. CFDataRef data = static_cast<CFDataRef>(CFDictionaryGetValue(resource, CFSTR("WebResourceData")));
  58. RetainPtr<CFStringRef> dataAsString = adoptCF(CFStringCreateFromExternalRepresentation(kCFAllocatorDefault, data, stringEncoding));
  59. if (dataAsString)
  60. CFDictionarySetValue(resource, CFSTR("WebResourceData"), dataAsString.get());
  61. }
  62. }
  63. static void normalizeHTTPResponseHeaderFields(CFMutableDictionaryRef fields)
  64. {
  65. // Normalize headers
  66. if (CFDictionaryContainsKey(fields, CFSTR("Date")))
  67. CFDictionarySetValue(fields, CFSTR("Date"), CFSTR("Sun, 16 Nov 2008 17:00:00 GMT"));
  68. if (CFDictionaryContainsKey(fields, CFSTR("Last-Modified")))
  69. CFDictionarySetValue(fields, CFSTR("Last-Modified"), CFSTR("Sun, 16 Nov 2008 16:55:00 GMT"));
  70. if (CFDictionaryContainsKey(fields, CFSTR("Etag")))
  71. CFDictionarySetValue(fields, CFSTR("Etag"), CFSTR("\"301925-21-45c7d72d3e780\""));
  72. if (CFDictionaryContainsKey(fields, CFSTR("Server")))
  73. CFDictionarySetValue(fields, CFSTR("Server"), CFSTR("Apache/2.2.9 (Unix) mod_ssl/2.2.9 OpenSSL/0.9.7l PHP/5.2.6"));
  74. // Remove headers
  75. CFDictionaryRemoveValue(fields, CFSTR("Connection"));
  76. CFDictionaryRemoveValue(fields, CFSTR("Keep-Alive"));
  77. }
  78. static void normalizeWebResourceURL(CFMutableStringRef webResourceURL)
  79. {
  80. static CFIndex fileUrlLength = CFStringGetLength(CFSTR("file://"));
  81. CFRange layoutTestsWebArchivePathRange = CFStringFind(webResourceURL, CFSTR("/LayoutTests/"), kCFCompareBackwards);
  82. if (layoutTestsWebArchivePathRange.location == kCFNotFound)
  83. return;
  84. CFRange currentWorkingDirectoryRange = CFRangeMake(fileUrlLength, layoutTestsWebArchivePathRange.location - fileUrlLength);
  85. CFStringReplace(webResourceURL, currentWorkingDirectoryRange, CFSTR(""));
  86. }
  87. static void convertWebResourceResponseToDictionary(CFMutableDictionaryRef propertyList)
  88. {
  89. CFDataRef responseData = static_cast<CFDataRef>(CFDictionaryGetValue(propertyList, CFSTR("WebResourceResponse"))); // WebResourceResponseKey in WebResource.m
  90. if (CFGetTypeID(responseData) != CFDataGetTypeID())
  91. return;
  92. RetainPtr<CFURLResponseRef> response = adoptCF(createCFURLResponseFromResponseData(responseData));
  93. if (!response)
  94. return;
  95. RetainPtr<CFMutableDictionaryRef> responseDictionary = adoptCF(CFDictionaryCreateMutable(kCFAllocatorDefault, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks));
  96. RetainPtr<CFMutableStringRef> urlString = adoptCF(CFStringCreateMutableCopy(kCFAllocatorDefault, 0, CFURLGetString(CFURLResponseGetURL(response.get()))));
  97. normalizeWebResourceURL(urlString.get());
  98. CFDictionarySetValue(responseDictionary.get(), CFSTR("URL"), urlString.get());
  99. RetainPtr<CFMutableStringRef> mimeTypeString = adoptCF(CFStringCreateMutableCopy(kCFAllocatorDefault, 0, CFURLResponseGetMIMEType(response.get())));
  100. convertMIMEType(mimeTypeString.get());
  101. CFDictionarySetValue(responseDictionary.get(), CFSTR("MIMEType"), mimeTypeString.get());
  102. CFStringRef textEncodingName = CFURLResponseGetTextEncodingName(response.get());
  103. if (textEncodingName)
  104. CFDictionarySetValue(responseDictionary.get(), CFSTR("textEncodingName"), textEncodingName);
  105. SInt64 expectedContentLength = CFURLResponseGetExpectedContentLength(response.get());
  106. RetainPtr<CFNumberRef> expectedContentLengthNumber = adoptCF(CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &expectedContentLength));
  107. CFDictionarySetValue(responseDictionary.get(), CFSTR("expectedContentLength"), expectedContentLengthNumber.get());
  108. if (CFHTTPMessageRef httpMessage = CFURLResponseGetHTTPResponse(response.get())) {
  109. RetainPtr<CFDictionaryRef> allHeaders = adoptCF(CFHTTPMessageCopyAllHeaderFields(httpMessage));
  110. RetainPtr<CFMutableDictionaryRef> allHeaderFields = adoptCF(CFDictionaryCreateMutableCopy(kCFAllocatorDefault, 0, allHeaders.get()));
  111. normalizeHTTPResponseHeaderFields(allHeaderFields.get());
  112. CFDictionarySetValue(responseDictionary.get(), CFSTR("allHeaderFields"), allHeaderFields.get());
  113. CFIndex statusCode = CFHTTPMessageGetResponseStatusCode(httpMessage);
  114. RetainPtr<CFNumberRef> statusCodeNumber = adoptCF(CFNumberCreate(kCFAllocatorDefault, kCFNumberCFIndexType, &statusCode));
  115. CFDictionarySetValue(responseDictionary.get(), CFSTR("statusCode"), statusCodeNumber.get());
  116. }
  117. CFDictionarySetValue(propertyList, CFSTR("WebResourceResponse"), responseDictionary.get());
  118. }
  119. static CFComparisonResult compareResourceURLs(const void *val1, const void *val2, void *context)
  120. {
  121. CFStringRef url1 = static_cast<CFStringRef>(CFDictionaryGetValue(static_cast<CFDictionaryRef>(val1), CFSTR("WebResourceURL")));
  122. CFStringRef url2 = static_cast<CFStringRef>(CFDictionaryGetValue(static_cast<CFDictionaryRef>(val2), CFSTR("WebResourceURL")));
  123. return CFStringCompare(url1, url2, kCFCompareAnchored);
  124. }
  125. CFStringRef createXMLStringFromWebArchiveData(CFDataRef webArchiveData)
  126. {
  127. CFErrorRef error = 0;
  128. CFPropertyListFormat format = kCFPropertyListBinaryFormat_v1_0;
  129. RetainPtr<CFMutableDictionaryRef> propertyList = adoptCF((CFMutableDictionaryRef)CFPropertyListCreateWithData(kCFAllocatorDefault, webArchiveData, kCFPropertyListMutableContainersAndLeaves, &format, &error));
  130. if (!propertyList) {
  131. if (error)
  132. return CFErrorCopyDescription(error);
  133. return static_cast<CFStringRef>(CFRetain(CFSTR("An unknown error occurred converting data to property list.")));
  134. }
  135. RetainPtr<CFMutableArrayRef> resources = adoptCF(CFArrayCreateMutable(kCFAllocatorDefault, 0, &kCFTypeArrayCallBacks));
  136. CFArrayAppendValue(resources.get(), propertyList.get());
  137. while (CFArrayGetCount(resources.get())) {
  138. RetainPtr<CFMutableDictionaryRef> resourcePropertyList = (CFMutableDictionaryRef)CFArrayGetValueAtIndex(resources.get(), 0);
  139. CFArrayRemoveValueAtIndex(resources.get(), 0);
  140. CFMutableDictionaryRef mainResource = (CFMutableDictionaryRef)CFDictionaryGetValue(resourcePropertyList.get(), CFSTR("WebMainResource"));
  141. normalizeWebResourceURL((CFMutableStringRef)CFDictionaryGetValue(mainResource, CFSTR("WebResourceURL")));
  142. convertWebResourceDataToString(mainResource);
  143. // Add subframeArchives to list for processing
  144. CFMutableArrayRef subframeArchives = (CFMutableArrayRef)CFDictionaryGetValue(resourcePropertyList.get(), CFSTR("WebSubframeArchives")); // WebSubframeArchivesKey in WebArchive.m
  145. if (subframeArchives)
  146. CFArrayAppendArray(resources.get(), subframeArchives, CFRangeMake(0, CFArrayGetCount(subframeArchives)));
  147. CFMutableArrayRef subresources = (CFMutableArrayRef)CFDictionaryGetValue(resourcePropertyList.get(), CFSTR("WebSubresources")); // WebSubresourcesKey in WebArchive.m
  148. if (!subresources)
  149. continue;
  150. CFIndex subresourcesCount = CFArrayGetCount(subresources);
  151. for (CFIndex i = 0; i < subresourcesCount; ++i) {
  152. CFMutableDictionaryRef subresourcePropertyList = (CFMutableDictionaryRef)CFArrayGetValueAtIndex(subresources, i);
  153. normalizeWebResourceURL((CFMutableStringRef)CFDictionaryGetValue(subresourcePropertyList, CFSTR("WebResourceURL")));
  154. convertWebResourceResponseToDictionary(subresourcePropertyList);
  155. convertWebResourceDataToString(subresourcePropertyList);
  156. }
  157. // Sort the subresources so they're always in a predictable order for the dump
  158. CFArraySortValues(subresources, CFRangeMake(0, CFArrayGetCount(subresources)), compareResourceURLs, 0);
  159. }
  160. error = 0;
  161. RetainPtr<CFDataRef> xmlData = adoptCF(CFPropertyListCreateData(kCFAllocatorDefault, propertyList.get(), kCFPropertyListXMLFormat_v1_0, 0, &error));
  162. if (!xmlData) {
  163. if (error)
  164. return CFErrorCopyDescription(error);
  165. return static_cast<CFStringRef>(CFRetain(CFSTR("An unknown error occurred converting property list to data.")));
  166. }
  167. RetainPtr<CFStringRef> xmlString = adoptCF(CFStringCreateFromExternalRepresentation(kCFAllocatorDefault, xmlData.get(), kCFStringEncodingUTF8));
  168. RetainPtr<CFMutableStringRef> string = adoptCF(CFStringCreateMutableCopy(kCFAllocatorDefault, 0, xmlString.get()));
  169. // Replace "Apple Computer" with "Apple" in the DTD declaration.
  170. CFStringFindAndReplace(string.get(), CFSTR("-//Apple Computer//"), CFSTR("-//Apple//"), CFRangeMake(0, CFStringGetLength(string.get())), 0);
  171. return string.leakRef();
  172. }