testRegExp.cpp 14 KB

  1. /*
  2. * Copyright (C) 2011 Apple Inc. All rights reserved.
  3. *
  4. * This library is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Library General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2 of the License, or (at your option) any later version.
  8. *
  9. * This library is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Library General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Library General Public License
  15. * along with this library; see the file COPYING.LIB. If not, write to
  16. * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  17. * Boston, MA 02110-1301, USA.
  18. *
  19. */
  20. #include "config.h"
  21. #include "RegExp.h"
  22. #include "APIShims.h"
  23. #include <wtf/CurrentTime.h>
  24. #include "InitializeThreading.h"
  25. #include "JSGlobalObject.h"
  26. #include "Operations.h"
  27. #include <errno.h>
  28. #include <stdio.h>
  29. #include <stdlib.h>
  30. #include <string.h>
  31. #include <wtf/text/StringBuilder.h>
  32. #if !OS(WINDOWS)
  33. #include <unistd.h>
  34. #endif
  35. #if HAVE(SYS_TIME_H)
  36. #include <sys/time.h>
  37. #endif
  38. #if COMPILER(MSVC) && !OS(WINCE)
  39. #include <crtdbg.h>
  40. #include <mmsystem.h>
  41. #include <windows.h>
  42. #endif
  43. #if PLATFORM(QT)
  44. #include <QCoreApplication>
  45. #include <QDateTime>
  46. #endif
  // Size used for the line buffer when reading test data files: 100 KB.
  const int MaxLineLength = 1024 * 100;
  48. using namespace JSC;
  49. using namespace WTF;
  50. struct CommandLine {
  51. CommandLine()
  52. : interactive(false)
  53. , verbose(false)
  54. {
  55. }
  56. bool interactive;
  57. bool verbose;
  58. Vector<String> arguments;
  59. Vector<String> files;
  60. };
  61. class StopWatch {
  62. public:
  63. void start();
  64. void stop();
  65. long getElapsedMS(); // call stop() first
  66. private:
  67. double m_startTime;
  68. double m_stopTime;
  69. };
  70. void StopWatch::start()
  71. {
  72. m_startTime = currentTime();
  73. }
  74. void StopWatch::stop()
  75. {
  76. m_stopTime = currentTime();
  77. }
  78. long StopWatch::getElapsedMS()
  79. {
  80. return static_cast<long>((m_stopTime - m_startTime) * 1000);
  81. }
  82. struct RegExpTest {
  83. RegExpTest()
  84. : offset(0)
  85. , result(0)
  86. {
  87. }
  88. String subject;
  89. int offset;
  90. int result;
  91. Vector<int, 32> expectVector;
  92. };
  93. class GlobalObject : public JSGlobalObject {
  94. private:
  95. GlobalObject(VM&, Structure*, const Vector<String>& arguments);
  96. public:
  97. typedef JSGlobalObject Base;
  98. static GlobalObject* create(VM& vm, Structure* structure, const Vector<String>& arguments)
  99. {
  100. GlobalObject* globalObject = new (NotNull, allocateCell<GlobalObject>(vm.heap)) GlobalObject(vm, structure, arguments);
  101. vm.heap.addFinalizer(globalObject, destroy);
  102. return globalObject;
  103. }
  104. static const ClassInfo s_info;
  105. static const bool needsDestructor = false;
  106. static Structure* createStructure(VM& vm, JSValue prototype)
  107. {
  108. return Structure::create(vm, 0, prototype, TypeInfo(GlobalObjectType, StructureFlags), &s_info);
  109. }
  110. protected:
  111. void finishCreation(VM& vm, const Vector<String>& arguments)
  112. {
  113. Base::finishCreation(vm);
  114. UNUSED_PARAM(arguments);
  115. }
  116. };
  117. COMPILE_ASSERT(!IsInteger<GlobalObject>::value, WTF_IsInteger_GlobalObject_false);
  118. const ClassInfo GlobalObject::s_info = { "global", &JSGlobalObject::s_info, 0, ExecState::globalObjectTable, CREATE_METHOD_TABLE(GlobalObject) };
  119. GlobalObject::GlobalObject(VM& vm, Structure* structure, const Vector<String>& arguments)
  120. : JSGlobalObject(vm, structure)
  121. {
  122. finishCreation(vm, arguments);
  123. }
  124. // Use SEH for Release builds only to get rid of the crash report dialog
  125. // (luckily the same tests fail in Release and Debug builds so far). Need to
  126. // be in a separate main function because the realMain function requires object
  127. // unwinding.
  128. #if COMPILER(MSVC) && !COMPILER(INTEL) && !defined(_DEBUG) && !OS(WINCE)
  129. #define TRY __try {
  130. #define EXCEPT(x) } __except (EXCEPTION_EXECUTE_HANDLER) { x; }
  131. #else
  132. #define TRY
  133. #define EXCEPT(x)
  134. #endif
  135. int realMain(int argc, char** argv);
  136. int main(int argc, char** argv)
  137. {
  138. #if OS(WINDOWS)
  139. #if !OS(WINCE)
  140. // Cygwin calls ::SetErrorMode(SEM_FAILCRITICALERRORS), which we will inherit. This is bad for
  141. // testing/debugging, as it causes the post-mortem debugger not to be invoked. We reset the
  142. // error mode here to work around Cygwin's behavior. See <http://webkit.org/b/55222>.
  143. ::SetErrorMode(0);
  144. #endif
  145. #if defined(_DEBUG)
  146. _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR);
  147. _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE);
  148. _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR);
  149. _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE);
  150. _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
  151. _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE);
  152. #endif
  153. timeBeginPeriod(1);
  154. #endif
  155. #if PLATFORM(QT)
  156. QCoreApplication app(argc, argv);
  157. #endif
  158. // Initialize JSC before getting VM.
  159. JSC::initializeThreading();
  160. // We can't use destructors in the following code because it uses Windows
  161. // Structured Exception Handling
  162. int res = 0;
  163. TRY
  164. res = realMain(argc, argv);
  165. EXCEPT(res = 3)
  166. return res;
  167. }
  168. static bool testOneRegExp(VM& vm, RegExp* regexp, RegExpTest* regExpTest, bool verbose, unsigned int lineNumber)
  169. {
  170. bool result = true;
  171. Vector<int, 32> outVector;
  172. outVector.resize(regExpTest->expectVector.size());
  173. int matchResult = regexp->match(vm, regExpTest->subject, regExpTest->offset, outVector);
  174. if (matchResult != regExpTest->result) {
  175. result = false;
  176. if (verbose)
  177. printf("Line %d: results mismatch - expected %d got %d\n", lineNumber, regExpTest->result, matchResult);
  178. } else if (matchResult != -1) {
  179. if (outVector.size() != regExpTest->expectVector.size()) {
  180. result = false;
  181. if (verbose)
  182. printf("Line %d: output vector size mismatch - expected %lu got %lu\n", lineNumber, regExpTest->expectVector.size(), outVector.size());
  183. } else if (outVector.size() % 2) {
  184. result = false;
  185. if (verbose)
  186. printf("Line %d: output vector size is odd (%lu), should be even\n", lineNumber, outVector.size());
  187. } else {
  188. // Check in pairs since the first value of the pair could be -1 in which case the second doesn't matter.
  189. size_t pairCount = outVector.size() / 2;
  190. for (size_t i = 0; i < pairCount; ++i) {
  191. size_t startIndex = i*2;
  192. if (outVector[startIndex] != regExpTest->expectVector[startIndex]) {
  193. result = false;
  194. if (verbose)
  195. printf("Line %d: output vector mismatch at index %lu - expected %d got %d\n", lineNumber, startIndex, regExpTest->expectVector[startIndex], outVector[startIndex]);
  196. }
  197. if ((i > 0) && (regExpTest->expectVector[startIndex] != -1) && (outVector[startIndex+1] != regExpTest->expectVector[startIndex+1])) {
  198. result = false;
  199. if (verbose)
  200. printf("Line %d: output vector mismatch at index %lu - expected %d got %d\n", lineNumber, startIndex+1, regExpTest->expectVector[startIndex+1], outVector[startIndex+1]);
  201. }
  202. }
  203. }
  204. }
  205. return result;
  206. }
  207. static int scanString(char* buffer, int bufferLength, StringBuilder& builder, char termChar)
  208. {
  209. bool escape = false;
  210. for (int i = 0; i < bufferLength; ++i) {
  211. UChar c = buffer[i];
  212. if (escape) {
  213. switch (c) {
  214. case '0':
  215. c = '\0';
  216. break;
  217. case 'a':
  218. c = '\a';
  219. break;
  220. case 'b':
  221. c = '\b';
  222. break;
  223. case 'f':
  224. c = '\f';
  225. break;
  226. case 'n':
  227. c = '\n';
  228. break;
  229. case 'r':
  230. c = '\r';
  231. break;
  232. case 't':
  233. c = '\t';
  234. break;
  235. case 'v':
  236. c = '\v';
  237. break;
  238. case '\\':
  239. c = '\\';
  240. break;
  241. case '?':
  242. c = '\?';
  243. break;
  244. case 'u':
  245. if ((i + 4) >= bufferLength)
  246. return -1;
  247. unsigned int charValue;
  248. if (sscanf(buffer+i+1, "%04x", &charValue) != 1)
  249. return -1;
  250. c = static_cast<UChar>(charValue);
  251. i += 4;
  252. break;
  253. }
  254. builder.append(c);
  255. escape = false;
  256. } else {
  257. if (c == termChar)
  258. return i;
  259. if (c == '\\')
  260. escape = true;
  261. else
  262. builder.append(c);
  263. }
  264. }
  265. return -1;
  266. }
  267. static RegExp* parseRegExpLine(VM& vm, char* line, int lineLength)
  268. {
  269. StringBuilder pattern;
  270. if (line[0] != '/')
  271. return 0;
  272. int i = scanString(line + 1, lineLength - 1, pattern, '/') + 1;
  273. if ((i >= lineLength) || (line[i] != '/'))
  274. return 0;
  275. ++i;
  276. return RegExp::create(vm, pattern.toString(), regExpFlags(line + i));
  277. }
  278. static RegExpTest* parseTestLine(char* line, int lineLength)
  279. {
  280. StringBuilder subjectString;
  281. if ((line[0] != ' ') || (line[1] != '"'))
  282. return 0;
  283. int i = scanString(line + 2, lineLength - 2, subjectString, '"') + 2;
  284. if ((i >= (lineLength - 2)) || (line[i] != '"') || (line[i+1] != ',') || (line[i+2] != ' '))
  285. return 0;
  286. i += 3;
  287. int offset;
  288. if (sscanf(line + i, "%d, ", &offset) != 1)
  289. return 0;
  290. while (line[i] && line[i] != ' ')
  291. ++i;
  292. ++i;
  293. int matchResult;
  294. if (sscanf(line + i, "%d, ", &matchResult) != 1)
  295. return 0;
  296. while (line[i] && line[i] != ' ')
  297. ++i;
  298. ++i;
  299. if (line[i++] != '(')
  300. return 0;
  301. int start, end;
  302. RegExpTest* result = new RegExpTest();
  303. result->subject = subjectString.toString();
  304. result->offset = offset;
  305. result->result = matchResult;
  306. while (line[i] && line[i] != ')') {
  307. if (sscanf(line + i, "%d, %d", &start, &end) != 2) {
  308. delete result;
  309. return 0;
  310. }
  311. result->expectVector.append(start);
  312. result->expectVector.append(end);
  313. while (line[i] && (line[i] != ',') && (line[i] != ')'))
  314. i++;
  315. i++;
  316. while (line[i] && (line[i] != ',') && (line[i] != ')'))
  317. i++;
  318. if (line[i] == ')')
  319. break;
  320. if (!line[i] || (line[i] != ',')) {
  321. delete result;
  322. return 0;
  323. }
  324. i++;
  325. }
  326. return result;
  327. }
  328. static bool runFromFiles(GlobalObject* globalObject, const Vector<String>& files, bool verbose)
  329. {
  330. String script;
  331. String fileName;
  332. Vector<char> scriptBuffer;
  333. unsigned tests = 0;
  334. unsigned failures = 0;
  335. char* lineBuffer = new char[MaxLineLength + 1];
  336. VM& vm = globalObject->vm();
  337. bool success = true;
  338. for (size_t i = 0; i < files.size(); i++) {
  339. FILE* testCasesFile = fopen(files[i].utf8().data(), "rb");
  340. if (!testCasesFile) {
  341. printf("Unable to open test data file \"%s\"\n", files[i].utf8().data());
  342. continue;
  343. }
  344. RegExp* regexp = 0;
  345. size_t lineLength = 0;
  346. char* linePtr = 0;
  347. unsigned int lineNumber = 0;
  348. while ((linePtr = fgets(&lineBuffer[0], MaxLineLength, testCasesFile))) {
  349. lineLength = strlen(linePtr);
  350. if (linePtr[lineLength - 1] == '\n') {
  351. linePtr[lineLength - 1] = '\0';
  352. --lineLength;
  353. }
  354. ++lineNumber;
  355. if (linePtr[0] == '#')
  356. continue;
  357. if (linePtr[0] == '/') {
  358. regexp = parseRegExpLine(vm, linePtr, lineLength);
  359. } else if (linePtr[0] == ' ') {
  360. RegExpTest* regExpTest = parseTestLine(linePtr, lineLength);
  361. if (regexp && regExpTest) {
  362. ++tests;
  363. if (!testOneRegExp(vm, regexp, regExpTest, verbose, lineNumber)) {
  364. failures++;
  365. printf("Failure on line %u\n", lineNumber);
  366. }
  367. }
  368. if (regExpTest)
  369. delete regExpTest;
  370. }
  371. }
  372. fclose(testCasesFile);
  373. }
  374. if (failures)
  375. printf("%u tests run, %u failures\n", tests, failures);
  376. else
  377. printf("%u tests passed\n", tests);
  378. delete[] lineBuffer;
  379. vm.dumpSampleData(globalObject->globalExec());
  381. vm.dumpRegExpTrace();
  382. #endif
  383. return success;
  384. }
  385. #define RUNNING_FROM_XCODE 0
  386. static NO_RETURN void printUsageStatement(bool help = false)
  387. {
  388. fprintf(stderr, "Usage: regexp_test [options] file\n");
  389. fprintf(stderr, " -h|--help Prints this help message\n");
  390. fprintf(stderr, " -v|--verbose Verbose output\n");
  391. exit(help ? EXIT_SUCCESS : EXIT_FAILURE);
  392. }
  393. static void parseArguments(int argc, char** argv, CommandLine& options)
  394. {
  395. int i = 1;
  396. for (; i < argc; ++i) {
  397. const char* arg = argv[i];
  398. if (!strcmp(arg, "-h") || !strcmp(arg, "--help"))
  399. printUsageStatement(true);
  400. if (!strcmp(arg, "-v") || !strcmp(arg, "--verbose"))
  401. options.verbose = true;
  402. else
  403. options.files.append(argv[i]);
  404. }
  405. for (; i < argc; ++i)
  406. options.arguments.append(argv[i]);
  407. }
  408. int realMain(int argc, char** argv)
  409. {
  410. VM* vm = VM::create(LargeHeap).leakRef();
  411. APIEntryShim shim(vm);
  412. CommandLine options;
  413. parseArguments(argc, argv, options);
  414. GlobalObject* globalObject = GlobalObject::create(*vm, GlobalObject::createStructure(*vm, jsNull()), options.arguments);
  415. bool success = runFromFiles(globalObject, options.files, options.verbose);
  416. return success ? 0 : 3;
  417. }