winutils.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. /*
  2. * winutils.c: miscellaneous Windows utilities for GUI apps
  3. */
  4. #include <stdio.h>
  5. #include <stdlib.h>
  6. #include <ctype.h>
  7. #include "misc.h"
  8. #ifdef TESTMODE
  9. /* Definitions to allow this module to be compiled standalone for testing. */
  10. #define smalloc malloc
  11. #define srealloc realloc
  12. #define sfree free
  13. #endif
  14. /*
  15. * Split a complete command line into argc/argv, attempting to do
  16. * it exactly the same way Windows itself would do it (so that
  17. * console utilities, which receive argc and argv from Windows,
  18. * will have their command lines processed in the same way as GUI
  19. * utilities which get a whole command line and must break it
  20. * themselves).
  21. *
  22. * Does not modify the input command line.
  23. *
  24. * The final parameter (argstart) is used to return a second array
  25. * of char * pointers, the same length as argv, each one pointing
  26. * at the start of the corresponding element of argv in the
  27. * original command line. So if you get half way through processing
  28. * your command line in argc/argv form and then decide you want to
  29. * treat the rest as a raw string, you can. If you don't want to,
  30. * `argstart' can be safely left NULL.
  31. */
  32. void split_into_argv(char *cmdline, int *argc, char ***argv,
  33. char ***argstart)
  34. {
  35. char *p;
  36. char *outputline, *q;
  37. char **outputargv, **outputargstart;
  38. int outputargc;
  39. /*
  40. * At first glance the rules appeared to be:
  41. *
  42. * - Single quotes are not special characters.
  43. *
  44. * - Double quotes are removed, but within them spaces cease
  45. * to be special.
  46. *
  47. * - Backslashes are _only_ special when a sequence of them
  48. * appear just before a double quote. In this situation,
  49. * they are treated like C backslashes: so \" just gives a
  50. * literal quote, \\" gives a literal backslash and then
  51. * opens or closes a double-quoted segment, \\\" gives a
  52. * literal backslash and then a literal quote, \\\\" gives
  53. * two literal backslashes and then opens/closes a
  54. * double-quoted segment, and so forth. Note that this
  55. * behaviour is identical inside and outside double quotes.
  56. *
  57. * - Two successive double quotes become one literal double
  58. * quote, but only _inside_ a double-quoted segment.
  59. * Outside, they just form an empty double-quoted segment
  60. * (which may cause an empty argument word).
  61. *
  62. * - That only leaves the interesting question of what happens
  63. * when one or more backslashes precedes two or more double
  64. * quotes, starting inside a double-quoted string. And the
  65. * answer to that appears somewhat bizarre. Here I tabulate
  66. * number of backslashes (across the top) against number of
  67. * quotes (down the left), and indicate how many backslashes
  68. * are output, how many quotes are output, and whether a
  69. * quoted segment is open at the end of the sequence:
  70. *
  71. * backslashes
  72. *
  73. * 0 1 2 3 4
  74. *
  75. * 0 0,0,y | 1,0,y 2,0,y 3,0,y 4,0,y
  76. * --------+-----------------------------
  77. * 1 0,0,n | 0,1,y 1,0,n 1,1,y 2,0,n
  78. * q 2 0,1,n | 0,1,n 1,1,n 1,1,n 2,1,n
  79. * u 3 0,1,y | 0,2,n 1,1,y 1,2,n 2,1,y
  80. * o 4 0,1,n | 0,2,y 1,1,n 1,2,y 2,1,n
  81. * t 5 0,2,n | 0,2,n 1,2,n 1,2,n 2,2,n
  82. * e 6 0,2,y | 0,3,n 1,2,y 1,3,n 2,2,y
  83. * s 7 0,2,n | 0,3,y 1,2,n 1,3,y 2,2,n
  84. * 8 0,3,n | 0,3,n 1,3,n 1,3,n 2,3,n
  85. * 9 0,3,y | 0,4,n 1,3,y 1,4,n 2,3,y
  86. * 10 0,3,n | 0,4,y 1,3,n 1,4,y 2,3,n
  87. * 11 0,4,n | 0,4,n 1,4,n 1,4,n 2,4,n
  88. *
  89. *
  90. * [Test fragment was of the form "a\\\"""b c" d.]
  91. *
  92. * There is very weird mod-3 behaviour going on here in the
  93. * number of quotes, and it even applies when there aren't any
  94. * backslashes! How ghastly.
  95. *
  96. * With a bit of thought, this extremely odd diagram suddenly
  97. * coalesced itself into a coherent, if still ghastly, model of
  98. * how things work:
  99. *
  100. * - As before, backslashes are only special when one or more
  101. * of them appear contiguously before at least one double
  102. * quote. In this situation the backslashes do exactly what
  103. * you'd expect: each one quotes the next thing in front of
  104. * it, so you end up with n/2 literal backslashes (if n is
  105. * even) or (n-1)/2 literal backslashes and a literal quote
  106. * (if n is odd). In the latter case the double quote
  107. * character right after the backslashes is used up.
  108. *
  109. * - After that, any remaining double quotes are processed. A
  110. * string of contiguous unescaped double quotes has a mod-3
  111. * behaviour:
  112. *
  113. * * inside a quoted segment, a quote ends the segment.
  114. * * _immediately_ after ending a quoted segment, a quote
  115. * simply produces a literal quote.
  116. * * otherwise, outside a quoted segment, a quote begins a
  117. * quoted segment.
  118. *
  119. * So, for example, if we started inside a quoted segment
  120. * then two contiguous quotes would close the segment and
  121. * produce a literal quote; three would close the segment,
  122. * produce a literal quote, and open a new segment. If we
  123. * started outside a quoted segment, then two contiguous
  124. * quotes would open and then close a segment, producing no
  125. * output (but potentially creating a zero-length argument);
  126. * but three quotes would open and close a segment and then
  127. * produce a literal quote.
  128. */
  129. /*
  130. * First deal with the simplest of all special cases: if there
  131. * aren't any arguments, return 0,NULL,NULL.
  132. */
  133. while (*cmdline && isspace(*cmdline)) cmdline++;
  134. if (!*cmdline) {
  135. if (argc) *argc = 0;
  136. if (argv) *argv = NULL;
  137. if (argstart) *argstart = NULL;
  138. return;
  139. }
  140. /*
  141. * This will guaranteeably be big enough; we can realloc it
  142. * down later.
  143. */
  144. outputline = snewn(1+strlen(cmdline), char);
  145. outputargv = snewn(strlen(cmdline)+1 / 2, char *);
  146. outputargstart = snewn(strlen(cmdline)+1 / 2, char *);
  147. p = cmdline; q = outputline; outputargc = 0;
  148. while (*p) {
  149. int quote;
  150. /* Skip whitespace searching for start of argument. */
  151. while (*p && isspace(*p)) p++;
  152. if (!*p) break;
  153. /* We have an argument; start it. */
  154. outputargv[outputargc] = q;
  155. outputargstart[outputargc] = p;
  156. outputargc++;
  157. quote = 0;
  158. /* Copy data into the argument until it's finished. */
  159. while (*p) {
  160. if (!quote && isspace(*p))
  161. break; /* argument is finished */
  162. if (*p == '"' || *p == '\\') {
  163. /*
  164. * We have a sequence of zero or more backslashes
  165. * followed by a sequence of zero or more quotes.
  166. * Count up how many of each, and then deal with
  167. * them as appropriate.
  168. */
  169. int i, slashes = 0, quotes = 0;
  170. while (*p == '\\') slashes++, p++;
  171. while (*p == '"') quotes++, p++;
  172. if (!quotes) {
  173. /*
  174. * Special case: if there are no quotes,
  175. * slashes are not special at all, so just copy
  176. * n slashes to the output string.
  177. */
  178. while (slashes--) *q++ = '\\';
  179. } else {
  180. /* Slashes annihilate in pairs. */
  181. while (slashes >= 2) slashes -= 2, *q++ = '\\';
  182. /* One remaining slash takes out the first quote. */
  183. if (slashes) quotes--, *q++ = '"';
  184. if (quotes > 0) {
  185. /* Outside a quote segment, a quote starts one. */
  186. if (!quote) quotes--, quote = 1;
  187. /* Now we produce (n+1)/3 literal quotes... */
  188. for (i = 3; i <= quotes+1; i += 3) *q++ = '"';
  189. /* ... and end in a quote segment iff 3 divides n. */
  190. quote = (quotes % 3 == 0);
  191. }
  192. }
  193. } else {
  194. *q++ = *p++;
  195. }
  196. }
  197. /* At the end of an argument, just append a trailing NUL. */
  198. *q++ = '\0';
  199. }
  200. outputargv = sresize(outputargv, outputargc, char *);
  201. outputargstart = sresize(outputargstart, outputargc, char *);
  202. if (argc) *argc = outputargc;
  203. if (argv) *argv = outputargv; else sfree(outputargv);
  204. if (argstart) *argstart = outputargstart; else sfree(outputargstart);
  205. }
  206. #ifdef TESTMODE
  207. const struct argv_test {
  208. const char *cmdline;
  209. const char *argv[10];
  210. } argv_tests[] = {
  211. /*
  212. * We generate this set of tests by invoking ourself with
  213. * `-generate'.
  214. */
  215. {"ab c\" d", {"ab", "c d", NULL}},
  216. {"a\"b c\" d", {"ab c", "d", NULL}},
  217. {"a\"\"b c\" d", {"ab", "c d", NULL}},
  218. {"a\"\"\"b c\" d", {"a\"b", "c d", NULL}},
  219. {"a\"\"\"\"b c\" d", {"a\"b c", "d", NULL}},
  220. {"a\"\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
  221. {"a\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
  222. {"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
  223. {"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
  224. {"a\\b c\" d", {"a\\b", "c d", NULL}},
  225. {"a\\\"b c\" d", {"a\"b", "c d", NULL}},
  226. {"a\\\"\"b c\" d", {"a\"b c", "d", NULL}},
  227. {"a\\\"\"\"b c\" d", {"a\"b", "c d", NULL}},
  228. {"a\\\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
  229. {"a\\\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
  230. {"a\\\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
  231. {"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
  232. {"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
  233. {"a\\\\b c\" d", {"a\\\\b", "c d", NULL}},
  234. {"a\\\\\"b c\" d", {"a\\b c", "d", NULL}},
  235. {"a\\\\\"\"b c\" d", {"a\\b", "c d", NULL}},
  236. {"a\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
  237. {"a\\\\\"\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
  238. {"a\\\\\"\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
  239. {"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
  240. {"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
  241. {"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
  242. {"a\\\\\\b c\" d", {"a\\\\\\b", "c d", NULL}},
  243. {"a\\\\\\\"b c\" d", {"a\\\"b", "c d", NULL}},
  244. {"a\\\\\\\"\"b c\" d", {"a\\\"b c", "d", NULL}},
  245. {"a\\\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
  246. {"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
  247. {"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
  248. {"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
  249. {"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
  250. {"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
  251. {"a\\\\\\\\b c\" d", {"a\\\\\\\\b", "c d", NULL}},
  252. {"a\\\\\\\\\"b c\" d", {"a\\\\b c", "d", NULL}},
  253. {"a\\\\\\\\\"\"b c\" d", {"a\\\\b", "c d", NULL}},
  254. {"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
  255. {"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
  256. {"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
  257. {"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
  258. {"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
  259. {"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
  260. {"\"ab c\" d", {"ab c", "d", NULL}},
  261. {"\"a\"b c\" d", {"ab", "c d", NULL}},
  262. {"\"a\"\"b c\" d", {"a\"b", "c d", NULL}},
  263. {"\"a\"\"\"b c\" d", {"a\"b c", "d", NULL}},
  264. {"\"a\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
  265. {"\"a\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
  266. {"\"a\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
  267. {"\"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
  268. {"\"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
  269. {"\"a\\b c\" d", {"a\\b c", "d", NULL}},
  270. {"\"a\\\"b c\" d", {"a\"b c", "d", NULL}},
  271. {"\"a\\\"\"b c\" d", {"a\"b", "c d", NULL}},
  272. {"\"a\\\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
  273. {"\"a\\\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
  274. {"\"a\\\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
  275. {"\"a\\\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
  276. {"\"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
  277. {"\"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
  278. {"\"a\\\\b c\" d", {"a\\\\b c", "d", NULL}},
  279. {"\"a\\\\\"b c\" d", {"a\\b", "c d", NULL}},
  280. {"\"a\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
  281. {"\"a\\\\\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
  282. {"\"a\\\\\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
  283. {"\"a\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
  284. {"\"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
  285. {"\"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
  286. {"\"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
  287. {"\"a\\\\\\b c\" d", {"a\\\\\\b c", "d", NULL}},
  288. {"\"a\\\\\\\"b c\" d", {"a\\\"b c", "d", NULL}},
  289. {"\"a\\\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
  290. {"\"a\\\\\\\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
  291. {"\"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
  292. {"\"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
  293. {"\"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
  294. {"\"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
  295. {"\"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
  296. {"\"a\\\\\\\\b c\" d", {"a\\\\\\\\b c", "d", NULL}},
  297. {"\"a\\\\\\\\\"b c\" d", {"a\\\\b", "c d", NULL}},
  298. {"\"a\\\\\\\\\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
  299. {"\"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
  300. {"\"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
  301. {"\"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
  302. {"\"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
  303. {"\"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
  304. {"\"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b", "c d", NULL}},
  305. };
  306. int main(int argc, char **argv)
  307. {
  308. int i, j;
  309. if (argc > 1) {
  310. /*
  311. * Generation of tests.
  312. *
  313. * Given `-splat <args>', we print out a C-style
  314. * representation of each argument (in the form "a", "b",
  315. * NULL), backslash-escaping each backslash and double
  316. * quote.
  317. *
  318. * Given `-split <string>', we first doctor `string' by
  319. * turning forward slashes into backslashes, single quotes
  320. * into double quotes and underscores into spaces; and then
  321. * we feed the resulting string to ourself with `-splat'.
  322. *
  323. * Given `-generate', we concoct a variety of fun test
  324. * cases, encode them in quote-safe form (mapping \, " and
  325. * space to /, ' and _ respectively) and feed each one to
  326. * `-split'.
  327. */
  328. if (!strcmp(argv[1], "-splat")) {
  329. int i;
  330. char *p;
  331. for (i = 2; i < argc; i++) {
  332. putchar('"');
  333. for (p = argv[i]; *p; p++) {
  334. if (*p == '\\' || *p == '"')
  335. putchar('\\');
  336. putchar(*p);
  337. }
  338. printf("\", ");
  339. }
  340. printf("NULL");
  341. return 0;
  342. }
  343. if (!strcmp(argv[1], "-split") && argc > 2) {
  344. char *str = malloc(20 + strlen(argv[0]) + strlen(argv[2]));
  345. char *p, *q;
  346. q = str + sprintf(str, "%s -splat ", argv[0]);
  347. printf(" {\"");
  348. for (p = argv[2]; *p; p++, q++) {
  349. switch (*p) {
  350. case '/': printf("\\\\"); *q = '\\'; break;
  351. case '\'': printf("\\\""); *q = '"'; break;
  352. case '_': printf(" "); *q = ' '; break;
  353. default: putchar(*p); *q = *p; break;
  354. }
  355. }
  356. *p = '\0';
  357. printf("\", {");
  358. fflush(stdout);
  359. system(str);
  360. printf("}},\n");
  361. return 0;
  362. }
  363. if (!strcmp(argv[1], "-generate")) {
  364. char *teststr, *p;
  365. int i, initialquote, backslashes, quotes;
  366. teststr = malloc(200 + strlen(argv[0]));
  367. for (initialquote = 0; initialquote <= 1; initialquote++) {
  368. for (backslashes = 0; backslashes < 5; backslashes++) {
  369. for (quotes = 0; quotes < 9; quotes++) {
  370. p = teststr + sprintf(teststr, "%s -split ", argv[0]);
  371. if (initialquote) *p++ = '\'';
  372. *p++ = 'a';
  373. for (i = 0; i < backslashes; i++) *p++ = '/';
  374. for (i = 0; i < quotes; i++) *p++ = '\'';
  375. *p++ = 'b';
  376. *p++ = '_';
  377. *p++ = 'c';
  378. *p++ = '\'';
  379. *p++ = '_';
  380. *p++ = 'd';
  381. *p = '\0';
  382. system(teststr);
  383. }
  384. }
  385. }
  386. return 0;
  387. }
  388. fprintf(stderr, "unrecognised option: \"%s\"\n", argv[1]);
  389. return 1;
  390. }
  391. /*
  392. * If we get here, we were invoked with no arguments, so just
  393. * run the tests.
  394. */
  395. for (i = 0; i < lenof(argv_tests); i++) {
  396. int ac;
  397. char **av;
  398. split_into_argv(argv_tests[i].cmdline, &ac, &av);
  399. for (j = 0; j < ac && argv_tests[i].argv[j]; j++) {
  400. if (strcmp(av[j], argv_tests[i].argv[j])) {
  401. printf("failed test %d (|%s|) arg %d: |%s| should be |%s|\n",
  402. i, argv_tests[i].cmdline,
  403. j, av[j], argv_tests[i].argv[j]);
  404. }
  405. #ifdef VERBOSE
  406. else {
  407. printf("test %d (|%s|) arg %d: |%s| == |%s|\n",
  408. i, argv_tests[i].cmdline,
  409. j, av[j], argv_tests[i].argv[j]);
  410. }
  411. #endif
  412. }
  413. if (j < ac)
  414. printf("failed test %d (|%s|): %d args returned, should be %d\n",
  415. i, argv_tests[i].cmdline, ac, j);
  416. if (argv_tests[i].argv[j])
  417. printf("failed test %d (|%s|): %d args returned, should be more\n",
  418. i, argv_tests[i].cmdline, ac);
  419. }
  420. return 0;
  421. }
  422. #endif