123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464 |
- /*
- * winutils.c: miscellaneous Windows utilities for GUI apps
- */
- #include <stdio.h>
- #include <stdlib.h>
- #include <ctype.h>
- #include "misc.h"
- #ifdef TESTMODE
- /* Definitions to allow this module to be compiled standalone for testing. */
- #define smalloc malloc
- #define srealloc realloc
- #define sfree free
- #endif
- /*
- * Split a complete command line into argc/argv, attempting to do
- * it exactly the same way Windows itself would do it (so that
- * console utilities, which receive argc and argv from Windows,
- * will have their command lines processed in the same way as GUI
- * utilities which get a whole command line and must break it
- * themselves).
- *
- * Does not modify the input command line.
- *
- * The final parameter (argstart) is used to return a second array
- * of char * pointers, the same length as argv, each one pointing
- * at the start of the corresponding element of argv in the
- * original command line. So if you get half way through processing
- * your command line in argc/argv form and then decide you want to
- * treat the rest as a raw string, you can. If you don't want to,
- * `argstart' can be safely left NULL.
- */
- void split_into_argv(char *cmdline, int *argc, char ***argv,
- char ***argstart)
- {
- char *p;
- char *outputline, *q;
- char **outputargv, **outputargstart;
- int outputargc;
- /*
- * At first glance the rules appeared to be:
- *
- * - Single quotes are not special characters.
- *
- * - Double quotes are removed, but within them spaces cease
- * to be special.
- *
- * - Backslashes are _only_ special when a sequence of them
- * appear just before a double quote. In this situation,
- * they are treated like C backslashes: so \" just gives a
- * literal quote, \\" gives a literal backslash and then
- * opens or closes a double-quoted segment, \\\" gives a
- * literal backslash and then a literal quote, \\\\" gives
- * two literal backslashes and then opens/closes a
- * double-quoted segment, and so forth. Note that this
- * behaviour is identical inside and outside double quotes.
- *
- * - Two successive double quotes become one literal double
- * quote, but only _inside_ a double-quoted segment.
- * Outside, they just form an empty double-quoted segment
- * (which may cause an empty argument word).
- *
- * - That only leaves the interesting question of what happens
- * when one or more backslashes precedes two or more double
- * quotes, starting inside a double-quoted string. And the
- * answer to that appears somewhat bizarre. Here I tabulate
- * number of backslashes (across the top) against number of
- * quotes (down the left), and indicate how many backslashes
- * are output, how many quotes are output, and whether a
- * quoted segment is open at the end of the sequence:
- *
- * backslashes
- *
- * 0 1 2 3 4
- *
- * 0 0,0,y | 1,0,y 2,0,y 3,0,y 4,0,y
- * --------+-----------------------------
- * 1 0,0,n | 0,1,y 1,0,n 1,1,y 2,0,n
- * q 2 0,1,n | 0,1,n 1,1,n 1,1,n 2,1,n
- * u 3 0,1,y | 0,2,n 1,1,y 1,2,n 2,1,y
- * o 4 0,1,n | 0,2,y 1,1,n 1,2,y 2,1,n
- * t 5 0,2,n | 0,2,n 1,2,n 1,2,n 2,2,n
- * e 6 0,2,y | 0,3,n 1,2,y 1,3,n 2,2,y
- * s 7 0,2,n | 0,3,y 1,2,n 1,3,y 2,2,n
- * 8 0,3,n | 0,3,n 1,3,n 1,3,n 2,3,n
- * 9 0,3,y | 0,4,n 1,3,y 1,4,n 2,3,y
- * 10 0,3,n | 0,4,y 1,3,n 1,4,y 2,3,n
- * 11 0,4,n | 0,4,n 1,4,n 1,4,n 2,4,n
- *
- *
- * [Test fragment was of the form "a\\\"""b c" d.]
- *
- * There is very weird mod-3 behaviour going on here in the
- * number of quotes, and it even applies when there aren't any
- * backslashes! How ghastly.
- *
- * With a bit of thought, this extremely odd diagram suddenly
- * coalesced itself into a coherent, if still ghastly, model of
- * how things work:
- *
- * - As before, backslashes are only special when one or more
- * of them appear contiguously before at least one double
- * quote. In this situation the backslashes do exactly what
- * you'd expect: each one quotes the next thing in front of
- * it, so you end up with n/2 literal backslashes (if n is
- * even) or (n-1)/2 literal backslashes and a literal quote
- * (if n is odd). In the latter case the double quote
- * character right after the backslashes is used up.
- *
- * - After that, any remaining double quotes are processed. A
- * string of contiguous unescaped double quotes has a mod-3
- * behaviour:
- *
- * * inside a quoted segment, a quote ends the segment.
- * * _immediately_ after ending a quoted segment, a quote
- * simply produces a literal quote.
- * * otherwise, outside a quoted segment, a quote begins a
- * quoted segment.
- *
- * So, for example, if we started inside a quoted segment
- * then two contiguous quotes would close the segment and
- * produce a literal quote; three would close the segment,
- * produce a literal quote, and open a new segment. If we
- * started outside a quoted segment, then two contiguous
- * quotes would open and then close a segment, producing no
- * output (but potentially creating a zero-length argument);
- * but three quotes would open and close a segment and then
- * produce a literal quote.
- */
- /*
- * First deal with the simplest of all special cases: if there
- * aren't any arguments, return 0,NULL,NULL.
- */
- while (*cmdline && isspace(*cmdline)) cmdline++;
- if (!*cmdline) {
- if (argc) *argc = 0;
- if (argv) *argv = NULL;
- if (argstart) *argstart = NULL;
- return;
- }
- /*
- * This will guaranteeably be big enough; we can realloc it
- * down later.
- */
- outputline = snewn(1+strlen(cmdline), char);
- outputargv = snewn(strlen(cmdline)+1 / 2, char *);
- outputargstart = snewn(strlen(cmdline)+1 / 2, char *);
- p = cmdline; q = outputline; outputargc = 0;
- while (*p) {
- int quote;
- /* Skip whitespace searching for start of argument. */
- while (*p && isspace(*p)) p++;
- if (!*p) break;
- /* We have an argument; start it. */
- outputargv[outputargc] = q;
- outputargstart[outputargc] = p;
- outputargc++;
- quote = 0;
- /* Copy data into the argument until it's finished. */
- while (*p) {
- if (!quote && isspace(*p))
- break; /* argument is finished */
- if (*p == '"' || *p == '\\') {
- /*
- * We have a sequence of zero or more backslashes
- * followed by a sequence of zero or more quotes.
- * Count up how many of each, and then deal with
- * them as appropriate.
- */
- int i, slashes = 0, quotes = 0;
- while (*p == '\\') slashes++, p++;
- while (*p == '"') quotes++, p++;
- if (!quotes) {
- /*
- * Special case: if there are no quotes,
- * slashes are not special at all, so just copy
- * n slashes to the output string.
- */
- while (slashes--) *q++ = '\\';
- } else {
- /* Slashes annihilate in pairs. */
- while (slashes >= 2) slashes -= 2, *q++ = '\\';
- /* One remaining slash takes out the first quote. */
- if (slashes) quotes--, *q++ = '"';
- if (quotes > 0) {
- /* Outside a quote segment, a quote starts one. */
- if (!quote) quotes--, quote = 1;
- /* Now we produce (n+1)/3 literal quotes... */
- for (i = 3; i <= quotes+1; i += 3) *q++ = '"';
- /* ... and end in a quote segment iff 3 divides n. */
- quote = (quotes % 3 == 0);
- }
- }
- } else {
- *q++ = *p++;
- }
- }
- /* At the end of an argument, just append a trailing NUL. */
- *q++ = '\0';
- }
- outputargv = sresize(outputargv, outputargc, char *);
- outputargstart = sresize(outputargstart, outputargc, char *);
- if (argc) *argc = outputargc;
- if (argv) *argv = outputargv; else sfree(outputargv);
- if (argstart) *argstart = outputargstart; else sfree(outputargstart);
- }
- #ifdef TESTMODE
- const struct argv_test {
- const char *cmdline;
- const char *argv[10];
- } argv_tests[] = {
- /*
- * We generate this set of tests by invoking ourself with
- * `-generate'.
- */
- {"ab c\" d", {"ab", "c d", NULL}},
- {"a\"b c\" d", {"ab c", "d", NULL}},
- {"a\"\"b c\" d", {"ab", "c d", NULL}},
- {"a\"\"\"b c\" d", {"a\"b", "c d", NULL}},
- {"a\"\"\"\"b c\" d", {"a\"b c", "d", NULL}},
- {"a\"\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
- {"a\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
- {"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
- {"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
- {"a\\b c\" d", {"a\\b", "c d", NULL}},
- {"a\\\"b c\" d", {"a\"b", "c d", NULL}},
- {"a\\\"\"b c\" d", {"a\"b c", "d", NULL}},
- {"a\\\"\"\"b c\" d", {"a\"b", "c d", NULL}},
- {"a\\\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
- {"a\\\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
- {"a\\\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
- {"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
- {"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
- {"a\\\\b c\" d", {"a\\\\b", "c d", NULL}},
- {"a\\\\\"b c\" d", {"a\\b c", "d", NULL}},
- {"a\\\\\"\"b c\" d", {"a\\b", "c d", NULL}},
- {"a\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
- {"a\\\\\"\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
- {"a\\\\\"\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
- {"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
- {"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
- {"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
- {"a\\\\\\b c\" d", {"a\\\\\\b", "c d", NULL}},
- {"a\\\\\\\"b c\" d", {"a\\\"b", "c d", NULL}},
- {"a\\\\\\\"\"b c\" d", {"a\\\"b c", "d", NULL}},
- {"a\\\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
- {"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
- {"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
- {"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
- {"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
- {"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
- {"a\\\\\\\\b c\" d", {"a\\\\\\\\b", "c d", NULL}},
- {"a\\\\\\\\\"b c\" d", {"a\\\\b c", "d", NULL}},
- {"a\\\\\\\\\"\"b c\" d", {"a\\\\b", "c d", NULL}},
- {"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
- {"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
- {"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
- {"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
- {"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
- {"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
- {"\"ab c\" d", {"ab c", "d", NULL}},
- {"\"a\"b c\" d", {"ab", "c d", NULL}},
- {"\"a\"\"b c\" d", {"a\"b", "c d", NULL}},
- {"\"a\"\"\"b c\" d", {"a\"b c", "d", NULL}},
- {"\"a\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
- {"\"a\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
- {"\"a\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
- {"\"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
- {"\"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
- {"\"a\\b c\" d", {"a\\b c", "d", NULL}},
- {"\"a\\\"b c\" d", {"a\"b c", "d", NULL}},
- {"\"a\\\"\"b c\" d", {"a\"b", "c d", NULL}},
- {"\"a\\\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
- {"\"a\\\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
- {"\"a\\\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
- {"\"a\\\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
- {"\"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
- {"\"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
- {"\"a\\\\b c\" d", {"a\\\\b c", "d", NULL}},
- {"\"a\\\\\"b c\" d", {"a\\b", "c d", NULL}},
- {"\"a\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
- {"\"a\\\\\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
- {"\"a\\\\\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
- {"\"a\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
- {"\"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
- {"\"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
- {"\"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
- {"\"a\\\\\\b c\" d", {"a\\\\\\b c", "d", NULL}},
- {"\"a\\\\\\\"b c\" d", {"a\\\"b c", "d", NULL}},
- {"\"a\\\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
- {"\"a\\\\\\\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
- {"\"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
- {"\"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
- {"\"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
- {"\"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
- {"\"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
- {"\"a\\\\\\\\b c\" d", {"a\\\\\\\\b c", "d", NULL}},
- {"\"a\\\\\\\\\"b c\" d", {"a\\\\b", "c d", NULL}},
- {"\"a\\\\\\\\\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
- {"\"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
- {"\"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
- {"\"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
- {"\"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
- {"\"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
- {"\"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b", "c d", NULL}},
- };
- int main(int argc, char **argv)
- {
- int i, j;
- if (argc > 1) {
- /*
- * Generation of tests.
- *
- * Given `-splat <args>', we print out a C-style
- * representation of each argument (in the form "a", "b",
- * NULL), backslash-escaping each backslash and double
- * quote.
- *
- * Given `-split <string>', we first doctor `string' by
- * turning forward slashes into backslashes, single quotes
- * into double quotes and underscores into spaces; and then
- * we feed the resulting string to ourself with `-splat'.
- *
- * Given `-generate', we concoct a variety of fun test
- * cases, encode them in quote-safe form (mapping \, " and
- * space to /, ' and _ respectively) and feed each one to
- * `-split'.
- */
- if (!strcmp(argv[1], "-splat")) {
- int i;
- char *p;
- for (i = 2; i < argc; i++) {
- putchar('"');
- for (p = argv[i]; *p; p++) {
- if (*p == '\\' || *p == '"')
- putchar('\\');
- putchar(*p);
- }
- printf("\", ");
- }
- printf("NULL");
- return 0;
- }
- if (!strcmp(argv[1], "-split") && argc > 2) {
- char *str = malloc(20 + strlen(argv[0]) + strlen(argv[2]));
- char *p, *q;
- q = str + sprintf(str, "%s -splat ", argv[0]);
- printf(" {\"");
- for (p = argv[2]; *p; p++, q++) {
- switch (*p) {
- case '/': printf("\\\\"); *q = '\\'; break;
- case '\'': printf("\\\""); *q = '"'; break;
- case '_': printf(" "); *q = ' '; break;
- default: putchar(*p); *q = *p; break;
- }
- }
- *p = '\0';
- printf("\", {");
- fflush(stdout);
- system(str);
- printf("}},\n");
- return 0;
- }
- if (!strcmp(argv[1], "-generate")) {
- char *teststr, *p;
- int i, initialquote, backslashes, quotes;
- teststr = malloc(200 + strlen(argv[0]));
- for (initialquote = 0; initialquote <= 1; initialquote++) {
- for (backslashes = 0; backslashes < 5; backslashes++) {
- for (quotes = 0; quotes < 9; quotes++) {
- p = teststr + sprintf(teststr, "%s -split ", argv[0]);
- if (initialquote) *p++ = '\'';
- *p++ = 'a';
- for (i = 0; i < backslashes; i++) *p++ = '/';
- for (i = 0; i < quotes; i++) *p++ = '\'';
- *p++ = 'b';
- *p++ = '_';
- *p++ = 'c';
- *p++ = '\'';
- *p++ = '_';
- *p++ = 'd';
- *p = '\0';
- system(teststr);
- }
- }
- }
- return 0;
- }
- fprintf(stderr, "unrecognised option: \"%s\"\n", argv[1]);
- return 1;
- }
- /*
- * If we get here, we were invoked with no arguments, so just
- * run the tests.
- */
- for (i = 0; i < lenof(argv_tests); i++) {
- int ac;
- char **av;
- split_into_argv(argv_tests[i].cmdline, &ac, &av);
- for (j = 0; j < ac && argv_tests[i].argv[j]; j++) {
- if (strcmp(av[j], argv_tests[i].argv[j])) {
- printf("failed test %d (|%s|) arg %d: |%s| should be |%s|\n",
- i, argv_tests[i].cmdline,
- j, av[j], argv_tests[i].argv[j]);
- }
- #ifdef VERBOSE
- else {
- printf("test %d (|%s|) arg %d: |%s| == |%s|\n",
- i, argv_tests[i].cmdline,
- j, av[j], argv_tests[i].argv[j]);
- }
- #endif
- }
- if (j < ac)
- printf("failed test %d (|%s|): %d args returned, should be %d\n",
- i, argv_tests[i].cmdline, ac, j);
- if (argv_tests[i].argv[j])
- printf("failed test %d (|%s|): %d args returned, should be more\n",
- i, argv_tests[i].cmdline, ac);
- }
- return 0;
- }
- #endif
|