urlglob.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. /***************************************************************************
  2. * _ _ ____ _
  3. * Project ___| | | | _ \| |
  4. * / __| | | | |_) | |
  5. * | (__| |_| | _ <| |___
  6. * \___|\___/|_| \_\_____|
  7. *
  8. * Copyright (C) 1998 - 2004, Daniel Stenberg, <daniel@haxx.se>, et al.
  9. *
  10. * This software is licensed as described in the file COPYING, which
  11. * you should have received as part of this distribution. The terms
  12. * are also available at http://curl.haxx.se/docs/copyright.html.
  13. *
  14. * You may opt to use, copy, modify, merge, publish, distribute and/or sell
  15. * copies of the Software, and permit persons to whom the Software is
  16. * furnished to do so, under the terms of the COPYING file.
  17. *
  18. * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  19. * KIND, either express or implied.
  20. *
  21. * $Id: urlglob.c,v 1.31 2004/03/08 12:51:13 bagder Exp $
  22. ***************************************************************************/
  23. /* client-local setup.h */
  24. #include "setup.h"
  25. #include <stdio.h>
  26. #include <stdlib.h>
  27. #include <string.h>
  28. #include <ctype.h>
  29. #include <curl/curl.h>
  30. #define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */
  31. #include <curl/mprintf.h>
  32. #include "urlglob.h"
  33. #ifdef CURLDEBUG
  34. #include "../lib/memdebug.h"
  35. #endif
  /* Result code shared by the static glob parsing helpers in this file. */
  typedef enum {
    GLOB_OK = 0,    /* the pattern (or pattern part) was parsed */
    GLOB_ERROR = 1  /* parsing failed */
  } GlobCode;
  40. /*
  41. * glob_word()
  42. *
  43. * Input a full globbed string, set the forth argument to the amount of
  44. * strings we get out of this. Return GlobCode.
  45. */
  46. static GlobCode glob_word(URLGlob *, /* object anchor */
  47. char *, /* globbed string */
  48. int, /* position */
  49. int *); /* returned number of strings */
  50. static GlobCode glob_set(URLGlob *glob, char *pattern, int pos, int *amount)
  51. {
  52. /* processes a set expression with the point behind the opening '{'
  53. ','-separated elements are collected until the next closing '}'
  54. */
  55. char* buf = glob->glob_buffer;
  56. URLPattern *pat;
  57. pat = (URLPattern*)&glob->pattern[glob->size / 2];
  58. /* patterns 0,1,2,... correspond to size=1,3,5,... */
  59. pat->type = UPTSet;
  60. pat->content.Set.size = 0;
  61. pat->content.Set.ptr_s = 0;
  62. pat->content.Set.elements = (char**)malloc(0);
  63. ++glob->size;
  64. while (1) {
  65. switch (*pattern) {
  66. case '\0': /* URL ended while set was still open */
  67. snprintf(glob->errormsg, sizeof(glob->errormsg),
  68. "unmatched brace at pos %d\n", pos);
  69. return GLOB_ERROR;
  70. case '{':
  71. case '[': /* no nested expressions at this time */
  72. snprintf(glob->errormsg, sizeof(glob->errormsg),
  73. "nested braces not supported at pos %d\n", pos);
  74. return GLOB_ERROR;
  75. case ',':
  76. case '}': /* set element completed */
  77. *buf = '\0';
  78. pat->content.Set.elements =
  79. realloc(pat->content.Set.elements,
  80. (pat->content.Set.size + 1) * sizeof(char*));
  81. if (!pat->content.Set.elements) {
  82. snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory");
  83. return GLOB_ERROR;
  84. }
  85. pat->content.Set.elements[pat->content.Set.size] =
  86. strdup(glob->glob_buffer);
  87. ++pat->content.Set.size;
  88. if (*pattern == '}') {
  89. /* entire set pattern completed */
  90. int wordamount;
  91. /* always check for a literal (may be "") between patterns */
  92. if(GLOB_ERROR == glob_word(glob, ++pattern, ++pos, &wordamount))
  93. wordamount=1;
  94. *amount = pat->content.Set.size * wordamount;
  95. return GLOB_OK;
  96. }
  97. buf = glob->glob_buffer;
  98. ++pattern;
  99. ++pos;
  100. break;
  101. case ']': /* illegal closing bracket */
  102. snprintf(glob->errormsg, sizeof(glob->errormsg),
  103. "illegal pattern at pos %d\n", pos);
  104. return GLOB_ERROR;
  105. case '\\': /* escaped character, skip '\' */
  106. if (*(buf+1) == '\0') { /* but no escaping of '\0'! */
  107. snprintf(glob->errormsg, sizeof(glob->errormsg),
  108. "illegal pattern at pos %d\n", pos);
  109. return GLOB_ERROR;
  110. }
  111. ++pattern;
  112. ++pos; /* intentional fallthrough */
  113. default:
  114. *buf++ = *pattern++; /* copy character to set element */
  115. ++pos;
  116. }
  117. }
  118. /* we never reach this point */
  119. }
  120. static GlobCode glob_range(URLGlob *glob, char *pattern, int pos, int *amount)
  121. {
  122. /* processes a range expression with the point behind the opening '['
  123. - char range: e.g. "a-z]", "B-Q]"
  124. - num range: e.g. "0-9]", "17-2000]"
  125. - num range with leading zeros: e.g. "001-999]"
  126. expression is checked for well-formedness and collected until the next ']'
  127. */
  128. URLPattern *pat;
  129. char *c;
  130. int wordamount=1;
  131. pat = (URLPattern*)&glob->pattern[glob->size / 2];
  132. /* patterns 0,1,2,... correspond to size=1,3,5,... */
  133. ++glob->size;
  134. if (isalpha((int)*pattern)) { /* character range detected */
  135. pat->type = UPTCharRange;
  136. if (sscanf(pattern, "%c-%c]", &pat->content.CharRange.min_c,
  137. &pat->content.CharRange.max_c) != 2 ||
  138. pat->content.CharRange.min_c >= pat->content.CharRange.max_c ||
  139. pat->content.CharRange.max_c - pat->content.CharRange.min_c > 'z' - 'a') {
  140. /* the pattern is not well-formed */
  141. snprintf(glob->errormsg, sizeof(glob->errormsg),
  142. "illegal pattern or range specification after pos %d\n", pos);
  143. return GLOB_ERROR;
  144. }
  145. pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
  146. /* always check for a literal (may be "") between patterns */
  147. if(GLOB_ERROR == glob_word(glob, pattern + 4, pos + 4, &wordamount))
  148. wordamount=1;
  149. *amount = (pat->content.CharRange.max_c -
  150. pat->content.CharRange.min_c + 1) *
  151. wordamount;
  152. return GLOB_OK;
  153. }
  154. if (isdigit((int)*pattern)) { /* numeric range detected */
  155. pat->type = UPTNumRange;
  156. pat->content.NumRange.padlength = 0;
  157. if (sscanf(pattern, "%d-%d]",
  158. &pat->content.NumRange.min_n,
  159. &pat->content.NumRange.max_n) != 2 ||
  160. pat->content.NumRange.min_n >= pat->content.NumRange.max_n) {
  161. /* the pattern is not well-formed */
  162. snprintf(glob->errormsg, sizeof(glob->errormsg),
  163. "error: illegal pattern or range specification after pos %d\n",
  164. pos);
  165. return GLOB_ERROR;
  166. }
  167. if (*pattern == '0') { /* leading zero specified */
  168. c = pattern;
  169. while (isdigit((int)*c++))
  170. ++pat->content.NumRange.padlength; /* padding length is set for all
  171. instances of this pattern */
  172. }
  173. pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
  174. c = (char*)strchr(pattern, ']'); /* continue after next ']' */
  175. if(c)
  176. c++;
  177. else {
  178. snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'");
  179. return GLOB_ERROR; /* missing ']' */
  180. }
  181. /* always check for a literal (may be "") between patterns */
  182. if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount))
  183. wordamount = 1;
  184. *amount = (pat->content.NumRange.max_n -
  185. pat->content.NumRange.min_n + 1) *
  186. wordamount;
  187. return GLOB_OK;
  188. }
  189. snprintf(glob->errormsg, sizeof(glob->errormsg),
  190. "illegal character in range specification at pos %d\n", pos);
  191. return GLOB_ERROR;
  192. }
  193. static GlobCode glob_word(URLGlob *glob, char *pattern, int pos, int *amount)
  194. {
  195. /* processes a literal string component of a URL
  196. special characters '{' and '[' branch to set/range processing functions
  197. */
  198. char* buf = glob->glob_buffer;
  199. int litindex;
  200. GlobCode res = GLOB_OK;
  201. *amount = 1; /* default is one single string */
  202. while (*pattern != '\0' && *pattern != '{' && *pattern != '[') {
  203. if (*pattern == '}' || *pattern == ']')
  204. return GLOB_ERROR;
  205. /* only allow \ to escape known "special letters" */
  206. if (*pattern == '\\' &&
  207. (*(pattern+1) == '{' || *(pattern+1) == '[' ||
  208. *(pattern+1) == '}' || *(pattern+1) == ']') ) {
  209. /* escape character, skip '\' */
  210. ++pattern;
  211. ++pos;
  212. if (*pattern == '\0') /* but no escaping of '\0'! */
  213. return GLOB_ERROR;
  214. }
  215. *buf++ = *pattern++; /* copy character to literal */
  216. ++pos;
  217. }
  218. *buf = '\0';
  219. litindex = glob->size / 2;
  220. /* literals 0,1,2,... correspond to size=0,2,4,... */
  221. glob->literal[litindex] = strdup(glob->glob_buffer);
  222. if(!glob->literal[litindex])
  223. return GLOB_ERROR;
  224. ++glob->size;
  225. switch (*pattern) {
  226. case '\0':
  227. break; /* singular URL processed */
  228. case '{':
  229. /* process set pattern */
  230. res = glob_set(glob, ++pattern, ++pos, amount);
  231. break;
  232. case '[':
  233. /* process range pattern */
  234. res= glob_range(glob, ++pattern, ++pos, amount);
  235. break;
  236. }
  237. if(GLOB_OK != res)
  238. /* free that strdup'ed string again */
  239. free(glob->literal[litindex]);
  240. return res; /* something got wrong */
  241. }
  242. int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error)
  243. {
  244. /*
  245. * We can deal with any-size, just make a buffer with the same length
  246. * as the specified URL!
  247. */
  248. URLGlob *glob_expand;
  249. int amount;
  250. char *glob_buffer=(char *)malloc(strlen(url)+1);
  251. *glob = NULL;
  252. if(NULL == glob_buffer)
  253. return CURLE_OUT_OF_MEMORY;
  254. glob_expand = (URLGlob*)malloc(sizeof(URLGlob));
  255. if(NULL == glob_expand) {
  256. free(glob_buffer);
  257. return CURLE_OUT_OF_MEMORY;
  258. }
  259. glob_expand->size = 0;
  260. glob_expand->urllen = strlen(url);
  261. glob_expand->glob_buffer = glob_buffer;
  262. glob_expand->beenhere=0;
  263. if(GLOB_OK == glob_word(glob_expand, url, 1, &amount))
  264. *urlnum = amount;
  265. else {
  266. if(error && glob_expand->errormsg[0]) {
  267. /* send error description to the error-stream */
  268. fprintf(error, "curl: (%d) [globbing] %s\n",
  269. CURLE_URL_MALFORMAT, glob_expand->errormsg);
  270. }
  271. /* it failed, we cleanup */
  272. free(glob_buffer);
  273. free(glob_expand);
  274. glob_expand = NULL;
  275. *urlnum = 1;
  276. return CURLE_URL_MALFORMAT;
  277. }
  278. *glob = glob_expand;
  279. return CURLE_OK;
  280. }
  281. void glob_cleanup(URLGlob* glob)
  282. {
  283. int i, elem;
  284. for (i = glob->size - 1; i >= 0; --i) {
  285. if (!(i & 1)) { /* even indexes contain literals */
  286. free(glob->literal[i/2]);
  287. }
  288. else { /* odd indexes contain sets or ranges */
  289. if (glob->pattern[i/2].type == UPTSet) {
  290. for (elem = glob->pattern[i/2].content.Set.size - 1;
  291. elem >= 0;
  292. --elem) {
  293. free(glob->pattern[i/2].content.Set.elements[elem]);
  294. }
  295. free(glob->pattern[i/2].content.Set.elements);
  296. }
  297. }
  298. }
  299. free(glob->glob_buffer);
  300. free(glob);
  301. }
  302. char *glob_next_url(URLGlob *glob)
  303. {
  304. char *buf = glob->glob_buffer;
  305. URLPattern *pat;
  306. char *lit;
  307. signed int i;
  308. int carry;
  309. if (!glob->beenhere)
  310. glob->beenhere = 1;
  311. else {
  312. carry = 1;
  313. /* implement a counter over the index ranges of all patterns,
  314. starting with the rightmost pattern */
  315. for (i = glob->size / 2 - 1; carry && i >= 0; --i) {
  316. carry = 0;
  317. pat = &glob->pattern[i];
  318. switch (pat->type) {
  319. case UPTSet:
  320. if (++pat->content.Set.ptr_s == pat->content.Set.size) {
  321. pat->content.Set.ptr_s = 0;
  322. carry = 1;
  323. }
  324. break;
  325. case UPTCharRange:
  326. if (++pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
  327. pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
  328. carry = 1;
  329. }
  330. break;
  331. case UPTNumRange:
  332. if (++pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
  333. pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
  334. carry = 1;
  335. }
  336. break;
  337. default:
  338. printf("internal error: invalid pattern type (%d)\n", pat->type);
  339. exit (CURLE_FAILED_INIT);
  340. }
  341. }
  342. if (carry) /* first pattern ptr has run into overflow, done! */
  343. return NULL;
  344. }
  345. for (i = 0; i < glob->size; ++i) {
  346. if (!(i % 2)) { /* every other term (i even) is a literal */
  347. lit = glob->literal[i/2];
  348. strcpy(buf, lit);
  349. buf += strlen(lit);
  350. }
  351. else { /* the rest (i odd) are patterns */
  352. pat = &glob->pattern[i/2];
  353. switch(pat->type) {
  354. case UPTSet:
  355. strcpy(buf, pat->content.Set.elements[pat->content.Set.ptr_s]);
  356. buf += strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
  357. break;
  358. case UPTCharRange:
  359. *buf++ = pat->content.CharRange.ptr_c;
  360. break;
  361. case UPTNumRange:
  362. sprintf(buf, "%0*d",
  363. pat->content.NumRange.padlength, pat->content.NumRange.ptr_n);
  364. buf += strlen(buf); /* make no sprint() return code assumptions */
  365. break;
  366. default:
  367. printf("internal error: invalid pattern type (%d)\n", pat->type);
  368. exit (CURLE_FAILED_INIT);
  369. }
  370. }
  371. }
  372. *buf = '\0';
  373. return strdup(glob->glob_buffer);
  374. }
  375. char *glob_match_url(char *filename, URLGlob *glob)
  376. {
  377. char *target;
  378. int allocsize;
  379. int stringlen=0;
  380. char numbuf[18];
  381. char *appendthis = NULL;
  382. int appendlen = 0;
  383. /* We cannot use the glob_buffer for storage here since the filename may
  384. * be longer than the URL we use. We allocate a good start size, then
  385. * we need to realloc in case of need.
  386. */
  387. allocsize=strlen(filename);
  388. target = malloc(allocsize);
  389. if(NULL == target)
  390. return NULL; /* major failure */
  391. while (*filename) {
  392. if (*filename == '#' && isdigit((int)filename[1])) {
  393. /* only '#1' ... '#9' allowed */
  394. int i;
  395. unsigned long num = strtoul(&filename[1], &filename, 10);
  396. i = num-1;
  397. if (num && (i <= glob->size / 2)) {
  398. URLPattern pat = glob->pattern[i];
  399. switch (pat.type) {
  400. case UPTSet:
  401. appendthis = pat.content.Set.elements[pat.content.Set.ptr_s];
  402. appendlen =
  403. (int)strlen(pat.content.Set.elements[pat.content.Set.ptr_s]);
  404. break;
  405. case UPTCharRange:
  406. numbuf[0]=pat.content.CharRange.ptr_c;
  407. numbuf[1]=0;
  408. appendthis=numbuf;
  409. appendlen=1;
  410. break;
  411. case UPTNumRange:
  412. sprintf(numbuf, "%0*d",
  413. pat.content.NumRange.padlength,
  414. pat.content.NumRange.ptr_n);
  415. appendthis = numbuf;
  416. appendlen = (int)strlen(numbuf);
  417. break;
  418. default:
  419. printf("internal error: invalid pattern type (%d)\n",
  420. (int)pat.type);
  421. free(target);
  422. return NULL;
  423. }
  424. }
  425. }
  426. else {
  427. appendthis=filename++;
  428. appendlen=1;
  429. }
  430. if(appendlen + stringlen >= allocsize) {
  431. char *newstr;
  432. allocsize = (appendlen + stringlen)*2;
  433. newstr=realloc(target, allocsize);
  434. if(NULL ==newstr) {
  435. free(target);
  436. return NULL;
  437. }
  438. target=newstr;
  439. }
  440. memcpy(&target[stringlen], appendthis, appendlen);
  441. stringlen += appendlen;
  442. }
  443. target[stringlen]= '\0';
  444. return target;
  445. }