parser.l 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670
  1. /* -*-C-*-
  2. *
  3. * Copyright 1998-2000 Bertho A. Stultiens (BS)
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public
  7. * License as published by the Free Software Foundation; either
  8. * version 2.1 of the License, or (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with this library; if not, write to the Free Software
  17. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  18. *
  19. * History:
  20. * 21-May-2000 BS - Fixed the ident requirement of resource names
  21. * which can be keywords.
  22. * 30-Apr-2000 BS - Reintegration into the wine-tree
  23. * 11-Jan-2000 BS - Very drastic cleanup because we don't have a
  24. * preprocessor in here anymore.
  25. * 02-Jan-2000 BS - Removed the preprocessor code
  26. * 23-Dec-1999 BS - Removed the copyright for Martin von Loewis.
  27. * There is really nothing left of his code in
  28. * this parser.
  29. * 20-Jun-1998 BS - Changed the filename conversion. Filenames are
  30. * case-sensitive under *nix, but not under dos.
  31. * default behaviour is to convert to lower case.
  32. * - All backslashes are converted to forward and
  33. * both single and double slash is recognized as
  34. * MS/Borland does.
  35. * - Fixed a bug in 'yywf' case that prevented
  36. * double quoted names to be scanned properly.
  37. *
  38. * 19-May-1998 BS - Started to build a preprocessor.
  39. * - Changed keyword processing completely to
  40. * table-lookups.
  41. *
  42. * 20-Apr-1998 BS - Added ';' comment stripping
  43. *
  44. * 17-Apr-1998 BS - Made the win32 keywords optional when compiling in
  45. * 16bit mode
  46. *
  47. * 15-Apr-1998 BS - Changed string handling to include escapes
  48. * - Added unicode string handling (no codepage
  49. * translation though).
  50. * - 'Borrowed' the main idea of string scanning from
  51. * the flex manual pages.
  52. * - Added conditional handling of scanning depending
  53. * on the state of the parser. This was mainly required
  54. * to distinguish a file to load or raw data that
  55. * follows. MS's definition of filenames is rather
  56. * complex... It can be unquoted or double quoted. If
  57. * double quoted, then the '\\' char is not automatically
  58. * escaped according to Borland's rc compiler, but it
  59. * accepts both "\\path\\file.rc" and "\path\file.rc".
  60. * This makes life very hard! I go for the escaped
  61. * version, as this seems to be the documented way...
  62. * - Single quoted strings are now parsed and converted
  63. * here.
  64. * - Added comment stripping. The implementation is
  65. * 'borrowed' from the flex manpages.
  66. * - Rebuild string processing so that it may contain
  67. * escaped '\0'.
  68. */
  69. /* Exclusive string handling */
  70. %x tkstr
  71. /* Exclusive unicode string handling */
  72. %x tklstr
  73. /* Exclusive rcdata single quoted data handling */
  74. %x tkrcd
  75. /* Exclusive comment eating... */
  76. %x comment
  77. /* Set while skipping text that follows a line directive naming a .h file */
  78. %x pp_cstrip
  79. /* Set when scanning #line style directives */
  80. %x pp_line
  81. /* Set when scanning #pragma */
  82. %x pp_pragma
      /* Set when scanning the parenthesised argument of #pragma code_page */
  83. %x pp_code_page
      /* The rules switch states with yy_push_state()/yy_pop_state(), which needs the state stack */
  84. %option stack
      /* Leave out scanner helpers that this file never calls */
  85. %option noinput nounput noyy_top_state noyywrap
  86. %option 8bit never-interactive
      /* Generated scanner symbols are prefixed with parser_ instead of yy */
  87. %option prefix="parser_"
  88. /* Some shortcut definitions */
      /* ws is horizontal whitespace only; newline is left out so the newline rules can count lines */
  89. ws [ \f\t\r]
  90. %{
  91. /*#define LEX_DEBUG*/
  92. #include "config.h"
  93. #include <stdio.h>
  94. #include <stdlib.h>
  95. #include <string.h>
  96. #include <ctype.h>
  97. #include <assert.h>
  98. #include <errno.h>
  99. #include <limits.h>
  100. #ifdef HAVE_UNISTD_H
  101. #include <unistd.h>
  102. #else
  103. #define YY_NO_UNISTD_H
  104. #endif
  105. #include "wrc.h"
  106. #include "utils.h"
  107. #include "parser.h"
  108. #include "newstruc.h"
  109. #include "parser.tab.h"
  110. /* Always update the current character position within a line.
      * The same hook copies the parser's want_id request into wanted_id and
      * clears want_id, so the request is seen by exactly one rule action
      * unless that action puts it back. */
  111. #define YY_USER_ACTION char_number+=yyleng; wanted_id = want_id; want_id = 0;
      /* Initial codepage: CP_UTF8 when utf8_input is set, otherwise -1 */
  112. #define YY_USER_INIT current_codepage = utf8_input ? CP_UTF8 : -1;
      /* Append one element to the narrow / wide collection buffer */
  113. static void addcchar(char c);
  114. static void addwchar(WCHAR s);
      /* Turn the collected buffer contents into a freshly allocated string_t */
  115. static string_t *get_buffered_cstring(void);
  116. static string_t *get_buffered_wstring(void);
      /* Build a string_t from a NUL-terminated C string */
  117. static string_t *make_string(char *s);
  118. static char *cbuffer; /* Buffers for string collection */
  119. static int cbufidx; /* number of chars currently stored in cbuffer */
  120. static int cbufalloc = 0; /* capacity of cbuffer, in chars */
  121. static WCHAR *wbuffer; /* wide counterpart of cbuffer */
  122. static int wbufidx; /* number of WCHARs currently stored in wbuffer */
  123. static int wbufalloc = 0; /* capacity of wbuffer, in WCHARs */
  124. static int current_codepage = -1; /* use language default */
  125. /*
  126. * This one is a bit tricky.
  127. * We set 'want_id' in the parser to get the first
  128. * identifier we get across in the scanner, but we
  129. * also want it to be reset at nearly any token we
  130. * see. Exceptions are:
  131. * - newlines
  132. * - comments
  133. * - whitespace
  134. *
  135. * The scanner will automatically reset 'want_id'
  136. * after *each* scanner reduction and puts its value
  137. * into the var below. In this way we can see the
  138. * state after the YY_RULE_SETUP (i.e. the user action;
  139. * see above) and don't have to worry too much when
  140. * it needs to be reset.
      *
      * The rules for the exceptions listed above restore the
      * request with 'want_id = wanted_id'.
  141. */
  142. static int wanted_id = 0;
  143. static int save_wanted_id; /* To save across comment reductions */
      /* One entry of the keyword lookup table used by iskeyword() */
  144. struct keyword {
  145. const char *keyword; /* spelling of the keyword */
  146. int token; /* token value the scanner returns for it */
  147. int isextension; /* non-zero: only recognised when 'extensions' is set */
  148. int needcase; /* non-zero: kw_cmp_func() breaks a tie with a case-sensitive strcmp() */
  149. int alwayskw; /* non-zero: stays a keyword even when an identifier was requested */
  150. };
      /*
       * Keyword table. Columns are: keyword, token, isextension, needcase, alwayskw.
       * The array is not const because iskeyword() sorts it in place with qsort()
       * on first use (when DO_SORT is defined), so the order written here does not
       * have to match the order produced by kw_cmp_func().
       */
  151. static struct keyword keywords[] = {
  152. { "ACCELERATORS", tACCELERATORS, 0, 0, 0},
  153. { "ALT", tALT, 0, 0, 0},
  154. { "ASCII", tASCII, 0, 0, 0},
  155. { "AUTO3STATE", tAUTO3STATE, 1, 0, 0},
  156. { "AUTOCHECKBOX", tAUTOCHECKBOX, 1, 0, 0},
  157. { "AUTORADIOBUTTON", tAUTORADIOBUTTON, 1, 0, 0},
  158. { "BEGIN", tBEGIN, 0, 0, 0},
  159. { "BITMAP", tBITMAP, 0, 0, 0},
  160. { "BLOCK", tBLOCK, 0, 0, 0},
  161. { "BUTTON", tBUTTON, 1, 0, 0},
  162. { "CAPTION", tCAPTION, 0, 0, 0},
  163. { "CHARACTERISTICS", tCHARACTERISTICS, 1, 0, 0},
  164. { "CHECKBOX", tCHECKBOX, 0, 0, 0},
  165. { "CHECKED", tCHECKED, 0, 0, 0},
  166. { "CLASS", tCLASS, 0, 0, 0},
  167. { "COMBOBOX", tCOMBOBOX, 0, 0, 0},
  168. { "CONTROL", tCONTROL, 0, 0, 0},
  169. { "CTEXT", tCTEXT, 0, 0, 0},
  170. { "CURSOR", tCURSOR, 0, 0, 0},
  171. { "DEFPUSHBUTTON", tDEFPUSHBUTTON, 0, 0, 0},
  172. { "DIALOG", tDIALOG, 0, 0, 0},
  173. { "DIALOGEX", tDIALOGEX, 1, 0, 0},
  174. { "DISCARDABLE", tDISCARDABLE, 0, 0, 0},
  175. { "DLGINIT", tDLGINIT, 0, 0, 0},
  176. { "EDITTEXT", tEDITTEXT, 0, 0, 0},
  177. { "END", tEND, 0, 0, 0},
  178. { "EXSTYLE", tEXSTYLE, 0, 0, 0},
  179. { "FILEFLAGS", tFILEFLAGS, 0, 0, 0},
  180. { "FILEFLAGSMASK", tFILEFLAGSMASK, 0, 0, 0},
  181. { "FILEOS", tFILEOS, 0, 0, 0},
  182. { "FILESUBTYPE", tFILESUBTYPE, 0, 0, 0},
  183. { "FILETYPE", tFILETYPE, 0, 0, 0},
  184. { "FILEVERSION", tFILEVERSION, 0, 0, 0},
  185. { "FIXED", tFIXED, 0, 0, 0},
  186. { "FONT", tFONT, 0, 0, 0},
  187. { "FONTDIR", tFONTDIR, 0, 0, 0}, /* This is a Borland BRC extension */
  188. { "GRAYED", tGRAYED, 0, 0, 0},
  189. { "GROUPBOX", tGROUPBOX, 0, 0, 0},
  190. { "HELP", tHELP, 0, 0, 0},
  191. { "HTML", tHTML, 0, 0, 0},
  192. { "ICON", tICON, 0, 0, 0},
  193. { "IMPURE", tIMPURE, 0, 0, 0},
  194. { "INACTIVE", tINACTIVE, 0, 0, 0},
  195. { "LANGUAGE", tLANGUAGE, 1, 0, 1},
  196. { "LISTBOX", tLISTBOX, 0, 0, 0},
  197. { "LOADONCALL", tLOADONCALL, 0, 0, 0},
  198. { "LTEXT", tLTEXT, 0, 0, 0},
  199. { "MENU", tMENU, 0, 0, 0},
  200. { "MENUBARBREAK", tMENUBARBREAK, 0, 0, 0},
  201. { "MENUBREAK", tMENUBREAK, 0, 0, 0},
  202. { "MENUEX", tMENUEX, 1, 0, 0},
  203. { "MENUITEM", tMENUITEM, 0, 0, 0},
  204. { "MESSAGETABLE", tMESSAGETABLE, 1, 0, 0},
  205. { "MOVEABLE", tMOVEABLE, 0, 0, 0},
  206. { "NOINVERT", tNOINVERT, 0, 0, 0},
  207. { "NOT", tNOT, 0, 0, 0},
  208. { "POPUP", tPOPUP, 0, 0, 0},
  209. { "PRELOAD", tPRELOAD, 0, 0, 0},
  210. { "PRODUCTVERSION", tPRODUCTVERSION, 0, 0, 0},
  211. { "PURE", tPURE, 0, 0, 0},
  212. { "PUSHBUTTON", tPUSHBUTTON, 0, 0, 0},
  213. { "RADIOBUTTON", tRADIOBUTTON, 0, 0, 0},
  214. { "RCDATA", tRCDATA, 0, 0, 0},
  215. { "RTEXT", tRTEXT, 0, 0, 0},
  216. { "SCROLLBAR", tSCROLLBAR, 0, 0, 0},
  217. { "SEPARATOR", tSEPARATOR, 0, 0, 0},
  218. { "SHIFT", tSHIFT, 0, 0, 0},
  219. { "STATE3", tSTATE3, 1, 0, 0},
  220. { "STRING", tSTRING, 0, 0, 0},
  221. { "STRINGTABLE", tSTRINGTABLE, 0, 0, 1},
  222. { "STYLE", tSTYLE, 0, 0, 0},
  223. { "TOOLBAR", tTOOLBAR, 1, 0, 0},
  224. { "VALUE", tVALUE, 0, 0, 0},
  225. { "VERSION", tVERSION, 1, 0, 0},
  226. { "VERSIONINFO", tVERSIONINFO, 0, 0, 0},
  227. { "VIRTKEY", tVIRTKEY, 0, 0, 0}
  228. };
      /* Number of entries in keywords[] */
  229. #define NKEYWORDS (sizeof(keywords)/sizeof(keywords[0]))
      /* View a qsort()/bsearch() comparator argument as a keyword entry */
  230. #define KWP(p) ((const struct keyword *)(p))
  231. static int kw_cmp_func(const void *s1, const void *s2)
  232. {
  233. int ret;
  234. ret = compare_striA(KWP(s1)->keyword, KWP(s2)->keyword);
  235. if(!ret && (KWP(s1)->needcase || KWP(s2)->needcase))
  236. return strcmp(KWP(s1)->keyword, KWP(s2)->keyword);
  237. else
  238. return ret;
  239. }
  240. #define KW_BSEARCH
  241. #define DO_SORT
  242. static struct keyword *iskeyword(char *kw)
  243. {
  244. struct keyword *kwp;
  245. struct keyword key;
  246. key.keyword = kw;
  247. key.needcase = 0;
  248. #ifdef DO_SORT
  249. {
  250. /* Make sure that it is sorted for bsearsh */
  251. static int sorted = 0;
  252. if(!sorted)
  253. {
  254. qsort(keywords, NKEYWORDS, sizeof(keywords[0]), kw_cmp_func);
  255. sorted = 1;
  256. }
  257. }
  258. #endif
  259. #ifdef KW_BSEARCH
  260. kwp = bsearch(&key, keywords, NKEYWORDS, sizeof(keywords[0]), kw_cmp_func);
  261. #else
  262. {
  263. int i;
  264. for(i = 0; i < NKEYWORDS; i++)
  265. {
  266. if(!kw_cmp_func(&key, &keywords[i]))
  267. break;
  268. }
  269. if(i < NKEYWORDS)
  270. kwp = &keywords[i];
  271. else
  272. kwp = NULL;
  273. }
  274. #endif
  275. if(kwp == NULL || (kwp->isextension && !extensions))
  276. return NULL;
  277. else
  278. return kwp;
  279. }
  /*
   * strtoul() wrapper for integer constants.
   *
   * Parameters and return value are those of strtoul(). If the conversion
   * overflows (ULONG_MAX returned together with errno == ERANGE), an error
   * naming the offending text is reported through parser_error().
   */
  static unsigned long xstrtoul(const char *nptr, char **endptr, int base)
  {
      unsigned long value;

      /* errno must be cleared first: ULONG_MAX alone is a legal result */
      errno = 0;
      value = strtoul(nptr, endptr, base);
      if (errno == ERANGE && value == ULONG_MAX)
          parser_error("integer constant %s is too large", nptr);

      return value;
  }
  291. %}
  292. /*
  293. **************************************************************************
  294. * The flexer starts here
  295. **************************************************************************
  296. */
  297. %%
  298. /*
  299. * Catch the GCC-style line statements here and parse them.
  300. * This has the advantage that you can #include at any
  301. * stage in the resource file.
  302. * The preprocessor generates line directives in the format:
  303. * # <linenum> "filename" <codes>
  304. *
  305. * Codes can be a sequence of:
  306. * - 1 start of new file
  307. * - 2 returning to previous
  308. * - 3 system header
  309. * - 4 interpret as C-code
  310. *
  311. * 4 is not used and 1 mutually excludes 2
  312. * Anyhow, we are not really interested in these at all
  313. * because we only want to know the linenumber and
  314. * filename.
      *
      * A line starting with # followed by the word pragma goes to pp_pragma;
      * any other line starting with # is handed to pp_line. Both rules are
      * active in INITIAL and in pp_cstrip.
  315. */
  316. <INITIAL,pp_cstrip>^{ws}*\#{ws}*pragma{ws}+ yy_push_state(pp_pragma);
  317. <INITIAL,pp_cstrip>^{ws}*\#{ws}* yy_push_state(pp_line);
  318. <pp_line>[^\n]* {
  319. int lineno, len;
  320. char *cptr;
  321. char *fname;
  322. yy_pop_state();
      /* The rest of the line starts with the line number; cptr ends up just past it */
  323. lineno = (int)strtol(yytext, &cptr, 10);
  324. if(!lineno)
  325. parser_error("Malformed '#...' line-directive; invalid linenumber");
      /* The file name is the text between the first pair of double quotes */
  326. fname = strchr(cptr, '"');
  327. if(!fname)
  328. parser_error("Malformed '#...' line-directive; missing filename");
  329. fname++;
  330. cptr = strchr(fname, '"');
  331. if(!cptr)
  332. parser_error("Malformed '#...' line-directive; missing terminating \"");
      /* Cut yytext at the closing quote so that fname is NUL-terminated */
  333. *cptr = '\0';
  334. line_number = lineno - 1; /* We didn't read the newline */
  335. input_name = xstrdup(fname);
  336. /* ignore contents of C include files */
      /* A name ending in .h (any letter case) selects pp_cstrip; BEGIN replaces the state restored by the pop above */
  337. len = strlen(input_name);
  338. if (len > 1 && !strcasecmp( input_name + len - 2, ".h" ))
  339. BEGIN(pp_cstrip);
  340. else
  341. BEGIN(INITIAL);
  342. }
      /* code_page is 9 characters long: keep just that and rescan the remainder in pp_code_page */
  343. <pp_pragma>code_page[^\n]* yyless(9); yy_pop_state(); yy_push_state(pp_code_page);
      /* Any other pragma is dropped, with a warning only in pedantic mode */
  344. <pp_pragma>[^\n]* yy_pop_state(); if (pedantic) parser_warning("Unrecognized #pragma directive '%s'\n",yytext);
      /* Accepted arguments: (default), (utf8) or (decimal number); trailing text on the line is ignored */
  345. <pp_code_page>\({ws}*default{ws}*\)[^\n]* current_codepage = -1; yy_pop_state();
  346. <pp_code_page>\({ws}*utf8{ws}*\)[^\n]* current_codepage = CP_UTF8; yy_pop_state();
  347. <pp_code_page>\({ws}*[0-9]+{ws}*\)[^\n]* {
  348. char *p = yytext;
  349. yy_pop_state();
      /* Skip the opening parenthesis and whitespace up to the first digit */
  350. while (*p < '0' || *p > '9') p++;
  351. current_codepage = strtol( p, NULL, 10 );
  352. if (!is_valid_codepage( current_codepage ))
  353. {
  354. parser_error("Codepage %d not supported", current_codepage);
  355. current_codepage = 0;
  356. }
  357. }
  358. <pp_code_page>[^\n]* yy_pop_state(); parser_error("Malformed #pragma code_page directive");
  359. /*
      * Skip the contents of C header files.
      *
      * While in pp_cstrip every character is discarded and only newlines
      * are counted. The state is entered by the line-directive rule for
      * file names ending in .h and left again by the next line directive
      * that names any other file.
      *
      * NOTE(review): this comment used to describe stripping up to a
      * semicolon with brace tracking; the two rules below do no such
      * tracking.
  363. */
  364. <pp_cstrip>\n line_number++; char_number = 1;
  365. <pp_cstrip>. ; /* ignore */
      /*
       * Plain tokens (INITIAL state only).
       *
       * Curly braces return the same tokens as the BEGIN and END keywords.
       * Numbers are decimal, 0x hexadecimal or 0o octal; a trailing l or L
       * selects tLNUMBER instead of tNUMBER. strtoul() stops at the suffix by
       * itself and accepts the 0x prefix for base 16, but the 0o prefix has to
       * be skipped by hand.
       *
       * The last rule matches words, which may also contain dots, slashes,
       * backslashes and (after the first character) dashes. A pure digit
       * string matches both this rule and the decimal rule with the same
       * length, so the earlier number rule wins. A word found in the keyword
       * table returns its keyword token, unless the parser asked for an
       * identifier (wanted_id) and the keyword is not marked alwayskw.
       */
  366. \{ return tBEGIN;
  367. \} return tEND;
  368. [0-9]+[lL]? { parser_lval.num = xstrtoul(yytext, 0, 10);
  369. return (yytext[yyleng-1] == 'L' || yytext[yyleng-1] == 'l') ? tLNUMBER : tNUMBER; }
  370. 0[xX][0-9A-Fa-f]+[lL]? { parser_lval.num = xstrtoul(yytext, 0, 16);
  371. return (yytext[yyleng-1] == 'L' || yytext[yyleng-1] == 'l') ? tLNUMBER : tNUMBER; }
  372. 0[oO][0-7]+[lL]? { parser_lval.num = xstrtoul(yytext+2, 0, 8);
  373. return (yytext[yyleng-1] == 'L' || yytext[yyleng-1] == 'l') ? tLNUMBER : tNUMBER; }
  374. [A-Za-z_0-9./\\][A-Za-z_0-9./\\\-]* {
  375. struct keyword *tok = iskeyword(yytext);
  376. if(tok)
  377. {
  378. if(wanted_id && !tok->alwayskw)
  379. {
  380. parser_lval.str = make_string(yytext);
  381. return tIDENT;
  382. }
  383. else
  384. return tok->token;
  385. }
  386. else
  387. {
  388. parser_lval.str = make_string(yytext);
  389. return tIDENT;
  390. }
  391. }
  392. /*
  393. * Wide string scanning
      *
      * The L-prefixed opening quote enters tklstr and resets wbufidx.
      * Characters are collected with addwchar() until a closing quote
      * (optionally followed by whitespace) returns tSTRING with the
      * result of get_buffered_wstring(). An error is raised at the
      * opening quote when not compiling for win32.
      *
      * Escapes handled here:
      * - backslash + 1..6 octal digits, rejected above 0xffff
      * - backslash + x + exactly 4 hex digits; 1..3 digits is an error
      * - backslash + a b f n r t v as in C
      * - backslash + newline continues the string and skips leading
      *   whitespace on the next line
      * - backslash + CR LF stores the LF in the string
      * - backslash + any other character stores that character
      *
      * A doubled quote stores one quote. A closing quote, whitespace and
      * another opening quote join two literals; that rule gives the longer
      * match, so it wins over the terminating rule.
      *
      * Bytes with the top bit set are rejected because no codepage
      * translation is done for wide strings. A bare newline inside the
      * string is an error.
  394. */
  395. L\" {
  396. yy_push_state(tklstr);
  397. wbufidx = 0;
  398. if(!win32)
  399. parser_error("16bit resource contains unicode strings\n");
  400. }
  401. <tklstr>\"{ws}+ |
  402. <tklstr>\" {
  403. yy_pop_state();
  404. parser_lval.str = get_buffered_wstring();
  405. return tSTRING;
  406. }
  407. <tklstr>\\[0-7]{1,6} { /* octal escape sequence */
  408. unsigned int result;
  409. result = strtoul(yytext+1, 0, 8);
  410. if ( result > 0xffff )
  411. parser_error("Character constant out of range");
  412. addwchar((WCHAR)result);
  413. }
  414. <tklstr>\\x[0-9a-fA-F]{4} { /* hex escape sequence */
  415. unsigned int result;
  416. result = strtoul(yytext+2, 0, 16);
  417. addwchar((WCHAR)result);
  418. }
  419. <tklstr>\\x[0-9a-fA-F]{1,3} { parser_error("Invalid hex escape sequence '%s'", yytext); }
  420. <tklstr>\\[0-9]+ parser_error("Bad escape sequence");
  421. <tklstr>\\\n{ws}* line_number++; char_number = 1; /* backslash at EOL continues string after leading whitespace on next line */
  422. <tklstr>\\a addwchar('\a');
  423. <tklstr>\\b addwchar('\b');
  424. <tklstr>\\f addwchar('\f');
  425. <tklstr>\\n addwchar('\n');
  426. <tklstr>\\r addwchar('\r');
  427. <tklstr>\\t addwchar('\t');
  428. <tklstr>\\v addwchar('\v');
  429. <tklstr>\\. {
  430. if (yytext[1] & 0x80)
  431. parser_error("Invalid char %u in wide string", (unsigned char)yytext[1]);
  432. addwchar(yytext[1]);
  433. }
  434. <tklstr>\\\r\n addwchar(yytext[2]); line_number++; char_number = 1;
  435. <tklstr>\"\" addwchar('\"'); /* "bla""bla" -> "bla\"bla" */
  436. <tklstr>\\\"\" addwchar('\"'); /* "bla\""bla" -> "bla\"bla" */
  437. <tklstr>\"{ws}+\" ; /* "bla" "bla" -> "blabla" */
  438. <tklstr>[^\\\n\"]+ {
  439. char *yptr = yytext;
  440. while(*yptr) /* FIXME: codepage translation */
  441. {
  442. if (*yptr & 0x80)
  443. parser_error("Invalid char %u in wide string", (unsigned char)*yptr);
  444. addwchar(*yptr++ & 0xff);
  445. }
  446. }
  447. <tklstr>\n parser_error("Unterminated string");
  448. /*
  449. * Normal string scanning
      *
      * The opening quote enters tkstr and resets cbufidx. Characters are
      * collected with addcchar() until a closing quote (optionally
      * followed by whitespace) returns tSTRING with the result of
      * get_buffered_cstring().
      *
      * Escapes handled here:
      * - backslash + 1..3 octal digits, rejected above 0xff
      * - backslash + x + exactly 2 hex digits; a single digit is an error
      * - backslash + a b f n r t v as in C
      * - backslash + newline continues the string and skips leading
      *   whitespace on the next line
      * - backslash + CR LF stores the LF in the string
      * - backslash + any other character stores that character
      *
      * A doubled quote stores one quote. A closing quote, whitespace and
      * another opening quote join two literals; that rule gives the longer
      * match, so it wins over the terminating rule. A bare newline inside
      * the string is an error.
  450. */
  451. \" yy_push_state(tkstr); cbufidx = 0;
  452. <tkstr>\"{ws}+ |
  453. <tkstr>\" {
  454. yy_pop_state();
  455. parser_lval.str = get_buffered_cstring();
  456. return tSTRING;
  457. }
  458. <tkstr>\\[0-7]{1,3} { /* octal escape sequence */
  459. int result;
  460. result = strtol(yytext+1, 0, 8);
  461. if ( result > 0xff )
  462. parser_error("Character constant out of range");
  463. addcchar((char)result);
  464. }
  465. <tkstr>\\x[0-9a-fA-F]{2} { /* hex escape sequence */
  466. int result;
  467. result = strtol(yytext+2, 0, 16);
  468. addcchar((char)result);
  469. }
  470. <tkstr>\\x[0-9a-fA-F] { parser_error("Invalid hex escape sequence '%s'", yytext); }
  471. <tkstr>\\[0-9]+ parser_error("Bad escape sequence");
  472. <tkstr>\\\n{ws}* line_number++; char_number = 1; /* backslash at EOL continues string after leading whitespace on next line */
  473. <tkstr>\\a addcchar('\a');
  474. <tkstr>\\b addcchar('\b');
  475. <tkstr>\\f addcchar('\f');
  476. <tkstr>\\n addcchar('\n');
  477. <tkstr>\\r addcchar('\r');
  478. <tkstr>\\t addcchar('\t');
  479. <tkstr>\\v addcchar('\v');
  480. <tkstr>\\. addcchar(yytext[1]);
  481. <tkstr>\\\r\n addcchar(yytext[2]); line_number++; char_number = 1;
  482. <tkstr>[^\\\n\"]+ {
  483. char *yptr = yytext;
  484. while(*yptr)
  485. addcchar(*yptr++);
  486. }
  487. <tkstr>\"\" addcchar('\"'); /* "bla""bla" -> "bla\"bla" */
  488. <tkstr>\\\"\" addcchar('\"'); /* "bla\""bla" -> "bla\"bla" */
  489. <tkstr>\"{ws}+\" ; /* "bla" "bla" -> "blabla" */
  490. <tkstr>\n parser_error("Unterminated string");
  /*
   * Raw data scanning
   *
   * A single quote enters tkrcd and resets cbufidx. Inside, every pair of
   * hex digits becomes one byte in cbuffer; whitespace and newlines are
   * skipped (newlines are counted) and anything else is an error. The
   * closing single quote copies the collected bytes into a new raw data
   * object and returns tRAWDATA.
   */
\'                      yy_push_state(tkrcd); cbufidx = 0;
<tkrcd>\'               {
                            yy_pop_state();
                            parser_lval.raw = new_raw_data();
                            parser_lval.raw->size = cbufidx;
                            parser_lval.raw->data = xmalloc(parser_lval.raw->size);
                            /* cbuffer is still NULL when no byte has ever been
                             * collected; memcpy() must not be given a null
                             * pointer, not even for a zero length */
                            if(cbufidx > 0)
                                memcpy(parser_lval.raw->data, cbuffer, parser_lval.raw->size);
                            return tRAWDATA;
                        }
<tkrcd>[0-9a-fA-F]{2}   {
                            int result;
                            result = strtol(yytext, 0, 16);
                            addcchar((char)result);
                        }
<tkrcd>{ws}+            ;       /* Ignore space */
<tkrcd>\n               line_number++; char_number = 1;
<tkrcd>.                parser_error("Malformed data-line");
  511. /*
  512. * Comment stripping
  513. * Should never occur after preprocessing
      *
      * A C comment may start in INITIAL or pp_cstrip and is skipped in the
      * comment state, counting newlines. wanted_id is saved at the start
      * and written back to want_id at the end, so a comment does not use
      * up the identifier request. Unless no_preprocess is set, a warning
      * is printed because the preprocessor should already have removed
      * all comments.
  514. */
  515. <INITIAL,pp_cstrip>"/*" {
  516. yy_push_state(comment);
  517. save_wanted_id = wanted_id;
  518. if(!no_preprocess)
  519. parser_warning("Found comments after preprocessing, please report\n");
  520. }
  521. <comment>[^*\n]* ;
  522. <comment>"*"+[^*/\n]* ;
  523. <comment>\n line_number++; char_number = 1;
  524. <comment>"*"+"/" yy_pop_state(); want_id = save_wanted_id;
  525. ;[^\n]* want_id = wanted_id; /* not really comment, but left-over c-junk */
  526. "//"[^\n]* want_id = wanted_id; if(!no_preprocess) parser_warning("Found comments after preprocessing, please report\n");
      /* Newline: keep the identifier request, count the line, and return tNL once if the parser asked for it via want_nl */
  527. \n {
  528. want_id = wanted_id;
  529. line_number++;
  530. char_number = 1;
  531. if(want_nl)
  532. {
  533. want_nl = 0;
  534. return tNL;
  535. }
  536. }
  537. {ws}+ want_id = wanted_id; /* Eat whitespace */
      /* Any other printable ASCII character is returned as its own token value */
  538. <INITIAL>[ -~] return yytext[0];
  /*
   * Catch all rule to find any unmatched text, in every start condition.
   * The offending byte is reported as a character (or '.' when it is not
   * printable), as a hex value, and together with the current start
   * condition. The byte is read as unsigned char so that values of 0x80
   * and above print as two hex digits instead of a sign-extended number.
   */
<*>.|\n                 {
                            unsigned char ch = (unsigned char)*yytext;
                            if(ch == '\n')
                            {
                                line_number++;
                                char_number = 1;
                            }
                            parser_error("Unmatched text '%c' (0x%02x) YY_START=%d",
                                isprint(ch) ? ch : '.', (unsigned int)ch, YY_START);
                        }
  549. %%
  550. /* These dup functions copy the enclosed '\0' from
  551. * the resource string.
  552. */
  553. static void addcchar(char c)
  554. {
  555. if(cbufidx >= cbufalloc)
  556. {
  557. cbufalloc += 1024;
  558. cbuffer = xrealloc(cbuffer, cbufalloc * sizeof(cbuffer[0]));
  559. if(cbufalloc > 65536)
  560. parser_warning("Reallocating string buffer larger than 64kB\n");
  561. }
  562. cbuffer[cbufidx++] = c;
  563. }
  564. static void addwchar(WCHAR s)
  565. {
  566. if(wbufidx >= wbufalloc)
  567. {
  568. wbufalloc += 1024;
  569. wbuffer = xrealloc(wbuffer, wbufalloc * sizeof(wbuffer[0]));
  570. if(wbufalloc > 65536)
  571. parser_warning("Reallocating wide string buffer larger than 64kB\n");
  572. }
  573. wbuffer[wbufidx++] = s;
  574. }
  575. static string_t *get_buffered_cstring(void)
  576. {
  577. string_t *str = new_string();
  578. str->size = cbufidx;
  579. str->type = str_char;
  580. str->str.cstr = xmalloc(cbufidx+1);
  581. memcpy(str->str.cstr, cbuffer, cbufidx);
  582. str->str.cstr[cbufidx] = '\0';
  583. if (!current_codepage || current_codepage == -1 || !win32) /* store as ANSI string */
  584. {
  585. if (!current_codepage) parser_error("Codepage set to Unicode only, cannot use ASCII string here");
  586. return str;
  587. }
  588. else /* convert to Unicode before storing */
  589. {
  590. string_t *str_w = convert_string_unicode( str, current_codepage );
  591. if (check_valid_utf8( str, current_codepage ))
  592. parser_warning( "string \"%s\" seems to be UTF-8 but codepage %u is in use, maybe use --utf8?\n",
  593. str->str.cstr, current_codepage );
  594. free_string( str );
  595. return str_w;
  596. }
  597. }
  598. static string_t *get_buffered_wstring(void)
  599. {
  600. string_t *str = new_string();
  601. str->size = wbufidx;
  602. str->type = str_unicode;
  603. str->str.wstr = xmalloc((wbufidx+1)*sizeof(WCHAR));
  604. memcpy(str->str.wstr, wbuffer, wbufidx*sizeof(WCHAR));
  605. str->str.wstr[wbufidx] = 0;
  606. return str;
  607. }
  608. static string_t *make_string(char *s)
  609. {
  610. string_t *ret, *str = new_string();
  611. str->size = strlen(s);
  612. str->type = str_char;
  613. str->str.cstr = xmalloc(str->size+1);
  614. memcpy(str->str.cstr, s, str->size+1);
  615. if (current_codepage <= 0 || !win32) return str;
  616. ret = convert_string_unicode( str, current_codepage );
  617. free_string( str );
  618. return ret;
  619. }