pcre.nim 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
#
#
# Nim's Runtime Library
# (c) Copyright 2015 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
  9. {.deadCodeElim: on.} # dce option deprecated
  10. # The current PCRE version information.
  11. const
  12. PCRE_MAJOR* = 8
  13. PCRE_MINOR* = 36
  14. PCRE_PRERELEASE* = true
  15. PCRE_DATE* = "2014-09-26"
# When an application links to a PCRE DLL in Windows, the symbols that are
# imported have to be identified as such. When building PCRE, the appropriate
# export setting is defined in pcre_internal.h, which includes this file. So we
# don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL.

# By default, we use the standard "extern" declarations.

# Allow for C++ users
# Public options. Some are compile-time only, some are run-time only, and some
# are both. Most of the compile-time options are saved with the compiled regex
# so that they can be inspected during studying (and therefore JIT compiling).
# Note that pcre_study() has its own set of options. Originally, all the options
# defined here used distinct bits. However, almost all the bits in a 32-bit word
# are now used, so in order to conserve them, option bits that were previously
# only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may
# also be used for compile-time options that affect only compiling and are not
# relevant for studying or JIT compiling.
#
# Some options for pcre_compile() change its behaviour but do not affect the
# behaviour of the execution functions. Other options are passed through to the
# execution functions and affect their behaviour, with or without affecting the
# behaviour of pcre_compile().
#
# Options that can be passed to pcre_compile() are tagged Cx below, with these
# variants:
#
#   C1   Affects compile only
#   C2   Does not affect compile; affects exec, dfa_exec
#   C3   Affects compile, exec, dfa_exec
#   C4   Affects compile, exec, dfa_exec, study
#   C5   Affects compile, exec, study
#
# Options that can be set for pcre_exec() and/or pcre_dfa_exec() are flagged
# with E and D, respectively. They take precedence over C3, C4, and C5 settings
# passed from pcre_compile(). Those that are compatible with JIT execution are
# flagged with J.
  50. const
  51. CASELESS* = 0x00000001 # C1
  52. MULTILINE* = 0x00000002 # C1
  53. DOTALL* = 0x00000004 # C1
  54. EXTENDED* = 0x00000008 # C1
  55. ANCHORED* = 0x00000010 # C4 E D
  56. DOLLAR_ENDONLY* = 0x00000020 # C2
  57. EXTRA* = 0x00000040 # C1
  58. NOTBOL* = 0x00000080 # E D J
  59. NOTEOL* = 0x00000100 # E D J
  60. UNGREEDY* = 0x00000200 # C1
  61. NOTEMPTY* = 0x00000400 # E D J
  62. UTF8* = 0x00000800 # C4 )
  63. UTF16* = 0x00000800 # C4 ) Synonyms
  64. UTF32* = 0x00000800 # C4 )
  65. NO_AUTO_CAPTURE* = 0x00001000 # C1
  66. NO_UTF8_CHECK* = 0x00002000 # C1 E D J )
  67. NO_UTF16_CHECK* = 0x00002000 # C1 E D J ) Synonyms
  68. NO_UTF32_CHECK* = 0x00002000 # C1 E D J )
  69. AUTO_CALLOUT* = 0x00004000 # C1
  70. PARTIAL_SOFT* = 0x00008000 # E D J ) Synonyms
  71. PARTIAL* = 0x00008000 # E D J )
  72. # This pair use the same bit.
  73. const
  74. NEVER_UTF* = 0x00010000 # C1 ) Overlaid
  75. DFA_SHORTEST* = 0x00010000 # D ) Overlaid
  76. # This pair use the same bit.
  77. const
  78. NO_AUTO_POSSESS* = 0x00020000 # C1 ) Overlaid
  79. DFA_RESTART* = 0x00020000 # D ) Overlaid
  80. const
  81. FIRSTLINE* = 0x00040000 # C3
  82. DUPNAMES* = 0x00080000 # C1
  83. NEWLINE_CR* = 0x00100000 # C3 E D
  84. NEWLINE_LF* = 0x00200000 # C3 E D
  85. NEWLINE_CRLF* = 0x00300000 # C3 E D
  86. NEWLINE_ANY* = 0x00400000 # C3 E D
  87. NEWLINE_ANYCRLF* = 0x00500000 # C3 E D
  88. BSR_ANYCRLF* = 0x00800000 # C3 E D
  89. BSR_UNICODE* = 0x01000000 # C3 E D
  90. JAVASCRIPT_COMPAT* = 0x02000000 # C5
  91. NO_START_OPTIMIZE* = 0x04000000 # C2 E D ) Synonyms
  92. NO_START_OPTIMISE* = 0x04000000 # C2 E D )
  93. PARTIAL_HARD* = 0x08000000 # E D J
  94. NOTEMPTY_ATSTART* = 0x10000000 # E D J
  95. UCP* = 0x20000000 # C3
  96. ## Exec-time and get/set-time error codes
  97. const
  98. ERROR_NOMATCH* = -1
  99. ERROR_NULL* = -2
  100. ERROR_BADOPTION* = -3
  101. ERROR_BADMAGIC* = -4
  102. ERROR_UNKNOWN_OPCODE* = -5
  103. ERROR_UNKNOWN_NODE* = -5 ## For backward compatibility
  104. ERROR_NOMEMORY* = -6
  105. ERROR_NOSUBSTRING* = -7
  106. ERROR_MATCHLIMIT* = -8
  107. ERROR_CALLOUT* = -9 ## Never used by PCRE itself
  108. ERROR_BADUTF8* = -10 ## Same for 8/16/32
  109. ERROR_BADUTF16* = -10 ## Same for 8/16/32
  110. ERROR_BADUTF32* = -10 ## Same for 8/16/32
  111. ERROR_BADUTF8_OFFSET* = -11 ## Same for 8/16
  112. ERROR_BADUTF16_OFFSET* = -11 ## Same for 8/16
  113. ERROR_PARTIAL* = -12
  114. ERROR_BADPARTIAL* = -13
  115. ERROR_INTERNAL* = -14
  116. ERROR_BADCOUNT* = -15
  117. ERROR_DFA_UITEM* = -16
  118. ERROR_DFA_UCOND* = -17
  119. ERROR_DFA_UMLIMIT* = -18
  120. ERROR_DFA_WSSIZE* = -19
  121. ERROR_DFA_RECURSE* = -20
  122. ERROR_RECURSIONLIMIT* = -21
  123. ERROR_NULLWSLIMIT* = -22 ## No longer actually used
  124. ERROR_BADNEWLINE* = -23
  125. ERROR_BADOFFSET* = -24
  126. ERROR_SHORTUTF8* = -25
  127. ERROR_SHORTUTF16* = -25 ## Same for 8/16
  128. ERROR_RECURSELOOP* = -26
  129. ERROR_JIT_STACKLIMIT* = -27
  130. ERROR_BADMODE* = -28
  131. ERROR_BADENDIANNESS* = -29
  132. ERROR_DFA_BADRESTART* = -30
  133. ERROR_JIT_BADOPTION* = -31
  134. ERROR_BADLENGTH* = -32
  135. ERROR_UNSET* = -33
  136. ## Specific error codes for UTF-8 validity checks
  137. const
  138. UTF8_ERR0* = 0
  139. UTF8_ERR1* = 1
  140. UTF8_ERR2* = 2
  141. UTF8_ERR3* = 3
  142. UTF8_ERR4* = 4
  143. UTF8_ERR5* = 5
  144. UTF8_ERR6* = 6
  145. UTF8_ERR7* = 7
  146. UTF8_ERR8* = 8
  147. UTF8_ERR9* = 9
  148. UTF8_ERR10* = 10
  149. UTF8_ERR11* = 11
  150. UTF8_ERR12* = 12
  151. UTF8_ERR13* = 13
  152. UTF8_ERR14* = 14
  153. UTF8_ERR15* = 15
  154. UTF8_ERR16* = 16
  155. UTF8_ERR17* = 17
  156. UTF8_ERR18* = 18
  157. UTF8_ERR19* = 19
  158. UTF8_ERR20* = 20
  159. UTF8_ERR21* = 21
  160. UTF8_ERR22* = 22 # Unused (was non-character)
  161. ## Specific error codes for UTF-16 validity checks
  162. const
  163. UTF16_ERR0* = 0
  164. UTF16_ERR1* = 1
  165. UTF16_ERR2* = 2
  166. UTF16_ERR3* = 3
  167. UTF16_ERR4* = 4 # Unused (was non-character)
  168. ## Specific error codes for UTF-32 validity checks
  169. const
  170. UTF32_ERR0* = 0
  171. UTF32_ERR1* = 1
  172. UTF32_ERR2* = 2 # Unused (was non-character)
  173. UTF32_ERR3* = 3
  174. ## Request types for pcre_fullinfo()
  175. const
  176. INFO_OPTIONS* = 0
  177. INFO_SIZE* = 1
  178. INFO_CAPTURECOUNT* = 2
  179. INFO_BACKREFMAX* = 3
  180. INFO_FIRSTBYTE* = 4
  181. INFO_FIRSTCHAR* = 4 ## For backwards compatibility
  182. INFO_FIRSTTABLE* = 5
  183. INFO_LASTLITERAL* = 6
  184. INFO_NAMEENTRYSIZE* = 7
  185. INFO_NAMECOUNT* = 8
  186. INFO_NAMETABLE* = 9
  187. INFO_STUDYSIZE* = 10
  188. INFO_DEFAULT_TABLES* = 11
  189. INFO_OKPARTIAL* = 12
  190. INFO_JCHANGED* = 13
  191. INFO_HASCRORLF* = 14
  192. INFO_MINLENGTH* = 15
  193. INFO_JIT* = 16
  194. INFO_JITSIZE* = 17
  195. INFO_MAXLOOKBEHIND* = 18
  196. INFO_FIRSTCHARACTER* = 19
  197. INFO_FIRSTCHARACTERFLAGS* = 20
  198. INFO_REQUIREDCHAR* = 21
  199. INFO_REQUIREDCHARFLAGS* = 22
  200. INFO_MATCHLIMIT* = 23
  201. INFO_RECURSIONLIMIT* = 24
  202. INFO_MATCH_EMPTY* = 25
  203. ## Request types for pcre_config(). Do not re-arrange, in order to remain
  204. ## compatible.
  205. const
  206. CONFIG_UTF8* = 0
  207. CONFIG_NEWLINE* = 1
  208. CONFIG_LINK_SIZE* = 2
  209. CONFIG_POSIX_MALLOC_THRESHOLD* = 3
  210. CONFIG_MATCH_LIMIT* = 4
  211. CONFIG_STACKRECURSE* = 5
  212. CONFIG_UNICODE_PROPERTIES* = 6
  213. CONFIG_MATCH_LIMIT_RECURSION* = 7
  214. CONFIG_BSR* = 8
  215. CONFIG_JIT* = 9
  216. CONFIG_UTF16* = 10
  217. CONFIG_JITTARGET* = 11
  218. CONFIG_UTF32* = 12
  219. CONFIG_PARENS_LIMIT* = 13
  220. ## Request types for pcre_study(). Do not re-arrange, in order to remain
  221. ## compatible.
  222. const
  223. STUDY_JIT_COMPILE* = 0x0001
  224. STUDY_JIT_PARTIAL_SOFT_COMPILE* = 0x0002
  225. STUDY_JIT_PARTIAL_HARD_COMPILE* = 0x0004
  226. STUDY_EXTRA_NEEDED* = 0x0008
  227. ## Bit flags for the pcre[16|32]_extra structure. Do not re-arrange or redefine
  228. ## these bits, just add new ones on the end, in order to remain compatible.
  229. const
  230. EXTRA_STUDY_DATA* = 0x0001
  231. EXTRA_MATCH_LIMIT* = 0x0002
  232. EXTRA_CALLOUT_DATA* = 0x0004
  233. EXTRA_TABLES* = 0x0008
  234. EXTRA_MATCH_LIMIT_RECURSION* = 0x0010
  235. EXTRA_MARK* = 0x0020
  236. EXTRA_EXECUTABLE_JIT* = 0x0040
  237. ## Types
  238. type
  239. Pcre* = object
  240. Pcre16* = object
  241. Pcre32* = object
  242. JitStack* = object
  243. JitStack16* = object
  244. JitStack32* = object
  245. ## The structure for passing additional data to pcre_exec(). This is defined in
  246. ## such as way as to be extensible. Always add new fields at the end, in order
  247. ## to remain compatible.
  248. type
  249. ExtraData* = object
  250. flags*: clong ## Bits for which fields are set
  251. study_data*: pointer ## Opaque data from pcre_study()
  252. match_limit*: clong ## Maximum number of calls to match()
  253. callout_data*: pointer ## Data passed back in callouts
  254. tables*: pointer ## Pointer to character tables
  255. match_limit_recursion*: clong ## Max recursive calls to match()
  256. mark*: pointer ## For passing back a mark pointer
  257. executable_jit*: pointer ## Contains a pointer to a compiled jit code
  258. ## The structure for passing out data via the pcre_callout_function. We use a
  259. ## structure so that new fields can be added on the end in future versions,
  260. ## without changing the API of the function, thereby allowing old clients to
  261. ## work without modification.
  262. type
  263. CalloutBlock* = object
  264. version* : cint ## Identifies version of block
  265. # ------------------------ Version 0 -------------------------------
  266. callout_number* : cint ## Number compiled into pattern
  267. offset_vector* : ptr cint ## The offset vector
  268. subject* : cstring ## The subject being matched
  269. subject_length* : cint ## The length of the subject
  270. start_match* : cint ## Offset to start of this match attempt
  271. current_position*: cint ## Where we currently are in the subject
  272. capture_top* : cint ## Max current capture
  273. capture_last* : cint ## Most recently closed capture
  274. callout_data* : pointer ## Data passed in with the call
  275. # ------------------- Added for Version 1 --------------------------
  276. pattern_position*: cint ## Offset to next item in the pattern
  277. next_item_length*: cint ## Length of next item in the pattern
  278. # ------------------- Added for Version 2 --------------------------
  279. mark* : pointer ## Pointer to current mark or NULL
  280. # ------------------------------------------------------------------
  281. ## User defined callback which provides a stack just before the match starts.
  282. type
  283. JitCallback* = proc (a: pointer): ptr JitStack {.cdecl.}
  284. when not defined(usePcreHeader):
  285. when hostOS == "windows":
  286. when defined(nimOldDlls):
  287. const pcreDll = "pcre.dll"
  288. elif defined(cpu64):
  289. const pcreDll = "pcre64.dll"
  290. else:
  291. const pcreDll = "pcre32.dll"
  292. elif hostOS == "macosx":
  293. const pcreDll = "libpcre(.3|.1|).dylib"
  294. else:
  295. const pcreDll = "libpcre.so(.3|.1|)"
  296. {.push dynlib: pcreDll.}
  297. else:
  298. {.push header: "<pcre.h>".}
  299. {.push cdecl, importc: "pcre_$1".}
  300. # Exported PCRE functions
  301. proc compile*(pattern: cstring,
  302. options: cint,
  303. errptr: ptr cstring,
  304. erroffset: ptr cint,
  305. tableptr: pointer): ptr Pcre
  306. proc compile2*(pattern: cstring,
  307. options: cint,
  308. errorcodeptr: ptr cint,
  309. errptr: ptr cstring,
  310. erroffset: ptr cint,
  311. tableptr: pointer): ptr Pcre
  312. proc config*(what: cint,
  313. where: pointer): cint
  314. proc copy_named_substring*(code: ptr Pcre,
  315. subject: cstring,
  316. ovector: ptr cint,
  317. stringcount: cint,
  318. stringname: cstring,
  319. buffer: cstring,
  320. buffersize: cint): cint
  321. proc copy_substring*(subject: cstring,
  322. ovector: ptr cint,
  323. stringcount: cint,
  324. stringnumber: cint,
  325. buffer: cstring,
  326. buffersize: cint): cint
  327. proc dfa_exec*(code: ptr Pcre,
  328. extra: ptr ExtraData,
  329. subject: cstring,
  330. length: cint,
  331. startoffset: cint,
  332. options: cint,
  333. ovector: ptr cint,
  334. ovecsize: cint,
  335. workspace: ptr cint,
  336. wscount: cint): cint
  337. proc exec*(code: ptr Pcre,
  338. extra: ptr ExtraData,
  339. subject: cstring,
  340. length: cint,
  341. startoffset: cint,
  342. options: cint,
  343. ovector: ptr cint,
  344. ovecsize: cint): cint
  345. proc jit_exec*(code: ptr Pcre,
  346. extra: ptr ExtraData,
  347. subject: cstring,
  348. length: cint,
  349. startoffset: cint,
  350. options: cint,
  351. ovector: ptr cint,
  352. ovecsize: cint,
  353. jstack: ptr JitStack): cint
  354. proc free_substring*(stringptr: cstring)
  355. proc free_substring_list*(stringptr: cstringArray)
  356. proc fullinfo*(code: ptr Pcre,
  357. extra: ptr ExtraData,
  358. what: cint,
  359. where: pointer): cint
  360. proc get_named_substring*(code: ptr Pcre,
  361. subject: cstring,
  362. ovector: ptr cint,
  363. stringcount: cint,
  364. stringname: cstring,
  365. stringptr: cstringArray): cint
  366. proc get_stringnumber*(code: ptr Pcre,
  367. name: cstring): cint
  368. proc get_stringtable_entries*(code: ptr Pcre,
  369. name: cstring,
  370. first: cstringArray,
  371. last: cstringArray): cint
  372. proc get_substring*(subject: cstring,
  373. ovector: ptr cint,
  374. stringcount: cint,
  375. stringnumber: cint,
  376. stringptr: cstringArray): cint
  377. proc get_substring_list*(subject: cstring,
  378. ovector: ptr cint,
  379. stringcount: cint,
  380. listptr: ptr cstringArray): cint
  381. proc maketables*(): pointer
  382. proc refcount*(code: ptr Pcre,
  383. adjust: cint): cint
  384. proc study*(code: ptr Pcre,
  385. options: cint,
  386. errptr: ptr cstring): ptr ExtraData
  387. proc free_study*(extra: ptr ExtraData)
  388. proc version*(): cstring
  389. # Utility functions for byte order swaps.
  390. proc pattern_to_host_byte_order*(code: ptr Pcre,
  391. extra: ptr ExtraData,
  392. tables: pointer): cint
  393. # JIT compiler related functions.
  394. proc jit_stack_alloc*(startsize: cint,
  395. maxsize: cint): ptr JitStack
  396. proc jit_stack_free*(stack: ptr JitStack)
  397. proc assign_jit_stack*(extra: ptr ExtraData,
  398. callback: JitCallback,
  399. data: pointer)
  400. proc jit_free_unused_memory*()
  401. # There was an odd function with `var cstring` instead of `ptr`
  402. proc study*(code: ptr Pcre,
  403. options: cint,
  404. errptr: var cstring): ptr ExtraData {.deprecated.}
  405. {.pop.}
  406. {.pop.}
  407. type
  408. PPcre* {.deprecated.} = ptr Pcre
  409. PJitStack* {.deprecated.} = ptr JitStack