strutils.nim 91 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module contains various string utility routines.
  10. ## See the module `re <re.html>`_ for regular expression support.
  11. ## See the module `pegs <pegs.html>`_ for PEG support.
  12. ## This module is available for the `JavaScript target
  13. ## <backends.html#the-javascript-target>`_.
  14. import parseutils
  15. from math import pow, floor, log10
  16. from algorithm import reverse
  17. when defined(nimVmExportFixed):
  18. from unicode import toLower, toUpper
  19. export toLower, toUpper
  20. {.deadCodeElim: on.} # dce option deprecated
  21. {.push debugger:off .} # the user does not want to trace a part
  22. # of the standard library!
  23. include "system/inclrtl"
  24. {.pop.}
  25. # Support old split with set[char]
  26. when defined(nimOldSplit):
  27. {.pragma: deprecatedSplit, deprecated.}
  28. else:
  29. {.pragma: deprecatedSplit.}
  30. const
  31. Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'}
  32. ## All the characters that count as whitespace.
  33. Letters* = {'A'..'Z', 'a'..'z'}
  34. ## the set of letters
  35. Digits* = {'0'..'9'}
  36. ## the set of digits
  37. HexDigits* = {'0'..'9', 'A'..'F', 'a'..'f'}
  38. ## the set of hexadecimal digits
  39. IdentChars* = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
  40. ## the set of characters an identifier can consist of
  41. IdentStartChars* = {'a'..'z', 'A'..'Z', '_'}
  42. ## the set of characters an identifier can start with
  43. NewLines* = {'\13', '\10'}
  44. ## the set of characters a newline terminator can start with
  45. AllChars* = {'\x00'..'\xFF'}
  46. ## A set with all the possible characters.
  47. ##
  48. ## Not very useful by its own, you can use it to create *inverted* sets to
  49. ## make the `find() proc <#find,string,set[char],int>`_ find **invalid**
  50. ## characters in strings. Example:
  51. ##
  52. ## .. code-block:: nim
  53. ## let invalid = AllChars - Digits
  54. ## doAssert "01234".find(invalid) == -1
  55. ## doAssert "01A34".find(invalid) == 2
  56. proc isAlphaAscii*(c: char): bool {.noSideEffect, procvar,
  57. rtl, extern: "nsuIsAlphaAsciiChar".}=
  58. ## Checks whether or not `c` is alphabetical.
  59. ##
  60. ## This checks a-z, A-Z ASCII characters only.
  61. runnableExamples:
  62. doAssert isAlphaAscii('e') == true
  63. doAssert isAlphaAscii('E') == true
  64. doAssert isAlphaAscii('8') == false
  65. return c in Letters
  66. proc isAlphaNumeric*(c: char): bool {.noSideEffect, procvar,
  67. rtl, extern: "nsuIsAlphaNumericChar".} =
  68. ## Checks whether or not `c` is alphanumeric.
  69. ##
  70. ## This checks a-z, A-Z, 0-9 ASCII characters only.
  71. runnableExamples:
  72. doAssert isAlphaNumeric('n') == true
  73. doAssert isAlphaNumeric('8') == true
  74. doAssert isAlphaNumeric(' ') == false
  75. return c in Letters+Digits
  76. proc isDigit*(c: char): bool {.noSideEffect, procvar,
  77. rtl, extern: "nsuIsDigitChar".} =
  78. ## Checks whether or not `c` is a number.
  79. ##
  80. ## This checks 0-9 ASCII characters only.
  81. runnableExamples:
  82. doAssert isDigit('n') == false
  83. doAssert isDigit('8') == true
  84. return c in Digits
  85. proc isSpaceAscii*(c: char): bool {.noSideEffect, procvar,
  86. rtl, extern: "nsuIsSpaceAsciiChar".} =
  87. ## Checks whether or not `c` is a whitespace character.
  88. runnableExamples:
  89. doAssert isSpaceAscii('n') == false
  90. doAssert isSpaceAscii(' ') == true
  91. return c in Whitespace
  92. proc isLowerAscii*(c: char): bool {.noSideEffect, procvar,
  93. rtl, extern: "nsuIsLowerAsciiChar".} =
  94. ## Checks whether or not `c` is a lower case character.
  95. ##
  96. ## This checks ASCII characters only.
  97. runnableExamples:
  98. doAssert isLowerAscii('e') == true
  99. doAssert isLowerAscii('E') == false
  100. doAssert isLowerAscii('7') == false
  101. return c in {'a'..'z'}
  102. proc isUpperAscii*(c: char): bool {.noSideEffect, procvar,
  103. rtl, extern: "nsuIsUpperAsciiChar".} =
  104. ## Checks whether or not `c` is an upper case character.
  105. ##
  106. ## This checks ASCII characters only.
  107. runnableExamples:
  108. doAssert isUpperAscii('e') == false
  109. doAssert isUpperAscii('E') == true
  110. doAssert isUpperAscii('7') == false
  111. return c in {'A'..'Z'}
  112. template isImpl(call) =
  113. if s.len == 0: return false
  114. result = true
  115. for c in s:
  116. if not call(c): return false
  117. proc isAlphaAscii*(s: string): bool {.noSideEffect, procvar,
  118. rtl, extern: "nsuIsAlphaAsciiStr",
  119. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  120. ## Checks whether or not `s` is alphabetical.
  121. ##
  122. ## This checks a-z, A-Z ASCII characters only.
  123. ## Returns true if all characters in `s` are
  124. ## alphabetic and there is at least one character
  125. ## in `s`.
  126. runnableExamples:
  127. doAssert isAlphaAscii("fooBar") == true
  128. doAssert isAlphaAscii("fooBar1") == false
  129. doAssert isAlphaAscii("foo Bar") == false
  130. isImpl isAlphaAscii
  131. proc isAlphaNumeric*(s: string): bool {.noSideEffect, procvar,
  132. rtl, extern: "nsuIsAlphaNumericStr",
  133. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  134. ## Checks whether or not `s` is alphanumeric.
  135. ##
  136. ## This checks a-z, A-Z, 0-9 ASCII characters only.
  137. ## Returns true if all characters in `s` are
  138. ## alpanumeric and there is at least one character
  139. ## in `s`.
  140. runnableExamples:
  141. doAssert isAlphaNumeric("fooBar") == true
  142. doAssert isAlphaNumeric("fooBar") == true
  143. doAssert isAlphaNumeric("foo Bar") == false
  144. isImpl isAlphaNumeric
  145. proc isDigit*(s: string): bool {.noSideEffect, procvar,
  146. rtl, extern: "nsuIsDigitStr",
  147. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  148. ## Checks whether or not `s` is a numeric value.
  149. ##
  150. ## This checks 0-9 ASCII characters only.
  151. ## Returns true if all characters in `s` are
  152. ## numeric and there is at least one character
  153. ## in `s`.
  154. runnableExamples:
  155. doAssert isDigit("1908") == true
  156. doAssert isDigit("fooBar1") == false
  157. isImpl isDigit
  158. proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar,
  159. rtl, extern: "nsuIsSpaceAsciiStr",
  160. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  161. ## Checks whether or not `s` is completely whitespace.
  162. ##
  163. ## Returns true if all characters in `s` are whitespace
  164. ## characters and there is at least one character in `s`.
  165. runnableExamples:
  166. doAssert isSpaceAscii(" ") == true
  167. doAssert isSpaceAscii("") == false
  168. isImpl isSpaceAscii
  169. template isCaseImpl(s, charProc, skipNonAlpha) =
  170. var hasAtleastOneAlphaChar = false
  171. if s.len == 0: return false
  172. for c in s:
  173. if skipNonAlpha:
  174. var charIsAlpha = c.isAlphaAscii()
  175. if not hasAtleastOneAlphaChar:
  176. hasAtleastOneAlphaChar = charIsAlpha
  177. if charIsAlpha and (not charProc(c)):
  178. return false
  179. else:
  180. if not charProc(c):
  181. return false
  182. return if skipNonAlpha: hasAtleastOneAlphaChar else: true
  183. proc isLowerAscii*(s: string, skipNonAlpha: bool): bool {.
  184. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  185. ## Checks whether ``s`` is lower case.
  186. ##
  187. ## This checks ASCII characters only.
  188. ##
  189. ## If ``skipNonAlpha`` is true, returns true if all alphabetical
  190. ## characters in ``s`` are lower case. Returns false if none of the
  191. ## characters in ``s`` are alphabetical.
  192. ##
  193. ## If ``skipNonAlpha`` is false, returns true only if all characters
  194. ## in ``s`` are alphabetical and lower case.
  195. ##
  196. ## For either value of ``skipNonAlpha``, returns false if ``s`` is
  197. ## an empty string.
  198. runnableExamples:
  199. doAssert isLowerAscii("1foobar", false) == false
  200. doAssert isLowerAscii("1foobar", true) == true
  201. doAssert isLowerAscii("1fooBar", true) == false
  202. isCaseImpl(s, isLowerAscii, skipNonAlpha)
  203. proc isUpperAscii*(s: string, skipNonAlpha: bool): bool {.
  204. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  205. ## Checks whether ``s`` is upper case.
  206. ##
  207. ## This checks ASCII characters only.
  208. ##
  209. ## If ``skipNonAlpha`` is true, returns true if all alphabetical
  210. ## characters in ``s`` are upper case. Returns false if none of the
  211. ## characters in ``s`` are alphabetical.
  212. ##
  213. ## If ``skipNonAlpha`` is false, returns true only if all characters
  214. ## in ``s`` are alphabetical and upper case.
  215. ##
  216. ## For either value of ``skipNonAlpha``, returns false if ``s`` is
  217. ## an empty string.
  218. runnableExamples:
  219. doAssert isUpperAscii("1FOO", false) == false
  220. doAssert isUpperAscii("1FOO", true) == true
  221. doAssert isUpperAscii("1Foo", true) == false
  222. isCaseImpl(s, isUpperAscii, skipNonAlpha)
  223. proc toLowerAscii*(c: char): char {.noSideEffect, procvar,
  224. rtl, extern: "nsuToLowerAsciiChar".} =
  225. ## Returns the lower case version of ``c``.
  226. ##
  227. ## This works only for the letters ``A-Z``. See `unicode.toLower
  228. ## <unicode.html#toLower>`_ for a version that works for any Unicode
  229. ## character.
  230. runnableExamples:
  231. doAssert toLowerAscii('A') == 'a'
  232. doAssert toLowerAscii('e') == 'e'
  233. if c in {'A'..'Z'}:
  234. result = chr(ord(c) + (ord('a') - ord('A')))
  235. else:
  236. result = c
  237. template toImpl(call) =
  238. result = newString(len(s))
  239. for i in 0..len(s) - 1:
  240. result[i] = call(s[i])
  241. proc toLowerAscii*(s: string): string {.noSideEffect, procvar,
  242. rtl, extern: "nsuToLowerAsciiStr".} =
  243. ## Converts `s` into lower case.
  244. ##
  245. ## This works only for the letters ``A-Z``. See `unicode.toLower
  246. ## <unicode.html#toLower>`_ for a version that works for any Unicode
  247. ## character.
  248. runnableExamples:
  249. doAssert toLowerAscii("FooBar!") == "foobar!"
  250. toImpl toLowerAscii
  251. proc toUpperAscii*(c: char): char {.noSideEffect, procvar,
  252. rtl, extern: "nsuToUpperAsciiChar".} =
  253. ## Converts `c` into upper case.
  254. ##
  255. ## This works only for the letters ``A-Z``. See `unicode.toUpper
  256. ## <unicode.html#toUpper>`_ for a version that works for any Unicode
  257. ## character.
  258. runnableExamples:
  259. doAssert toUpperAscii('a') == 'A'
  260. doAssert toUpperAscii('E') == 'E'
  261. if c in {'a'..'z'}:
  262. result = chr(ord(c) - (ord('a') - ord('A')))
  263. else:
  264. result = c
  265. proc toUpperAscii*(s: string): string {.noSideEffect, procvar,
  266. rtl, extern: "nsuToUpperAsciiStr".} =
  267. ## Converts `s` into upper case.
  268. ##
  269. ## This works only for the letters ``A-Z``. See `unicode.toUpper
  270. ## <unicode.html#toUpper>`_ for a version that works for any Unicode
  271. ## character.
  272. runnableExamples:
  273. doAssert toUpperAscii("FooBar!") == "FOOBAR!"
  274. toImpl toUpperAscii
  275. proc capitalizeAscii*(s: string): string {.noSideEffect, procvar,
  276. rtl, extern: "nsuCapitalizeAscii".} =
  277. ## Converts the first character of `s` into upper case.
  278. ##
  279. ## This works only for the letters ``A-Z``.
  280. runnableExamples:
  281. doAssert capitalizeAscii("foo") == "Foo"
  282. doAssert capitalizeAscii("-bar") == "-bar"
  283. if s.len == 0: result = ""
  284. else: result = toUpperAscii(s[0]) & substr(s, 1)
  285. proc normalize*(s: string): string {.noSideEffect, procvar,
  286. rtl, extern: "nsuNormalize".} =
  287. ## Normalizes the string `s`.
  288. ##
  289. ## That means to convert it to lower case and remove any '_'. This
  290. ## should NOT be used to normalize Nim identifier names.
  291. runnableExamples:
  292. doAssert normalize("Foo_bar") == "foobar"
  293. doAssert normalize("Foo Bar") == "foo bar"
  294. result = newString(s.len)
  295. var j = 0
  296. for i in 0..len(s) - 1:
  297. if s[i] in {'A'..'Z'}:
  298. result[j] = chr(ord(s[i]) + (ord('a') - ord('A')))
  299. inc j
  300. elif s[i] != '_':
  301. result[j] = s[i]
  302. inc j
  303. if j != s.len: setLen(result, j)
  304. proc cmpIgnoreCase*(a, b: string): int {.noSideEffect,
  305. rtl, extern: "nsuCmpIgnoreCase", procvar.} =
  306. ## Compares two strings in a case insensitive manner. Returns:
  307. ##
  308. ## | 0 iff a == b
  309. ## | < 0 iff a < b
  310. ## | > 0 iff a > b
  311. runnableExamples:
  312. doAssert cmpIgnoreCase("FooBar", "foobar") == 0
  313. doAssert cmpIgnoreCase("bar", "Foo") < 0
  314. doAssert cmpIgnoreCase("Foo5", "foo4") > 0
  315. var i = 0
  316. var m = min(a.len, b.len)
  317. while i < m:
  318. result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i]))
  319. if result != 0: return
  320. inc(i)
  321. result = a.len - b.len
  322. {.push checks: off, line_trace: off .} # this is a hot-spot in the compiler!
  323. # thus we compile without checks here
  324. proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect,
  325. rtl, extern: "nsuCmpIgnoreStyle", procvar.} =
  326. ## Semantically the same as ``cmp(normalize(a), normalize(b))``. It
  327. ## is just optimized to not allocate temporary strings. This should
  328. ## NOT be used to compare Nim identifier names. use `macros.eqIdent`
  329. ## for that. Returns:
  330. ##
  331. ## | 0 iff a == b
  332. ## | < 0 iff a < b
  333. ## | > 0 iff a > b
  334. runnableExamples:
  335. doAssert cmpIgnoreStyle("foo_bar", "FooBar") == 0
  336. doAssert cmpIgnoreStyle("foo_bar_5", "FooBar4") > 0
  337. var i = 0
  338. var j = 0
  339. while true:
  340. while i < a.len and a[i] == '_': inc i
  341. while j < b.len and b[j] == '_': inc j
  342. var aa = if i < a.len: toLowerAscii(a[i]) else: '\0'
  343. var bb = if j < b.len: toLowerAscii(b[j]) else: '\0'
  344. result = ord(aa) - ord(bb)
  345. if result != 0: return result
  346. # the characters are identical:
  347. if i >= a.len:
  348. # both cursors at the end:
  349. if j >= b.len: return 0
  350. # not yet at the end of 'b':
  351. return -1
  352. elif j >= b.len:
  353. return 1
  354. inc i
  355. inc j
  356. proc strip*(s: string, leading = true, trailing = true,
  357. chars: set[char] = Whitespace): string
  358. {.noSideEffect, rtl, extern: "nsuStrip".} =
  359. ## Strips leading or trailing `chars` from `s` and returns
  360. ## the resulting string.
  361. ##
  362. ## If `leading` is true, leading `chars` are stripped.
  363. ## If `trailing` is true, trailing `chars` are stripped.
  364. ## If both are false, the string is returned unchanged.
  365. runnableExamples:
  366. doAssert " vhellov ".strip().strip(trailing = false, chars = {'v'}) == "hellov"
  367. var
  368. first = 0
  369. last = len(s)-1
  370. if leading:
  371. while first <= last and s[first] in chars: inc(first)
  372. if trailing:
  373. while last >= 0 and s[last] in chars: dec(last)
  374. result = substr(s, first, last)
  375. proc toOctal*(c: char): string {.noSideEffect, rtl, extern: "nsuToOctal".} =
  376. ## Converts a character `c` to its octal representation.
  377. ##
  378. ## The resulting string may not have a leading zero. Its length is always
  379. ## exactly 3.
  380. runnableExamples:
  381. doAssert toOctal('!') == "041"
  382. result = newString(3)
  383. var val = ord(c)
  384. for i in countdown(2, 0):
  385. result[i] = chr(val mod 8 + ord('0'))
  386. val = val div 8
  387. proc isNilOrEmpty*(s: string): bool {.noSideEffect, procvar, rtl,
  388. extern: "nsuIsNilOrEmpty",
  389. deprecated: "use 'x.len == 0' instead".} =
  390. ## Checks if `s` is nil or empty.
  391. result = len(s) == 0
  392. proc isNilOrWhitespace*(s: string): bool {.noSideEffect, procvar, rtl, extern: "nsuIsNilOrWhitespace".} =
  393. ## Checks if `s` is nil or consists entirely of whitespace characters.
  394. result = true
  395. for c in s:
  396. if not c.isSpaceAscii():
  397. return false
  398. proc substrEq(s: string, pos: int, substr: string): bool =
  399. var i = 0
  400. var length = substr.len
  401. while i < length and s[pos+i] == substr[i]:
  402. inc i
  403. return i == length
  404. # --------- Private templates for different split separators -----------
  405. template stringHasSep(s: string, index: int, seps: set[char]): bool =
  406. s[index] in seps
  407. template stringHasSep(s: string, index: int, sep: char): bool =
  408. s[index] == sep
  409. template stringHasSep(s: string, index: int, sep: string): bool =
  410. s.substrEq(index, sep)
  411. template splitCommon(s, sep, maxsplit, sepLen) =
  412. ## Common code for split procedures
  413. var last = 0
  414. var splits = maxsplit
  415. while last <= len(s):
  416. var first = last
  417. while last < len(s) and not stringHasSep(s, last, sep):
  418. inc(last)
  419. if splits == 0: last = len(s)
  420. yield substr(s, first, last-1)
  421. if splits == 0: break
  422. dec(splits)
  423. inc(last, sepLen)
  424. template oldSplit(s, seps, maxsplit) =
  425. var last = 0
  426. var splits = maxsplit
  427. assert(not ('\0' in seps))
  428. while last < len(s):
  429. while last < len(s) and s[last] in seps: inc(last)
  430. var first = last
  431. while last < len(s) and s[last] notin seps: inc(last)
  432. if first <= last-1:
  433. if splits == 0: last = len(s)
  434. yield substr(s, first, last-1)
  435. if splits == 0: break
  436. dec(splits)
  437. iterator split*(s: string, seps: set[char] = Whitespace,
  438. maxsplit: int = -1): string =
  439. ## Splits the string `s` into substrings using a group of separators.
  440. ##
  441. ## Substrings are separated by a substring containing only `seps`.
  442. ##
  443. ## .. code-block:: nim
  444. ## for word in split("this\lis an\texample"):
  445. ## writeLine(stdout, word)
  446. ##
  447. ## ...generates this output:
  448. ##
  449. ## .. code-block::
  450. ## "this"
  451. ## "is"
  452. ## "an"
  453. ## "example"
  454. ##
  455. ## And the following code:
  456. ##
  457. ## .. code-block:: nim
  458. ## for word in split("this:is;an$example", {';', ':', '$'}):
  459. ## writeLine(stdout, word)
  460. ##
  461. ## ...produces the same output as the first example. The code:
  462. ##
  463. ## .. code-block:: nim
  464. ## let date = "2012-11-20T22:08:08.398990"
  465. ## let separators = {' ', '-', ':', 'T'}
  466. ## for number in split(date, separators):
  467. ## writeLine(stdout, number)
  468. ##
  469. ## ...results in:
  470. ##
  471. ## .. code-block::
  472. ## "2012"
  473. ## "11"
  474. ## "20"
  475. ## "22"
  476. ## "08"
  477. ## "08.398990"
  478. ##
  479. splitCommon(s, seps, maxsplit, 1)
  480. iterator splitWhitespace*(s: string, maxsplit: int = -1): string =
  481. ## Splits the string ``s`` at whitespace stripping leading and trailing
  482. ## whitespace if necessary. If ``maxsplit`` is specified and is positive,
  483. ## no more than ``maxsplit`` splits is made.
  484. ##
  485. ## The following code:
  486. ##
  487. ## .. code-block:: nim
  488. ## let s = " foo \t bar baz "
  489. ## for ms in [-1, 1, 2, 3]:
  490. ## echo "------ maxsplit = ", ms, ":"
  491. ## for item in s.splitWhitespace(maxsplit=ms):
  492. ## echo '"', item, '"'
  493. ##
  494. ## ...results in:
  495. ##
  496. ## .. code-block::
  497. ## ------ maxsplit = -1:
  498. ## "foo"
  499. ## "bar"
  500. ## "baz"
  501. ## ------ maxsplit = 1:
  502. ## "foo"
  503. ## "bar baz "
  504. ## ------ maxsplit = 2:
  505. ## "foo"
  506. ## "bar"
  507. ## "baz "
  508. ## ------ maxsplit = 3:
  509. ## "foo"
  510. ## "bar"
  511. ## "baz"
  512. ##
  513. oldSplit(s, Whitespace, maxsplit)
  514. template accResult(iter: untyped) =
  515. result = @[]
  516. for x in iter: add(result, x)
  517. proc splitWhitespace*(s: string, maxsplit: int = -1): seq[string] {.noSideEffect,
  518. rtl, extern: "nsuSplitWhitespace".} =
  519. ## The same as the `splitWhitespace <#splitWhitespace.i,string,int>`_
  520. ## iterator, but is a proc that returns a sequence of substrings.
  521. accResult(splitWhitespace(s, maxsplit))
  522. iterator split*(s: string, sep: char, maxsplit: int = -1): string =
  523. ## Splits the string `s` into substrings using a single separator.
  524. ##
  525. ## Substrings are separated by the character `sep`.
  526. ## The code:
  527. ##
  528. ## .. code-block:: nim
  529. ## for word in split(";;this;is;an;;example;;;", ';'):
  530. ## writeLine(stdout, word)
  531. ##
  532. ## Results in:
  533. ##
  534. ## .. code-block::
  535. ## ""
  536. ## ""
  537. ## "this"
  538. ## "is"
  539. ## "an"
  540. ## ""
  541. ## "example"
  542. ## ""
  543. ## ""
  544. ## ""
  545. ##
  546. splitCommon(s, sep, maxsplit, 1)
  547. iterator split*(s: string, sep: string, maxsplit: int = -1): string =
  548. ## Splits the string `s` into substrings using a string separator.
  549. ##
  550. ## Substrings are separated by the string `sep`.
  551. ## The code:
  552. ##
  553. ## .. code-block:: nim
  554. ## for word in split("thisDATAisDATAcorrupted", "DATA"):
  555. ## writeLine(stdout, word)
  556. ##
  557. ## Results in:
  558. ##
  559. ## .. code-block::
  560. ## "this"
  561. ## "is"
  562. ## "corrupted"
  563. ##
  564. splitCommon(s, sep, maxsplit, sep.len)
  565. template rsplitCommon(s, sep, maxsplit, sepLen) =
  566. ## Common code for rsplit functions
  567. var
  568. last = s.len - 1
  569. first = last
  570. splits = maxsplit
  571. startPos = 0
  572. # go to -1 in order to get separators at the beginning
  573. while first >= -1:
  574. while first >= 0 and not stringHasSep(s, first, sep):
  575. dec(first)
  576. if splits == 0:
  577. # No more splits means set first to the beginning
  578. first = -1
  579. if first == -1:
  580. startPos = 0
  581. else:
  582. startPos = first + sepLen
  583. yield substr(s, startPos, last)
  584. if splits == 0: break
  585. dec(splits)
  586. dec(first)
  587. last = first
  588. iterator rsplit*(s: string, seps: set[char] = Whitespace,
  589. maxsplit: int = -1): string =
  590. ## Splits the string `s` into substrings from the right using a
  591. ## string separator. Works exactly the same as `split iterator
  592. ## <#split.i,string,char,int>`_ except in reverse order.
  593. ##
  594. ## .. code-block:: nim
  595. ## for piece in "foo bar".rsplit(WhiteSpace):
  596. ## echo piece
  597. ##
  598. ## Results in:
  599. ##
  600. ## .. code-block:: nim
  601. ## "bar"
  602. ## "foo"
  603. ##
  604. ## Substrings are separated from the right by the set of chars `seps`
  605. rsplitCommon(s, seps, maxsplit, 1)
  606. iterator rsplit*(s: string, sep: char,
  607. maxsplit: int = -1): string =
  608. ## Splits the string `s` into substrings from the right using a
  609. ## string separator. Works exactly the same as `split iterator
  610. ## <#split.i,string,char,int>`_ except in reverse order.
  611. ##
  612. ## .. code-block:: nim
  613. ## for piece in "foo:bar".rsplit(':'):
  614. ## echo piece
  615. ##
  616. ## Results in:
  617. ##
  618. ## .. code-block:: nim
  619. ## "bar"
  620. ## "foo"
  621. ##
  622. ## Substrings are separated from the right by the char `sep`
  623. rsplitCommon(s, sep, maxsplit, 1)
  624. iterator rsplit*(s: string, sep: string, maxsplit: int = -1,
  625. keepSeparators: bool = false): string =
  626. ## Splits the string `s` into substrings from the right using a
  627. ## string separator. Works exactly the same as `split iterator
  628. ## <#split.i,string,string,int>`_ except in reverse order.
  629. ##
  630. ## .. code-block:: nim
  631. ## for piece in "foothebar".rsplit("the"):
  632. ## echo piece
  633. ##
  634. ## Results in:
  635. ##
  636. ## .. code-block:: nim
  637. ## "bar"
  638. ## "foo"
  639. ##
  640. ## Substrings are separated from the right by the string `sep`
  641. rsplitCommon(s, sep, maxsplit, sep.len)
  642. iterator splitLines*(s: string, keepEol = false): string =
  643. ## Splits the string `s` into its containing lines.
  644. ##
  645. ## Every `character literal <manual.html#character-literals>`_ newline
  646. ## combination (CR, LF, CR-LF) is supported. The result strings contain no
  647. ## trailing end of line characters unless parameter ``keepEol`` is set to
  648. ## ``true``.
  649. ##
  650. ## Example:
  651. ##
  652. ## .. code-block:: nim
  653. ## for line in splitLines("\nthis\nis\nan\n\nexample\n"):
  654. ## writeLine(stdout, line)
  655. ##
  656. ## Results in:
  657. ##
  658. ## .. code-block:: nim
  659. ## ""
  660. ## "this"
  661. ## "is"
  662. ## "an"
  663. ## ""
  664. ## "example"
  665. ## ""
  666. var first = 0
  667. var last = 0
  668. var eolpos = 0
  669. while true:
  670. while last < s.len and s[last] notin {'\c', '\l'}: inc(last)
  671. eolpos = last
  672. if last < s.len:
  673. if s[last] == '\l': inc(last)
  674. elif s[last] == '\c':
  675. inc(last)
  676. if last < s.len and s[last] == '\l': inc(last)
  677. yield substr(s, first, if keepEol: last-1 else: eolpos-1)
  678. # no eol characters consumed means that the string is over
  679. if eolpos == last:
  680. break
  681. first = last
  682. proc splitLines*(s: string, keepEol = false): seq[string] {.noSideEffect,
  683. rtl, extern: "nsuSplitLines".} =
  684. ## The same as the `splitLines <#splitLines.i,string>`_ iterator, but is a
  685. ## proc that returns a sequence of substrings.
  686. accResult(splitLines(s, keepEol=keepEol))
  687. proc countLines*(s: string): int {.noSideEffect,
  688. rtl, extern: "nsuCountLines".} =
  689. ## Returns the number of lines in the string `s`.
  690. ##
  691. ## This is the same as ``len(splitLines(s))``, but much more efficient
  692. ## because it doesn't modify the string creating temporal objects. Every
  693. ## `character literal <manual.html#character-literals>`_ newline combination
  694. ## (CR, LF, CR-LF) is supported.
  695. ##
  696. ## In this context, a line is any string seperated by a newline combination.
  697. ## A line can be an empty string.
  698. runnableExamples:
  699. doAssert countLines("First line\l and second line.") == 2
  700. result = 1
  701. var i = 0
  702. while i < s.len:
  703. case s[i]
  704. of '\c':
  705. if i+1 < s.len and s[i+1] == '\l': inc i
  706. inc result
  707. of '\l': inc result
  708. else: discard
  709. inc i
  710. proc split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[string] {.
  711. noSideEffect, rtl, extern: "nsuSplitCharSet".} =
  712. ## The same as the `split iterator <#split.i,string,set[char],int>`_, but is a
  713. ## proc that returns a sequence of substrings.
  714. runnableExamples:
  715. doAssert "a,b;c".split({',', ';'}) == @["a", "b", "c"]
  716. doAssert "".split({' '}) == @[""]
  717. accResult(split(s, seps, maxsplit))
  718. proc split*(s: string, sep: char, maxsplit: int = -1): seq[string] {.noSideEffect,
  719. rtl, extern: "nsuSplitChar".} =
  720. ## The same as the `split iterator <#split.i,string,char,int>`_, but is a proc
  721. ## that returns a sequence of substrings.
  722. runnableExamples:
  723. doAssert "a,b,c".split(',') == @["a", "b", "c"]
  724. doAssert "".split(' ') == @[""]
  725. accResult(split(s, sep, maxsplit))
  726. proc split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.noSideEffect,
  727. rtl, extern: "nsuSplitString".} =
  728. ## Splits the string `s` into substrings using a string separator.
  729. ##
  730. ## Substrings are separated by the string `sep`. This is a wrapper around the
  731. ## `split iterator <#split.i,string,string,int>`_.
  732. runnableExamples:
  733. doAssert "a,b,c".split(",") == @["a", "b", "c"]
  734. doAssert "a man a plan a canal panama".split("a ") == @["", "man ", "plan ", "canal panama"]
  735. doAssert "".split("Elon Musk") == @[""]
  736. doAssert "a largely spaced sentence".split(" ") == @["a", "", "largely", "", "", "", "spaced", "sentence"]
  737. doAssert "a largely spaced sentence".split(" ", maxsplit=1) == @["a", " largely spaced sentence"]
  738. doAssert(sep.len > 0)
  739. accResult(split(s, sep, maxsplit))
  740. proc rsplit*(s: string, seps: set[char] = Whitespace,
  741. maxsplit: int = -1): seq[string]
  742. {.noSideEffect, rtl, extern: "nsuRSplitCharSet".} =
  743. ## The same as the `rsplit iterator <#rsplit.i,string,set[char],int>`_, but is a
  744. ## proc that returns a sequence of substrings.
  745. ##
  746. ## A possible common use case for `rsplit` is path manipulation,
  747. ## particularly on systems that don't use a common delimiter.
  748. ##
  749. ## For example, if a system had `#` as a delimiter, you could
  750. ## do the following to get the tail of the path:
  751. ##
  752. ## .. code-block:: nim
  753. ## var tailSplit = rsplit("Root#Object#Method#Index", {'#'}, maxsplit=1)
  754. ##
  755. ## Results in `tailSplit` containing:
  756. ##
  757. ## .. code-block:: nim
  758. ## @["Root#Object#Method", "Index"]
  759. ##
  760. accResult(rsplit(s, seps, maxsplit))
  761. result.reverse()
  762. proc rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string]
  763. {.noSideEffect, rtl, extern: "nsuRSplitChar".} =
  764. ## The same as the `rsplit iterator <#rsplit.i,string,char,int>`_, but is a proc
  765. ## that returns a sequence of substrings.
  766. ##
  767. ## A possible common use case for `rsplit` is path manipulation,
  768. ## particularly on systems that don't use a common delimiter.
  769. ##
  770. ## For example, if a system had `#` as a delimiter, you could
  771. ## do the following to get the tail of the path:
  772. ##
  773. ## .. code-block:: nim
  774. ## var tailSplit = rsplit("Root#Object#Method#Index", '#', maxsplit=1)
  775. ##
  776. ## Results in `tailSplit` containing:
  777. ##
  778. ## .. code-block:: nim
  779. ## @["Root#Object#Method", "Index"]
  780. ##
  781. accResult(rsplit(s, sep, maxsplit))
  782. result.reverse()
  783. proc rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string]
  784. {.noSideEffect, rtl, extern: "nsuRSplitString".} =
  785. ## The same as the `rsplit iterator <#rsplit.i,string,string,int>`_, but is a proc
  786. ## that returns a sequence of substrings.
  787. ##
  788. ## A possible common use case for `rsplit` is path manipulation,
  789. ## particularly on systems that don't use a common delimiter.
  790. ##
  791. ## For example, if a system had `#` as a delimiter, you could
  792. ## do the following to get the tail of the path:
  793. ##
  794. ## .. code-block:: nim
  795. ## var tailSplit = rsplit("Root#Object#Method#Index", "#", maxsplit=1)
  796. ##
  797. ## Results in `tailSplit` containing:
  798. ##
  799. ## .. code-block:: nim
  800. ## @["Root#Object#Method", "Index"]
  801. ##
  802. runnableExamples:
  803. doAssert "a largely spaced sentence".rsplit(" ", maxsplit=1) == @["a largely spaced", "sentence"]
  804. doAssert "a,b,c".rsplit(",") == @["a", "b", "c"]
  805. doAssert "a man a plan a canal panama".rsplit("a ") == @["", "man ", "plan ", "canal panama"]
  806. doAssert "".rsplit("Elon Musk") == @[""]
  807. doAssert "a largely spaced sentence".rsplit(" ") == @["a", "", "largely", "", "", "", "spaced", "sentence"]
  808. accResult(rsplit(s, sep, maxsplit))
  809. result.reverse()
  810. proc toHex*(x: BiggestInt, len: Positive): string {.noSideEffect,
  811. rtl, extern: "nsuToHex".} =
  812. ## Converts `x` to its hexadecimal representation.
  813. ##
  814. ## The resulting string will be exactly `len` characters long. No prefix like
  815. ## ``0x`` is generated. `x` is treated as an unsigned value.
  816. runnableExamples:
  817. doAssert toHex(1984, 6) == "0007C0"
  818. doAssert toHex(1984, 2) == "C0"
  819. const
  820. HexChars = "0123456789ABCDEF"
  821. var
  822. n = x
  823. result = newString(len)
  824. for j in countdown(len-1, 0):
  825. result[j] = HexChars[int(n and 0xF)]
  826. n = n shr 4
  827. # handle negative overflow
  828. if n == 0 and x < 0: n = -1
  829. proc toHex*[T: SomeInteger](x: T): string =
  830. ## Shortcut for ``toHex(x, T.sizeOf * 2)``
  831. runnableExamples:
  832. doAssert toHex(1984'i64) == "00000000000007C0"
  833. toHex(BiggestInt(x), T.sizeOf * 2)
  834. proc toHex*(s: string): string {.noSideEffect, rtl.} =
  835. ## Converts a bytes string to its hexadecimal representation.
  836. ##
  837. ## The output is twice the input long. No prefix like
  838. ## ``0x`` is generated.
  839. const HexChars = "0123456789ABCDEF"
  840. result = newString(s.len * 2)
  841. for pos, c in s:
  842. var n = ord(c)
  843. result[pos * 2 + 1] = HexChars[n and 0xF]
  844. n = n shr 4
  845. result[pos * 2] = HexChars[n]
  846. proc intToStr*(x: int, minchars: Positive = 1): string {.noSideEffect,
  847. rtl, extern: "nsuIntToStr".} =
  848. ## Converts `x` to its decimal representation.
  849. ##
  850. ## The resulting string will be minimally `minchars` characters long. This is
  851. ## achieved by adding leading zeros.
  852. runnableExamples:
  853. doAssert intToStr(1984) == "1984"
  854. doAssert intToStr(1984, 6) == "001984"
  855. result = $abs(x)
  856. for i in 1 .. minchars - len(result):
  857. result = '0' & result
  858. if x < 0:
  859. result = '-' & result
  860. proc parseInt*(s: string): int {.noSideEffect, procvar,
  861. rtl, extern: "nsuParseInt".} =
  862. ## Parses a decimal integer value contained in `s`.
  863. ##
  864. ## If `s` is not a valid integer, `ValueError` is raised.
  865. runnableExamples:
  866. doAssert parseInt("-0042") == -42
  867. let L = parseutils.parseInt(s, result, 0)
  868. if L != s.len or L == 0:
  869. raise newException(ValueError, "invalid integer: " & s)
  870. proc parseBiggestInt*(s: string): BiggestInt {.noSideEffect, procvar,
  871. rtl, extern: "nsuParseBiggestInt".} =
  872. ## Parses a decimal integer value contained in `s`.
  873. ##
  874. ## If `s` is not a valid integer, `ValueError` is raised.
  875. let L = parseutils.parseBiggestInt(s, result, 0)
  876. if L != s.len or L == 0:
  877. raise newException(ValueError, "invalid integer: " & s)
  878. proc parseUInt*(s: string): uint {.noSideEffect, procvar,
  879. rtl, extern: "nsuParseUInt".} =
  880. ## Parses a decimal unsigned integer value contained in `s`.
  881. ##
  882. ## If `s` is not a valid integer, `ValueError` is raised.
  883. let L = parseutils.parseUInt(s, result, 0)
  884. if L != s.len or L == 0:
  885. raise newException(ValueError, "invalid unsigned integer: " & s)
  886. proc parseBiggestUInt*(s: string): BiggestUInt {.noSideEffect, procvar,
  887. rtl, extern: "nsuParseBiggestUInt".} =
  888. ## Parses a decimal unsigned integer value contained in `s`.
  889. ##
  890. ## If `s` is not a valid integer, `ValueError` is raised.
  891. let L = parseutils.parseBiggestUInt(s, result, 0)
  892. if L != s.len or L == 0:
  893. raise newException(ValueError, "invalid unsigned integer: " & s)
  894. proc parseFloat*(s: string): float {.noSideEffect, procvar,
  895. rtl, extern: "nsuParseFloat".} =
  896. ## Parses a decimal floating point value contained in `s`. If `s` is not
  897. ## a valid floating point number, `ValueError` is raised. ``NAN``,
  898. ## ``INF``, ``-INF`` are also supported (case insensitive comparison).
  899. runnableExamples:
  900. doAssert parseFloat("3.14") == 3.14
  901. doAssert parseFloat("inf") == 1.0/0
  902. let L = parseutils.parseFloat(s, result, 0)
  903. if L != s.len or L == 0:
  904. raise newException(ValueError, "invalid float: " & s)
  905. proc parseBinInt*(s: string): int {.noSideEffect, procvar,
  906. rtl, extern: "nsuParseBinInt".} =
  907. ## Parses a binary integer value contained in `s`.
  908. ##
  909. ## If `s` is not a valid binary integer, `ValueError` is raised. `s` can have
  910. ## one of the following optional prefixes: ``0b``, ``0B``. Underscores within
  911. ## `s` are ignored.
  912. let L = parseutils.parseBin(s, result, 0)
  913. if L != s.len or L == 0:
  914. raise newException(ValueError, "invalid binary integer: " & s)
  915. proc parseOctInt*(s: string): int {.noSideEffect,
  916. rtl, extern: "nsuParseOctInt".} =
  917. ## Parses an octal integer value contained in `s`.
  918. ##
  919. ## If `s` is not a valid oct integer, `ValueError` is raised. `s` can have one
  920. ## of the following optional prefixes: ``0o``, ``0O``. Underscores within
  921. ## `s` are ignored.
  922. let L = parseutils.parseOct(s, result, 0)
  923. if L != s.len or L == 0:
  924. raise newException(ValueError, "invalid oct integer: " & s)
  925. proc parseHexInt*(s: string): int {.noSideEffect, procvar,
  926. rtl, extern: "nsuParseHexInt".} =
  927. ## Parses a hexadecimal integer value contained in `s`.
  928. ##
  929. ## If `s` is not a valid hex integer, `ValueError` is raised. `s` can have one
  930. ## of the following optional prefixes: ``0x``, ``0X``, ``#``. Underscores
  931. ## within `s` are ignored.
  932. let L = parseutils.parseHex(s, result, 0)
  933. if L != s.len or L == 0:
  934. raise newException(ValueError, "invalid hex integer: " & s)
  935. proc generateHexCharToValueMap(): string =
  936. ## Generate a string to map a hex digit to uint value
  937. result = ""
  938. for inp in 0..255:
  939. let ch = chr(inp)
  940. let o =
  941. case ch:
  942. of '0'..'9': inp - ord('0')
  943. of 'a'..'f': inp - ord('a') + 10
  944. of 'A'..'F': inp - ord('A') + 10
  945. else: 17 # indicates an invalid hex char
  946. result.add chr(o)
  947. const hexCharToValueMap = generateHexCharToValueMap()
  948. proc parseHexStr*(s: string): string {.noSideEffect, procvar,
  949. rtl, extern: "nsuParseHexStr".} =
  950. ## Convert hex-encoded string to byte string, e.g.:
  951. ##
  952. ## .. code-block:: nim
  953. ## hexToStr("00ff") == "\0\255"
  954. ##
  955. ## Raises ``ValueError`` for an invalid hex values. The comparison is
  956. ## case-insensitive.
  957. if s.len mod 2 != 0:
  958. raise newException(ValueError, "Incorrect hex string len")
  959. result = newString(s.len div 2)
  960. var buf = 0
  961. for pos, c in s:
  962. let val = hexCharToValueMap[ord(c)].ord
  963. if val == 17:
  964. raise newException(ValueError, "Invalid hex char " & repr(c))
  965. if pos mod 2 == 0:
  966. buf = val
  967. else:
  968. result[pos div 2] = chr(val + buf shl 4)
  969. proc parseBool*(s: string): bool =
  970. ## Parses a value into a `bool`.
  971. ##
  972. ## If ``s`` is one of the following values: ``y, yes, true, 1, on``, then
  973. ## returns `true`. If ``s`` is one of the following values: ``n, no, false,
  974. ## 0, off``, then returns `false`. If ``s`` is something else a
  975. ## ``ValueError`` exception is raised.
  976. case normalize(s)
  977. of "y", "yes", "true", "1", "on": result = true
  978. of "n", "no", "false", "0", "off": result = false
  979. else: raise newException(ValueError, "cannot interpret as a bool: " & s)
  980. proc parseEnum*[T: enum](s: string): T =
  981. ## Parses an enum ``T``.
  982. ##
  983. ## Raises ``ValueError`` for an invalid value in `s`. The comparison is
  984. ## done in a style insensitive way.
  985. for e in low(T)..high(T):
  986. if cmpIgnoreStyle(s, $e) == 0:
  987. return e
  988. raise newException(ValueError, "invalid enum value: " & s)
  989. proc parseEnum*[T: enum](s: string, default: T): T =
  990. ## Parses an enum ``T``.
  991. ##
  992. ## Uses `default` for an invalid value in `s`. The comparison is done in a
  993. ## style insensitive way.
  994. for e in low(T)..high(T):
  995. if cmpIgnoreStyle(s, $e) == 0:
  996. return e
  997. result = default
  998. proc repeat*(c: char, count: Natural): string {.noSideEffect,
  999. rtl, extern: "nsuRepeatChar".} =
  1000. ## Returns a string of length `count` consisting only of
  1001. ## the character `c`. You can use this proc to left align strings. Example:
  1002. ##
  1003. ## .. code-block:: nim
  1004. ## proc tabexpand(indent: int, text: string, tabsize: int = 4) =
  1005. ## echo '\t'.repeat(indent div tabsize), ' '.repeat(indent mod tabsize),
  1006. ## text
  1007. ##
  1008. ## tabexpand(4, "At four")
  1009. ## tabexpand(5, "At five")
  1010. ## tabexpand(6, "At six")
  1011. result = newString(count)
  1012. for i in 0..count-1: result[i] = c
  1013. proc repeat*(s: string, n: Natural): string {.noSideEffect,
  1014. rtl, extern: "nsuRepeatStr".} =
  1015. ## Returns String `s` concatenated `n` times. Example:
  1016. ##
  1017. ## .. code-block:: nim
  1018. ## echo "+++ STOP ".repeat(4), "+++"
  1019. result = newStringOfCap(n * s.len)
  1020. for i in 1..n: result.add(s)
  1021. template spaces*(n: Natural): string = repeat(' ', n)
  1022. ## Returns a String with `n` space characters. You can use this proc
  1023. ## to left align strings. Example:
  1024. ##
  1025. ## .. code-block:: nim
  1026. ## let
  1027. ## width = 15
  1028. ## text1 = "Hello user!"
  1029. ## text2 = "This is a very long string"
  1030. ## echo text1 & spaces(max(0, width - text1.len)) & "|"
  1031. ## echo text2 & spaces(max(0, width - text2.len)) & "|"
  1032. proc align*(s: string, count: Natural, padding = ' '): string {.
  1033. noSideEffect, rtl, extern: "nsuAlignString".} =
  1034. ## Aligns a string `s` with `padding`, so that it is of length `count`.
  1035. ##
  1036. ## `padding` characters (by default spaces) are added before `s` resulting in
  1037. ## right alignment. If ``s.len >= count``, no spaces are added and `s` is
  1038. ## returned unchanged. If you need to left align a string use the `alignLeft
  1039. ## proc <#alignLeft>`_. Example:
  1040. ##
  1041. ## .. code-block:: nim
  1042. ## assert align("abc", 4) == " abc"
  1043. ## assert align("a", 0) == "a"
  1044. ## assert align("1232", 6) == " 1232"
  1045. ## assert align("1232", 6, '#') == "##1232"
  1046. if s.len < count:
  1047. result = newString(count)
  1048. let spaces = count - s.len
  1049. for i in 0..spaces-1: result[i] = padding
  1050. for i in spaces..count-1: result[i] = s[i-spaces]
  1051. else:
  1052. result = s
  1053. proc alignLeft*(s: string, count: Natural, padding = ' '): string {.noSideEffect.} =
  1054. ## Left-Aligns a string `s` with `padding`, so that it is of length `count`.
  1055. ##
  1056. ## `padding` characters (by default spaces) are added after `s` resulting in
  1057. ## left alignment. If ``s.len >= count``, no spaces are added and `s` is
  1058. ## returned unchanged. If you need to right align a string use the `align
  1059. ## proc <#align>`_. Example:
  1060. ##
  1061. ## .. code-block:: nim
  1062. ## assert alignLeft("abc", 4) == "abc "
  1063. ## assert alignLeft("a", 0) == "a"
  1064. ## assert alignLeft("1232", 6) == "1232 "
  1065. ## assert alignLeft("1232", 6, '#') == "1232##"
  1066. if s.len < count:
  1067. result = newString(count)
  1068. if s.len > 0:
  1069. result[0 .. (s.len - 1)] = s
  1070. for i in s.len ..< count:
  1071. result[i] = padding
  1072. else:
  1073. result = s
  1074. iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[
  1075. token: string, isSep: bool] =
  1076. ## Tokenizes the string `s` into substrings.
  1077. ##
  1078. ## Substrings are separated by a substring containing only `seps`.
  1079. ## Examples:
  1080. ##
  1081. ## .. code-block:: nim
  1082. ## for word in tokenize(" this is an example "):
  1083. ## writeLine(stdout, word)
  1084. ##
  1085. ## Results in:
  1086. ##
  1087. ## .. code-block:: nim
  1088. ## (" ", true)
  1089. ## ("this", false)
  1090. ## (" ", true)
  1091. ## ("is", false)
  1092. ## (" ", true)
  1093. ## ("an", false)
  1094. ## (" ", true)
  1095. ## ("example", false)
  1096. ## (" ", true)
  1097. var i = 0
  1098. while true:
  1099. var j = i
  1100. var isSep = j < s.len and s[j] in seps
  1101. while j < s.len and (s[j] in seps) == isSep: inc(j)
  1102. if j > i:
  1103. yield (substr(s, i, j-1), isSep)
  1104. else:
  1105. break
  1106. i = j
  1107. proc wordWrap*(s: string, maxLineWidth = 80,
  1108. splitLongWords = true,
  1109. seps: set[char] = Whitespace,
  1110. newLine = "\n"): string {.
  1111. noSideEffect, rtl, extern: "nsuWordWrap".} =
  1112. ## Word wraps `s`.
  1113. result = newStringOfCap(s.len + s.len shr 6)
  1114. var spaceLeft = maxLineWidth
  1115. var lastSep = ""
  1116. for word, isSep in tokenize(s, seps):
  1117. if isSep:
  1118. lastSep = word
  1119. spaceLeft = spaceLeft - len(word)
  1120. continue
  1121. if len(word) > spaceLeft:
  1122. if splitLongWords and len(word) > maxLineWidth:
  1123. result.add(substr(word, 0, spaceLeft-1))
  1124. var w = spaceLeft
  1125. var wordLeft = len(word) - spaceLeft
  1126. while wordLeft > 0:
  1127. result.add(newLine)
  1128. var L = min(maxLineWidth, wordLeft)
  1129. spaceLeft = maxLineWidth - L
  1130. result.add(substr(word, w, w+L-1))
  1131. inc(w, L)
  1132. dec(wordLeft, L)
  1133. else:
  1134. spaceLeft = maxLineWidth - len(word)
  1135. result.add(newLine)
  1136. result.add(word)
  1137. else:
  1138. spaceLeft = spaceLeft - len(word)
  1139. result.add(lastSep & word)
  1140. lastSep.setLen(0)
  1141. proc indent*(s: string, count: Natural, padding: string = " "): string
  1142. {.noSideEffect, rtl, extern: "nsuIndent".} =
  1143. ## Indents each line in ``s`` by ``count`` amount of ``padding``.
  1144. ##
  1145. ## **Note:** This does not preserve the new line characters used in ``s``.
  1146. runnableExamples:
  1147. doAssert indent("First line\c\l and second line.", 2) == " First line\l and second line."
  1148. result = ""
  1149. var i = 0
  1150. for line in s.splitLines():
  1151. if i != 0:
  1152. result.add("\n")
  1153. for j in 1..count:
  1154. result.add(padding)
  1155. result.add(line)
  1156. i.inc
  1157. proc unindent*(s: string, count: Natural, padding: string = " "): string
  1158. {.noSideEffect, rtl, extern: "nsuUnindent".} =
  1159. ## Unindents each line in ``s`` by ``count`` amount of ``padding``.
  1160. ##
  1161. ## **Note:** This does not preserve the new line characters used in ``s``.
  1162. runnableExamples:
  1163. doAssert unindent(" First line\l and second line", 3) == "First line\land second line"
  1164. result = ""
  1165. var i = 0
  1166. for line in s.splitLines():
  1167. if i != 0:
  1168. result.add("\n")
  1169. var indentCount = 0
  1170. for j in 0..<count.int:
  1171. indentCount.inc
  1172. if j + padding.len-1 >= line.len or line[j .. j + padding.len-1] != padding:
  1173. indentCount = j
  1174. break
  1175. result.add(line[indentCount*padding.len .. ^1])
  1176. i.inc
  1177. proc unindent*(s: string): string
  1178. {.noSideEffect, rtl, extern: "nsuUnindentAll".} =
  1179. ## Removes all indentation composed of whitespace from each line in ``s``.
  1180. ##
  1181. ## For example:
  1182. ##
  1183. ## .. code-block:: nim
  1184. ## const x = """
  1185. ## Hello
  1186. ## There
  1187. ## """.unindent()
  1188. ##
  1189. ## doAssert x == "Hello\nThere\n"
  1190. unindent(s, 1000) # TODO: Passing a 1000 is a bit hackish.
  1191. proc startsWith*(s, prefix: string): bool {.noSideEffect,
  1192. rtl, extern: "nsuStartsWith".} =
  1193. ## Returns true iff ``s`` starts with ``prefix``.
  1194. ##
  1195. ## If ``prefix == ""`` true is returned.
  1196. var i = 0
  1197. while true:
  1198. if i >= prefix.len: return true
  1199. if i >= s.len or s[i] != prefix[i]: return false
  1200. inc(i)
  1201. proc startsWith*(s: string, prefix: char): bool {.noSideEffect, inline.} =
  1202. ## Returns true iff ``s`` starts with ``prefix``.
  1203. result = s.len > 0 and s[0] == prefix
  1204. proc endsWith*(s, suffix: string): bool {.noSideEffect,
  1205. rtl, extern: "nsuEndsWith".} =
  1206. ## Returns true iff ``s`` ends with ``suffix``.
  1207. ##
  1208. ## If ``suffix == ""`` true is returned.
  1209. var i = 0
  1210. var j = len(s) - len(suffix)
  1211. while i+j <% s.len:
  1212. if s[i+j] != suffix[i]: return false
  1213. inc(i)
  1214. if i >= suffix.len: return true
  1215. proc endsWith*(s: string, suffix: char): bool {.noSideEffect, inline.} =
  1216. ## Returns true iff ``s`` ends with ``suffix``.
  1217. result = s.len > 0 and s[s.high] == suffix
  1218. proc continuesWith*(s, substr: string, start: Natural): bool {.noSideEffect,
  1219. rtl, extern: "nsuContinuesWith".} =
  1220. ## Returns true iff ``s`` continues with ``substr`` at position ``start``.
  1221. ##
  1222. ## If ``substr == ""`` true is returned.
  1223. var i = 0
  1224. while true:
  1225. if i >= substr.len: return true
  1226. if i+start >= s.len or s[i+start] != substr[i]: return false
  1227. inc(i)
  1228. proc addSep*(dest: var string, sep = ", ", startLen: Natural = 0)
  1229. {.noSideEffect, inline.} =
  1230. ## Adds a separator to `dest` only if its length is bigger than `startLen`.
  1231. ##
  1232. ## A shorthand for:
  1233. ##
  1234. ## .. code-block:: nim
  1235. ## if dest.len > startLen: add(dest, sep)
  1236. ##
  1237. ## This is often useful for generating some code where the items need to
  1238. ## be *separated* by `sep`. `sep` is only added if `dest` is longer than
  1239. ## `startLen`. The following example creates a string describing
  1240. ## an array of integers.
  1241. runnableExamples:
  1242. var arr = "["
  1243. for x in items([2, 3, 5, 7, 11]):
  1244. addSep(arr, startLen=len("["))
  1245. add(arr, $x)
  1246. add(arr, "]")
  1247. if dest.len > startLen: add(dest, sep)
  1248. proc allCharsInSet*(s: string, theSet: set[char]): bool =
  1249. ## Returns true iff each character of `s` is in the set `theSet`.
  1250. runnableExamples:
  1251. doAssert allCharsInSet("aeea", {'a', 'e'}) == true
  1252. doAssert allCharsInSet("", {'a', 'e'}) == true
  1253. for c in items(s):
  1254. if c notin theSet: return false
  1255. return true
  1256. proc abbrev*(s: string, possibilities: openArray[string]): int =
  1257. ## Returns the index of the first item in ``possibilities`` which starts with ``s``, if not ambiguous.
  1258. ##
  1259. ## Returns -1 if no item has been found and -2 if multiple items match.
  1260. runnableExamples:
  1261. doAssert abbrev("fac", ["college", "faculty", "industry"]) == 1
  1262. doAssert abbrev("foo", ["college", "faculty", "industry"]) == -1 # Not found
  1263. doAssert abbrev("fac", ["college", "faculty", "faculties"]) == -2 # Ambiguous
  1264. doAssert abbrev("college", ["college", "colleges", "industry"]) == 0
  1265. result = -1 # none found
  1266. for i in 0..possibilities.len-1:
  1267. if possibilities[i].startsWith(s):
  1268. if possibilities[i] == s:
  1269. # special case: exact match shouldn't be ambiguous
  1270. return i
  1271. if result >= 0: return -2 # ambiguous
  1272. result = i
  1273. # ---------------------------------------------------------------------------
  1274. proc join*(a: openArray[string], sep: string = ""): string {.
  1275. noSideEffect, rtl, extern: "nsuJoinSep".} =
  1276. ## Concatenates all strings in `a` separating them with `sep`.
  1277. runnableExamples:
  1278. doAssert join(["A", "B", "Conclusion"], " -> ") == "A -> B -> Conclusion"
  1279. if len(a) > 0:
  1280. var L = sep.len * (a.len-1)
  1281. for i in 0..high(a): inc(L, a[i].len)
  1282. result = newStringOfCap(L)
  1283. add(result, a[0])
  1284. for i in 1..high(a):
  1285. add(result, sep)
  1286. add(result, a[i])
  1287. else:
  1288. result = ""
  1289. proc join*[T: not string](a: openArray[T], sep: string = ""): string {.
  1290. noSideEffect, rtl.} =
  1291. ## Converts all elements in `a` to strings using `$` and concatenates them
  1292. ## with `sep`.
  1293. runnableExamples:
  1294. doAssert join([1, 2, 3], " -> ") == "1 -> 2 -> 3"
  1295. result = ""
  1296. for i, x in a:
  1297. if i > 0:
  1298. add(result, sep)
  1299. add(result, $x)
  1300. type
  1301. SkipTable* = array[char, int]
  1302. proc initSkipTable*(a: var SkipTable, sub: string)
  1303. {.noSideEffect, rtl, extern: "nsuInitSkipTable".} =
  1304. ## Preprocess table `a` for `sub`.
  1305. let m = len(sub)
  1306. var i = 0
  1307. while i <= 0xff-7:
  1308. a[chr(i + 0)] = m
  1309. a[chr(i + 1)] = m
  1310. a[chr(i + 2)] = m
  1311. a[chr(i + 3)] = m
  1312. a[chr(i + 4)] = m
  1313. a[chr(i + 5)] = m
  1314. a[chr(i + 6)] = m
  1315. a[chr(i + 7)] = m
  1316. i += 8
  1317. for i in 0 ..< m - 1:
  1318. a[sub[i]] = m - 1 - i
  1319. proc find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int
  1320. {.noSideEffect, rtl, extern: "nsuFindStrA".} =
  1321. ## Searches for `sub` in `s` inside range `start`..`last` using preprocessed table `a`.
  1322. ## If `last` is unspecified, it defaults to `s.high`.
  1323. ##
  1324. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1325. let
  1326. last = if last==0: s.high else: last
  1327. sLen = last - start + 1
  1328. subLast = sub.len - 1
  1329. if subLast == -1:
  1330. # this was an empty needle string,
  1331. # we count this as match in the first possible position:
  1332. return start
  1333. # This is an implementation of the Boyer-Moore Horspool algorithms
  1334. # https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm
  1335. var skip = start
  1336. while last - skip >= subLast:
  1337. var i = subLast
  1338. while s[skip + i] == sub[i]:
  1339. if i == 0:
  1340. return skip
  1341. dec i
  1342. inc skip, a[s[skip + subLast]]
  1343. return -1
  1344. when not (defined(js) or defined(nimdoc) or defined(nimscript)):
  1345. proc c_memchr(cstr: pointer, c: char, n: csize): pointer {.
  1346. importc: "memchr", header: "<string.h>" .}
  1347. const hasCStringBuiltin = true
  1348. else:
  1349. const hasCStringBuiltin = false
  1350. proc find*(s: string, sub: char, start: Natural = 0, last = 0): int {.noSideEffect,
  1351. rtl, extern: "nsuFindChar".} =
  1352. ## Searches for `sub` in `s` inside range `start`..`last`.
  1353. ## If `last` is unspecified, it defaults to `s.high`.
  1354. ##
  1355. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1356. let last = if last==0: s.high else: last
  1357. when nimvm:
  1358. for i in int(start)..last:
  1359. if sub == s[i]: return i
  1360. else:
  1361. when hasCStringBuiltin:
  1362. let L = last-start+1
  1363. if L > 0:
  1364. let found = c_memchr(s[start].unsafeAddr, sub, L)
  1365. if not found.isNil:
  1366. return cast[ByteAddress](found) -% cast[ByteAddress](s.cstring)
  1367. else:
  1368. for i in int(start)..last:
  1369. if sub == s[i]: return i
  1370. return -1
  1371. proc find*(s, sub: string, start: Natural = 0, last = 0): int {.noSideEffect,
  1372. rtl, extern: "nsuFindStr".} =
  1373. ## Searches for `sub` in `s` inside range `start`..`last`.
  1374. ## If `last` is unspecified, it defaults to `s.high`.
  1375. ##
  1376. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1377. if sub.len > s.len: return -1
  1378. if sub.len == 1: return find(s, sub[0], start, last)
  1379. var a {.noinit.}: SkipTable
  1380. initSkipTable(a, sub)
  1381. result = find(a, s, sub, start, last)
  1382. proc find*(s: string, chars: set[char], start: Natural = 0, last = 0): int {.noSideEffect,
  1383. rtl, extern: "nsuFindCharSet".} =
  1384. ## Searches for `chars` in `s` inside range `start`..`last`.
  1385. ## If `last` is unspecified, it defaults to `s.high`.
  1386. ##
  1387. ## If `s` contains none of the characters in `chars`, -1 is returned.
  1388. let last = if last==0: s.high else: last
  1389. for i in int(start)..last:
  1390. if s[i] in chars: return i
  1391. return -1
  1392. proc rfind*(s, sub: string, start: int = -1): int {.noSideEffect.} =
  1393. ## Searches for `sub` in `s` in reverse, starting at `start` and going
  1394. ## backwards to 0.
  1395. ##
  1396. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1397. let realStart = if start == -1: s.len else: start
  1398. for i in countdown(realStart-sub.len, 0):
  1399. for j in 0..sub.len-1:
  1400. result = i
  1401. if sub[j] != s[i+j]:
  1402. result = -1
  1403. break
  1404. if result != -1: return
  1405. return -1
  1406. proc rfind*(s: string, sub: char, start: int = -1): int {.noSideEffect,
  1407. rtl.} =
  1408. ## Searches for `sub` in `s` in reverse starting at position `start`.
  1409. ##
  1410. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1411. let realStart = if start == -1: s.len-1 else: start
  1412. for i in countdown(realStart, 0):
  1413. if sub == s[i]: return i
  1414. return -1
  1415. proc rfind*(s: string, chars: set[char], start: int = -1): int {.noSideEffect.} =
  1416. ## Searches for `chars` in `s` in reverse starting at position `start`.
  1417. ##
  1418. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1419. let realStart = if start == -1: s.len-1 else: start
  1420. for i in countdown(realStart, 0):
  1421. if s[i] in chars: return i
  1422. return -1
  1423. proc center*(s: string, width: int, fillChar: char = ' '): string {.
  1424. noSideEffect, rtl, extern: "nsuCenterString".} =
  1425. ## Return the contents of `s` centered in a string `width` long using
  1426. ## `fillChar` as padding.
  1427. ##
  1428. ## The original string is returned if `width` is less than or equal
  1429. ## to `s.len`.
  1430. if width <= s.len: return s
  1431. result = newString(width)
  1432. # Left padding will be one fillChar
  1433. # smaller if there are an odd number
  1434. # of characters
  1435. let
  1436. charsLeft = (width - s.len)
  1437. leftPadding = charsLeft div 2
  1438. for i in 0 ..< width:
  1439. if i >= leftPadding and i < leftPadding + s.len:
  1440. # we are where the string should be located
  1441. result[i] = s[i-leftPadding]
  1442. else:
  1443. # we are either before or after where
  1444. # the string s should go
  1445. result[i] = fillChar
  1446. proc count*(s: string, sub: string, overlapping: bool = false): int {.
  1447. noSideEffect, rtl, extern: "nsuCountString".} =
  1448. ## Count the occurrences of a substring `sub` in the string `s`.
  1449. ## Overlapping occurrences of `sub` only count when `overlapping`
  1450. ## is set to true.
  1451. doAssert sub.len > 0
  1452. var i = 0
  1453. while true:
  1454. i = s.find(sub, i)
  1455. if i < 0: break
  1456. if overlapping: inc i
  1457. else: i += sub.len
  1458. inc result
  1459. proc count*(s: string, sub: char): int {.noSideEffect,
  1460. rtl, extern: "nsuCountChar".} =
  1461. ## Count the occurrences of the character `sub` in the string `s`.
  1462. for c in s:
  1463. if c == sub: inc result
  1464. proc count*(s: string, subs: set[char]): int {.noSideEffect,
  1465. rtl, extern: "nsuCountCharSet".} =
  1466. ## Count the occurrences of the group of character `subs` in the string `s`.
  1467. doAssert card(subs) > 0
  1468. for c in s:
  1469. if c in subs: inc result
  1470. proc quoteIfContainsWhite*(s: string): string {.deprecated.} =
  1471. ## Returns ``'"' & s & '"'`` if `s` contains a space and does not
  1472. ## start with a quote, else returns `s`.
  1473. ##
  1474. ## **DEPRECATED** as it was confused for shell quoting function. For this
  1475. ## application use `osproc.quoteShell <osproc.html#quoteShell>`_.
  1476. if find(s, {' ', '\t'}) >= 0 and s[0] != '"': result = '"' & s & '"'
  1477. else: result = s
  1478. proc contains*(s: string, c: char): bool {.noSideEffect.} =
  1479. ## Same as ``find(s, c) >= 0``.
  1480. return find(s, c) >= 0
  1481. proc contains*(s, sub: string): bool {.noSideEffect.} =
  1482. ## Same as ``find(s, sub) >= 0``.
  1483. return find(s, sub) >= 0
  1484. proc contains*(s: string, chars: set[char]): bool {.noSideEffect.} =
  1485. ## Same as ``find(s, chars) >= 0``.
  1486. return find(s, chars) >= 0
  1487. proc replace*(s, sub: string, by = ""): string {.noSideEffect,
  1488. rtl, extern: "nsuReplaceStr".} =
  1489. ## Replaces `sub` in `s` by the string `by`.
  1490. result = ""
  1491. let subLen = sub.len
  1492. if subLen == 0:
  1493. for c in s:
  1494. add result, by
  1495. add result, c
  1496. add result, by
  1497. return
  1498. elif subLen == 1:
  1499. # when the pattern is a single char, we use a faster
  1500. # char-based search that doesn't need a skip table:
  1501. let c = sub[0]
  1502. let last = s.high
  1503. var i = 0
  1504. while true:
  1505. let j = find(s, c, i, last)
  1506. if j < 0: break
  1507. add result, substr(s, i, j - 1)
  1508. add result, by
  1509. i = j + subLen
  1510. # copy the rest:
  1511. add result, substr(s, i)
  1512. else:
  1513. var a {.noinit.}: SkipTable
  1514. initSkipTable(a, sub)
  1515. let last = s.high
  1516. var i = 0
  1517. while true:
  1518. let j = find(a, s, sub, i, last)
  1519. if j < 0: break
  1520. add result, substr(s, i, j - 1)
  1521. add result, by
  1522. i = j + subLen
  1523. # copy the rest:
  1524. add result, substr(s, i)
  1525. proc replace*(s: string, sub, by: char): string {.noSideEffect,
  1526. rtl, extern: "nsuReplaceChar".} =
  1527. ## Replaces `sub` in `s` by the character `by`.
  1528. ##
  1529. ## Optimized version of `replace <#replace,string,string>`_ for characters.
  1530. result = newString(s.len)
  1531. var i = 0
  1532. while i < s.len:
  1533. if s[i] == sub: result[i] = by
  1534. else: result[i] = s[i]
  1535. inc(i)
  1536. proc replaceWord*(s, sub: string, by = ""): string {.noSideEffect,
  1537. rtl, extern: "nsuReplaceWord".} =
  1538. ## Replaces `sub` in `s` by the string `by`.
  1539. ##
  1540. ## Each occurrence of `sub` has to be surrounded by word boundaries
  1541. ## (comparable to ``\\w`` in regular expressions), otherwise it is not
  1542. ## replaced.
  1543. if sub.len == 0: return s
  1544. const wordChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\128'..'\255'}
  1545. var a {.noinit.}: SkipTable
  1546. result = ""
  1547. initSkipTable(a, sub)
  1548. var i = 0
  1549. let last = s.high
  1550. let sublen = max(sub.len, 1)
  1551. while true:
  1552. var j = find(a, s, sub, i, last)
  1553. if j < 0: break
  1554. # word boundary?
  1555. if (j == 0 or s[j-1] notin wordChars) and
  1556. (j+sub.len >= s.len or s[j+sub.len] notin wordChars):
  1557. add result, substr(s, i, j - 1)
  1558. add result, by
  1559. i = j + sublen
  1560. else:
  1561. add result, substr(s, i, j)
  1562. i = j + 1
  1563. # copy the rest:
  1564. add result, substr(s, i)
  1565. proc multiReplace*(s: string, replacements: varargs[(string, string)]): string {.noSideEffect.} =
  1566. ## Same as replace, but specialized for doing multiple replacements in a single
  1567. ## pass through the input string.
  1568. ##
  1569. ## multiReplace performs all replacements in a single pass, this means it can be used
  1570. ## to swap the occurences of "a" and "b", for instance.
  1571. ##
  1572. ## If the resulting string is not longer than the original input string, only a single
  1573. ## memory allocation is required.
  1574. ##
  1575. ## The order of the replacements does matter. Earlier replacements are preferred over later
  1576. ## replacements in the argument list.
  1577. result = newStringOfCap(s.len)
  1578. var i = 0
  1579. var fastChk: set[char] = {}
  1580. for tup in replacements: fastChk.incl(tup[0][0]) # Include first character of all replacements
  1581. while i < s.len:
  1582. block sIteration:
  1583. # Assume most chars in s are not candidates for any replacement operation
  1584. if s[i] in fastChk:
  1585. for tup in replacements:
  1586. if s.continuesWith(tup[0], i):
  1587. add result, tup[1]
  1588. inc(i, tup[0].len)
  1589. break sIteration
  1590. # No matching replacement found
  1591. # copy current character from s
  1592. add result, s[i]
  1593. inc(i)
  1594. proc delete*(s: var string, first, last: int) {.noSideEffect,
  1595. rtl, extern: "nsuDelete".} =
  1596. ## Deletes in `s` the characters at position `first` .. `last`.
  1597. ##
  1598. ## This modifies `s` itself, it does not return a copy.
  1599. var i = first
  1600. var j = last+1
  1601. var newLen = len(s)-j+i
  1602. while i < newLen:
  1603. s[i] = s[j]
  1604. inc(i)
  1605. inc(j)
  1606. setLen(s, newLen)
  1607. proc toOct*(x: BiggestInt, len: Positive): string {.noSideEffect,
  1608. rtl, extern: "nsuToOct".} =
  1609. ## Converts `x` into its octal representation.
  1610. ##
  1611. ## The resulting string is always `len` characters long. No leading ``0o``
  1612. ## prefix is generated.
  1613. var
  1614. mask: BiggestInt = 7
  1615. shift: BiggestInt = 0
  1616. assert(len > 0)
  1617. result = newString(len)
  1618. for j in countdown(len-1, 0):
  1619. result[j] = chr(int((x and mask) shr shift) + ord('0'))
  1620. shift = shift + 3
  1621. mask = mask shl 3
  1622. proc toBin*(x: BiggestInt, len: Positive): string {.noSideEffect,
  1623. rtl, extern: "nsuToBin".} =
  1624. ## Converts `x` into its binary representation.
  1625. ##
  1626. ## The resulting string is always `len` characters long. No leading ``0b``
  1627. ## prefix is generated.
  1628. var
  1629. mask: BiggestInt = 1
  1630. shift: BiggestInt = 0
  1631. assert(len > 0)
  1632. result = newString(len)
  1633. for j in countdown(len-1, 0):
  1634. result[j] = chr(int((x and mask) shr shift) + ord('0'))
  1635. shift = shift + 1
  1636. mask = mask shl 1
  1637. proc insertSep*(s: string, sep = '_', digits = 3): string {.noSideEffect,
  1638. rtl, extern: "nsuInsertSep".} =
  1639. ## Inserts the separator `sep` after `digits` digits from right to left.
  1640. ##
  1641. ## Even though the algorithm works with any string `s`, it is only useful
  1642. ## if `s` contains a number.
  1643. runnableExamples:
  1644. doAssert insertSep("1000000") == "1_000_000"
  1645. var L = (s.len-1) div digits + s.len
  1646. result = newString(L)
  1647. var j = 0
  1648. dec(L)
  1649. for i in countdown(len(s)-1, 0):
  1650. if j == digits:
  1651. result[L] = sep
  1652. dec(L)
  1653. j = 0
  1654. result[L] = s[i]
  1655. inc(j)
  1656. dec(L)
  1657. proc escape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
  1658. rtl, extern: "nsuEscape".} =
  1659. ## Escapes a string `s`. See `system.addEscapedChar <system.html#addEscapedChar>`_
  1660. ## for the escaping scheme.
  1661. ##
  1662. ## The resulting string is prefixed with `prefix` and suffixed with `suffix`.
  1663. ## Both may be empty strings.
  1664. result = newStringOfCap(s.len + s.len shr 2)
  1665. result.add(prefix)
  1666. for c in items(s):
  1667. case c
  1668. of '\0'..'\31', '\127'..'\255':
  1669. add(result, "\\x")
  1670. add(result, toHex(ord(c), 2))
  1671. of '\\': add(result, "\\\\")
  1672. of '\'': add(result, "\\'")
  1673. of '\"': add(result, "\\\"")
  1674. else: add(result, c)
  1675. add(result, suffix)
  1676. proc unescape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
  1677. rtl, extern: "nsuUnescape".} =
  1678. ## Unescapes a string `s`.
  1679. ##
  1680. ## This complements `escape <#escape>`_ as it performs the opposite
  1681. ## operations.
  1682. ##
  1683. ## If `s` does not begin with ``prefix`` and end with ``suffix`` a
  1684. ## ValueError exception will be raised.
  1685. result = newStringOfCap(s.len)
  1686. var i = prefix.len
  1687. if not s.startsWith(prefix):
  1688. raise newException(ValueError,
  1689. "String does not start with: " & prefix)
  1690. while true:
  1691. if i >= s.len-suffix.len: break
  1692. if s[i] == '\\':
  1693. if i+1 >= s.len:
  1694. result.add('\\')
  1695. break
  1696. case s[i+1]:
  1697. of 'x':
  1698. inc i, 2
  1699. var c: int
  1700. i += parseutils.parseHex(s, c, i, maxLen=2)
  1701. result.add(chr(c))
  1702. dec i, 2
  1703. of '\\':
  1704. result.add('\\')
  1705. of '\'':
  1706. result.add('\'')
  1707. of '\"':
  1708. result.add('\"')
  1709. else:
  1710. result.add("\\" & s[i+1])
  1711. inc(i, 2)
  1712. else:
  1713. result.add(s[i])
  1714. inc(i)
  1715. if not s.endsWith(suffix):
  1716. raise newException(ValueError,
  1717. "String does not end in: " & suffix)
  1718. proc validIdentifier*(s: string): bool {.noSideEffect,
  1719. rtl, extern: "nsuValidIdentifier".} =
  1720. ## Returns true if `s` is a valid identifier.
  1721. ##
  1722. ## A valid identifier starts with a character of the set `IdentStartChars`
  1723. ## and is followed by any number of characters of the set `IdentChars`.
  1724. runnableExamples:
  1725. doAssert "abc_def08".validIdentifier
  1726. if s.len > 0 and s[0] in IdentStartChars:
  1727. for i in 1..s.len-1:
  1728. if s[i] notin IdentChars: return false
  1729. return true
  1730. {.push warning[Deprecated]: off.}
  1731. proc editDistance*(a, b: string): int {.noSideEffect,
  1732. rtl, extern: "nsuEditDistance",
  1733. deprecated: "use editdistance.editDistanceAscii instead".} =
  1734. ## Returns the edit distance between `a` and `b`.
  1735. ##
  1736. ## This uses the `Levenshtein`:idx: distance algorithm with only a linear
  1737. ## memory overhead.
  1738. var len1 = a.len
  1739. var len2 = b.len
  1740. if len1 > len2:
  1741. # make `b` the longer string
  1742. return editDistance(b, a)
  1743. # strip common prefix:
  1744. var s = 0
  1745. while s < len1 and a[s] == b[s]:
  1746. inc(s)
  1747. dec(len1)
  1748. dec(len2)
  1749. # strip common suffix:
  1750. while len1 > 0 and len2 > 0 and a[s+len1-1] == b[s+len2-1]:
  1751. dec(len1)
  1752. dec(len2)
  1753. # trivial cases:
  1754. if len1 == 0: return len2
  1755. if len2 == 0: return len1
  1756. # another special case:
  1757. if len1 == 1:
  1758. for j in s..s+len2-1:
  1759. if a[s] == b[j]: return len2 - 1
  1760. return len2
  1761. inc(len1)
  1762. inc(len2)
  1763. var half = len1 shr 1
  1764. # initalize first row:
  1765. #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2*sizeof(int)))
  1766. var row: seq[int]
  1767. newSeq(row, len2)
  1768. var e = s + len2 - 1 # end marker
  1769. for i in 1..len2 - half - 1: row[i] = i
  1770. row[0] = len1 - half - 1
  1771. for i in 1 .. len1 - 1:
  1772. var char1 = a[i + s - 1]
  1773. var char2p: int
  1774. var D, x: int
  1775. var p: int
  1776. if i >= len1 - half:
  1777. # skip the upper triangle:
  1778. var offset = i - len1 + half
  1779. char2p = offset
  1780. p = offset
  1781. var c3 = row[p] + ord(char1 != b[s + char2p])
  1782. inc(p)
  1783. inc(char2p)
  1784. x = row[p] + 1
  1785. D = x
  1786. if x > c3: x = c3
  1787. row[p] = x
  1788. inc(p)
  1789. else:
  1790. p = 1
  1791. char2p = 0
  1792. D = i
  1793. x = i
  1794. if i <= half + 1:
  1795. # skip the lower triangle:
  1796. e = len2 + i - half - 2
  1797. # main:
  1798. while p <= e:
  1799. dec(D)
  1800. var c3 = D + ord(char1 != b[char2p + s])
  1801. inc(char2p)
  1802. inc(x)
  1803. if x > c3: x = c3
  1804. D = row[p] + 1
  1805. if x > D: x = D
  1806. row[p] = x
  1807. inc(p)
  1808. # lower triangle sentinel:
  1809. if i <= half:
  1810. dec(D)
  1811. var c3 = D + ord(char1 != b[char2p + s])
  1812. inc(x)
  1813. if x > c3: x = c3
  1814. row[p] = x
  1815. result = row[e]
  1816. {.pop.}
  1817. # floating point formating:
  1818. when not defined(js):
  1819. proc c_sprintf(buf, frmt: cstring): cint {.header: "<stdio.h>",
  1820. importc: "sprintf", varargs, noSideEffect.}
  1821. type
  1822. FloatFormatMode* = enum ## the different modes of floating point formating
  1823. ffDefault, ## use the shorter floating point notation
  1824. ffDecimal, ## use decimal floating point notation
  1825. ffScientific ## use scientific notation (using ``e`` character)
  1826. {.deprecated: [TFloatFormat: FloatFormatMode].}
  1827. proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault,
  1828. precision: range[-1..32] = 16;
  1829. decimalSep = '.'): string {.
  1830. noSideEffect, rtl, extern: "nsu$1".} =
  1831. ## Converts a floating point value `f` to a string.
  1832. ##
  1833. ## If ``format == ffDecimal`` then precision is the number of digits to
  1834. ## be printed after the decimal point.
  1835. ## If ``format == ffScientific`` then precision is the maximum number
  1836. ## of significant digits to be printed.
  1837. ## `precision`'s default value is the maximum number of meaningful digits
  1838. ## after the decimal point for Nim's ``biggestFloat`` type.
  1839. ##
  1840. ## If ``precision == -1``, it tries to format it nicely.
  1841. when defined(js):
  1842. var precision = precision
  1843. if precision == -1:
  1844. # use the same default precision as c_sprintf
  1845. precision = 6
  1846. var res: cstring
  1847. case format
  1848. of ffDefault:
  1849. {.emit: "`res` = `f`.toString();".}
  1850. of ffDecimal:
  1851. {.emit: "`res` = `f`.toFixed(`precision`);".}
  1852. of ffScientific:
  1853. {.emit: "`res` = `f`.toExponential(`precision`);".}
  1854. result = $res
  1855. if 1.0 / f == -Inf:
  1856. # JavaScript removes the "-" from negative Zero, add it back here
  1857. result = "-" & $res
  1858. for i in 0 ..< result.len:
  1859. # Depending on the locale either dot or comma is produced,
  1860. # but nothing else is possible:
  1861. if result[i] in {'.', ','}: result[i] = decimalsep
  1862. else:
  1863. const floatFormatToChar: array[FloatFormatMode, char] = ['g', 'f', 'e']
  1864. var
  1865. frmtstr {.noinit.}: array[0..5, char]
  1866. buf {.noinit.}: array[0..2500, char]
  1867. L: cint
  1868. frmtstr[0] = '%'
  1869. if precision >= 0:
  1870. frmtstr[1] = '#'
  1871. frmtstr[2] = '.'
  1872. frmtstr[3] = '*'
  1873. frmtstr[4] = floatFormatToChar[format]
  1874. frmtstr[5] = '\0'
  1875. when defined(nimNoArrayToCstringConversion):
  1876. L = c_sprintf(addr buf, addr frmtstr, precision, f)
  1877. else:
  1878. L = c_sprintf(buf, frmtstr, precision, f)
  1879. else:
  1880. frmtstr[1] = floatFormatToChar[format]
  1881. frmtstr[2] = '\0'
  1882. when defined(nimNoArrayToCstringConversion):
  1883. L = c_sprintf(addr buf, addr frmtstr, f)
  1884. else:
  1885. L = c_sprintf(buf, frmtstr, f)
  1886. result = newString(L)
  1887. for i in 0 ..< L:
  1888. # Depending on the locale either dot or comma is produced,
  1889. # but nothing else is possible:
  1890. if buf[i] in {'.', ','}: result[i] = decimalsep
  1891. else: result[i] = buf[i]
  1892. when defined(windows):
  1893. # VS pre 2015 violates the C standard: "The exponent always contains at
  1894. # least two digits, and only as many more digits as necessary to
  1895. # represent the exponent." [C11 §7.21.6.1]
  1896. # The following post-processing fixes this behavior.
  1897. if result.len > 4 and result[^4] == '+' and result[^3] == '0':
  1898. result[^3] = result[^2]
  1899. result[^2] = result[^1]
  1900. result.setLen(result.len - 1)
  1901. proc formatFloat*(f: float, format: FloatFormatMode = ffDefault,
  1902. precision: range[-1..32] = 16; decimalSep = '.'): string {.
  1903. noSideEffect, rtl, extern: "nsu$1".} =
  1904. ## Converts a floating point value `f` to a string.
  1905. ##
  1906. ## If ``format == ffDecimal`` then precision is the number of digits to
  1907. ## be printed after the decimal point.
  1908. ## If ``format == ffScientific`` then precision is the maximum number
  1909. ## of significant digits to be printed.
  1910. ## `precision`'s default value is the maximum number of meaningful digits
  1911. ## after the decimal point for Nim's ``float`` type.
  1912. ##
  1913. ## If ``precision == -1``, it tries to format it nicely.
  1914. runnableExamples:
  1915. let x = 123.456
  1916. doAssert x.formatFloat() == "123.4560000000000"
  1917. doAssert x.formatFloat(ffDecimal, 4) == "123.4560"
  1918. doAssert x.formatFloat(ffScientific, 2) == "1.23e+02"
  1919. result = formatBiggestFloat(f, format, precision, decimalSep)
  1920. proc trimZeros*(x: var string) {.noSideEffect.} =
  1921. ## Trim trailing zeros from a formatted floating point
  1922. ## value (`x`). Modifies the passed value.
  1923. var spl: seq[string]
  1924. if x.contains('.') or x.contains(','):
  1925. if x.contains('e'):
  1926. spl = x.split('e')
  1927. x = spl[0]
  1928. while x[x.high] == '0':
  1929. x.setLen(x.len-1)
  1930. if x[x.high] in [',', '.']:
  1931. x.setLen(x.len-1)
  1932. if spl.len > 0:
  1933. x &= "e" & spl[1]
  1934. type
  1935. BinaryPrefixMode* = enum ## the different names for binary prefixes
  1936. bpIEC, # use the IEC/ISO standard prefixes such as kibi
  1937. bpColloquial # use the colloquial kilo, mega etc
  1938. proc formatSize*(bytes: int64,
  1939. decimalSep = '.',
  1940. prefix = bpIEC,
  1941. includeSpace = false): string {.noSideEffect.} =
  1942. ## Rounds and formats `bytes`.
  1943. ##
  1944. ## By default, uses the IEC/ISO standard binary prefixes, so 1024 will be
  1945. ## formatted as 1KiB. Set prefix to `bpColloquial` to use the colloquial
  1946. ## names from the SI standard (e.g. k for 1000 being reused as 1024).
  1947. ##
  1948. ## `includeSpace` can be set to true to include the (SI preferred) space
  1949. ## between the number and the unit (e.g. 1 KiB).
  1950. runnableExamples:
  1951. doAssert formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB"
  1952. doAssert formatSize((2.234*1024*1024).int) == "2.234MiB"
  1953. doAssert formatSize(4096, includeSpace=true) == "4 KiB"
  1954. doAssert formatSize(4096, prefix=bpColloquial, includeSpace=true) == "4 kB"
  1955. doAssert formatSize(4096) == "4KiB"
  1956. doAssert formatSize(5_378_934, prefix=bpColloquial, decimalSep=',') == "5,13MB"
  1957. const iecPrefixes = ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"]
  1958. const collPrefixes = ["", "k", "M", "G", "T", "P", "E", "Z", "Y"]
  1959. var
  1960. xb: int64 = bytes
  1961. fbytes: float
  1962. last_xb: int64 = bytes
  1963. matchedIndex: int
  1964. prefixes: array[9, string]
  1965. if prefix == bpColloquial:
  1966. prefixes = collPrefixes
  1967. else:
  1968. prefixes = iecPrefixes
  1969. # Iterate through prefixes seeing if value will be greater than
  1970. # 0 in each case
  1971. for index in 1..<prefixes.len:
  1972. last_xb = xb
  1973. xb = bytes div (1'i64 shl (index*10))
  1974. matchedIndex = index
  1975. if xb == 0:
  1976. xb = last_xb
  1977. matchedIndex = index - 1
  1978. break
  1979. # xb has the integer number for the latest value; index should be correct
  1980. fbytes = bytes.float / (1'i64 shl (matchedIndex*10)).float
  1981. result = formatFloat(fbytes, format=ffDecimal, precision=3, decimalSep=decimalSep)
  1982. result.trimZeros()
  1983. if includeSpace:
  1984. result &= " "
  1985. result &= prefixes[matchedIndex]
  1986. result &= "B"
  1987. proc formatEng*(f: BiggestFloat,
  1988. precision: range[0..32] = 10,
  1989. trim: bool = true,
  1990. siPrefix: bool = false,
  1991. unit: string = "",
  1992. decimalSep = '.',
  1993. useUnitSpace = false): string {.noSideEffect.} =
  1994. ## Converts a floating point value `f` to a string using engineering notation.
  1995. ##
  1996. ## Numbers in of the range -1000.0<f<1000.0 will be formatted without an
  1997. ## exponent. Numbers outside of this range will be formatted as a
  1998. ## significand in the range -1000.0<f<1000.0 and an exponent that will always
  1999. ## be an integer multiple of 3, corresponding with the SI prefix scale k, M,
  2000. ## G, T etc for numbers with an absolute value greater than 1 and m, μ, n, p
  2001. ## etc for numbers with an absolute value less than 1.
  2002. ##
  2003. ## The default configuration (`trim=true` and `precision=10`) shows the
  2004. ## **shortest** form that precisely (up to a maximum of 10 decimal places)
  2005. ## displays the value. For example, 4.100000 will be displayed as 4.1 (which
  2006. ## is mathematically identical) whereas 4.1000003 will be displayed as
  2007. ## 4.1000003.
  2008. ##
  2009. ## If `trim` is set to true, trailing zeros will be removed; if false, the
  2010. ## number of digits specified by `precision` will always be shown.
  2011. ##
  2012. ## `precision` can be used to set the number of digits to be shown after the
  2013. ## decimal point or (if `trim` is true) the maximum number of digits to be
  2014. ## shown.
  2015. ##
  2016. ## .. code-block:: nim
  2017. ##
  2018. ## formatEng(0, 2, trim=false) == "0.00"
  2019. ## formatEng(0, 2) == "0"
  2020. ## formatEng(0.053, 0) == "53e-3"
  2021. ## formatEng(52731234, 2) == "52.73e6"
  2022. ## formatEng(-52731234, 2) == "-52.73e6"
  2023. ##
  2024. ## If `siPrefix` is set to true, the number will be displayed with the SI
  2025. ## prefix corresponding to the exponent. For example 4100 will be displayed
  2026. ## as "4.1 k" instead of "4.1e3". Note that `u` is used for micro- in place
  2027. ## of the greek letter mu (μ) as per ISO 2955. Numbers with an absolute
  2028. ## value outside of the range 1e-18<f<1000e18 (1a<f<1000E) will be displayed
  2029. ## with an exponent rather than an SI prefix, regardless of whether
  2030. ## `siPrefix` is true.
  2031. ##
  2032. ## If `useUnitSpace` is true, the provided unit will be appended to the string
  2033. ## (with a space as required by the SI standard). This behaviour is slightly
  2034. ## different to appending the unit to the result as the location of the space
  2035. ## is altered depending on whether there is an exponent.
  2036. ##
  2037. ## .. code-block:: nim
  2038. ##
  2039. ## formatEng(4100, siPrefix=true, unit="V") == "4.1 kV"
  2040. ## formatEng(4.1, siPrefix=true, unit="V") == "4.1 V"
  2041. ## formatEng(4.1, siPrefix=true) == "4.1" # Note lack of space
  2042. ## formatEng(4100, siPrefix=true) == "4.1 k"
  2043. ## formatEng(4.1, siPrefix=true, unit="") == "4.1 " # Space with unit=""
  2044. ## formatEng(4100, siPrefix=true, unit="") == "4.1 k"
  2045. ## formatEng(4100) == "4.1e3"
  2046. ## formatEng(4100, unit="V") == "4.1e3 V"
  2047. ## formatEng(4100, unit="", useUnitSpace=true) == "4.1e3 " # Space with useUnitSpace=true
  2048. ##
  2049. ## `decimalSep` is used as the decimal separator.
  2050. var
  2051. absolute: BiggestFloat
  2052. significand: BiggestFloat
  2053. fexponent: BiggestFloat
  2054. exponent: int
  2055. splitResult: seq[string]
  2056. suffix: string = ""
  2057. proc getPrefix(exp: int): char =
  2058. ## Get the SI prefix for a given exponent
  2059. ##
  2060. ## Assumes exponent is a multiple of 3; returns ' ' if no prefix found
  2061. const siPrefixes = ['a','f','p','n','u','m',' ','k','M','G','T','P','E']
  2062. var index: int = (exp div 3) + 6
  2063. result = ' '
  2064. if index in low(siPrefixes)..high(siPrefixes):
  2065. result = siPrefixes[index]
  2066. # Most of the work is done with the sign ignored, so get the absolute value
  2067. absolute = abs(f)
  2068. significand = f
  2069. if absolute == 0.0:
  2070. # Simple case: just format it and force the exponent to 0
  2071. exponent = 0
  2072. result = significand.formatBiggestFloat(ffDecimal, precision, decimalSep='.')
  2073. else:
  2074. # Find the best exponent that's a multiple of 3
  2075. fexponent = floor(log10(absolute))
  2076. fexponent = 3.0 * floor(fexponent / 3.0)
  2077. # Adjust the significand for the new exponent
  2078. significand /= pow(10.0, fexponent)
  2079. # Adjust the significand and check whether it has affected
  2080. # the exponent
  2081. absolute = abs(significand)
  2082. if absolute >= 1000.0:
  2083. significand *= 0.001
  2084. fexponent += 3
  2085. # Components of the result:
  2086. result = significand.formatBiggestFloat(ffDecimal, precision, decimalSep='.')
  2087. exponent = fexponent.int()
  2088. splitResult = result.split('.')
  2089. result = splitResult[0]
  2090. # result should have at most one decimal character
  2091. if splitResult.len() > 1:
  2092. # If trim is set, we get rid of trailing zeros. Don't use trimZeros here as
  2093. # we can be a bit more efficient through knowledge that there will never be
  2094. # an exponent in this part.
  2095. if trim:
  2096. while splitResult[1].endsWith("0"):
  2097. # Trim last character
  2098. splitResult[1].setLen(splitResult[1].len-1)
  2099. if splitResult[1].len() > 0:
  2100. result &= decimalSep & splitResult[1]
  2101. else:
  2102. result &= decimalSep & splitResult[1]
  2103. # Combine the results accordingly
  2104. if siPrefix and exponent != 0:
  2105. var p = getPrefix(exponent)
  2106. if p != ' ':
  2107. suffix = " " & p
  2108. exponent = 0 # Exponent replaced by SI prefix
  2109. if suffix == "" and useUnitSpace:
  2110. suffix = " "
  2111. suffix &= unit
  2112. if exponent != 0:
  2113. result &= "e" & $exponent
  2114. result &= suffix
  2115. proc findNormalized(x: string, inArray: openArray[string]): int =
  2116. var i = 0
  2117. while i < high(inArray):
  2118. if cmpIgnoreStyle(x, inArray[i]) == 0: return i
  2119. inc(i, 2) # incrementing by 1 would probably lead to a
  2120. # security hole...
  2121. return -1
  2122. proc invalidFormatString() {.noinline.} =
  2123. raise newException(ValueError, "invalid format string")
  2124. proc addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {.
  2125. noSideEffect, rtl, extern: "nsuAddf".} =
  2126. ## The same as ``add(s, formatstr % a)``, but more efficient.
  2127. const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'}
  2128. var i = 0
  2129. var num = 0
  2130. while i < len(formatstr):
  2131. if formatstr[i] == '$' and i+1 < len(formatstr):
  2132. case formatstr[i+1]
  2133. of '#':
  2134. if num > a.high: invalidFormatString()
  2135. add s, a[num]
  2136. inc i, 2
  2137. inc num
  2138. of '$':
  2139. add s, '$'
  2140. inc(i, 2)
  2141. of '1'..'9', '-':
  2142. var j = 0
  2143. inc(i) # skip $
  2144. var negative = formatstr[i] == '-'
  2145. if negative: inc i
  2146. while i < formatstr.len and formatstr[i] in Digits:
  2147. j = j * 10 + ord(formatstr[i]) - ord('0')
  2148. inc(i)
  2149. let idx = if not negative: j-1 else: a.len-j
  2150. if idx < 0 or idx > a.high: invalidFormatString()
  2151. add s, a[idx]
  2152. of '{':
  2153. var j = i+2
  2154. var k = 0
  2155. var negative = formatstr[j] == '-'
  2156. if negative: inc j
  2157. var isNumber = 0
  2158. while j < formatstr.len and formatstr[j] notin {'\0', '}'}:
  2159. if formatstr[j] in Digits:
  2160. k = k * 10 + ord(formatstr[j]) - ord('0')
  2161. if isNumber == 0: isNumber = 1
  2162. else:
  2163. isNumber = -1
  2164. inc(j)
  2165. if isNumber == 1:
  2166. let idx = if not negative: k-1 else: a.len-k
  2167. if idx < 0 or idx > a.high: invalidFormatString()
  2168. add s, a[idx]
  2169. else:
  2170. var x = findNormalized(substr(formatstr, i+2, j-1), a)
  2171. if x >= 0 and x < high(a): add s, a[x+1]
  2172. else: invalidFormatString()
  2173. i = j+1
  2174. of 'a'..'z', 'A'..'Z', '\128'..'\255', '_':
  2175. var j = i+1
  2176. while j < formatstr.len and formatstr[j] in PatternChars: inc(j)
  2177. var x = findNormalized(substr(formatstr, i+1, j-1), a)
  2178. if x >= 0 and x < high(a): add s, a[x+1]
  2179. else: invalidFormatString()
  2180. i = j
  2181. else:
  2182. invalidFormatString()
  2183. else:
  2184. add s, formatstr[i]
  2185. inc(i)
  2186. proc `%` *(formatstr: string, a: openArray[string]): string {.noSideEffect,
  2187. rtl, extern: "nsuFormatOpenArray".} =
  2188. ## Interpolates a format string with the values from `a`.
  2189. ##
  2190. ## The `substitution`:idx: operator performs string substitutions in
  2191. ## `formatstr` and returns a modified `formatstr`. This is often called
  2192. ## `string interpolation`:idx:.
  2193. ##
  2194. ## This is best explained by an example:
  2195. ##
  2196. ## .. code-block:: nim
  2197. ## "$1 eats $2." % ["The cat", "fish"]
  2198. ##
  2199. ## Results in:
  2200. ##
  2201. ## .. code-block:: nim
  2202. ## "The cat eats fish."
  2203. ##
  2204. ## The substitution variables (the thing after the ``$``) are enumerated
  2205. ## from 1 to ``a.len``.
  2206. ## To produce a verbatim ``$``, use ``$$``.
  2207. ## The notation ``$#`` can be used to refer to the next substitution
  2208. ## variable:
  2209. ##
  2210. ## .. code-block:: nim
  2211. ## "$# eats $#." % ["The cat", "fish"]
  2212. ##
  2213. ## Substitution variables can also be words (that is
  2214. ## ``[A-Za-z_]+[A-Za-z0-9_]*``) in which case the arguments in `a` with even
  2215. ## indices are keys and with odd indices are the corresponding values.
  2216. ## An example:
  2217. ##
  2218. ## .. code-block:: nim
  2219. ## "$animal eats $food." % ["animal", "The cat", "food", "fish"]
  2220. ##
  2221. ## Results in:
  2222. ##
  2223. ## .. code-block:: nim
  2224. ## "The cat eats fish."
  2225. ##
  2226. ## The variables are compared with `cmpIgnoreStyle`. `ValueError` is
  2227. ## raised if an ill-formed format string has been passed to the `%` operator.
  2228. result = newStringOfCap(formatstr.len + a.len shl 4)
  2229. addf(result, formatstr, a)
  2230. proc `%` *(formatstr, a: string): string {.noSideEffect,
  2231. rtl, extern: "nsuFormatSingleElem".} =
  2232. ## This is the same as ``formatstr % [a]``.
  2233. result = newStringOfCap(formatstr.len + a.len)
  2234. addf(result, formatstr, [a])
  2235. proc format*(formatstr: string, a: varargs[string, `$`]): string {.noSideEffect,
  2236. rtl, extern: "nsuFormatVarargs".} =
  2237. ## This is the same as ``formatstr % a`` except that it supports
  2238. ## auto stringification.
  2239. result = newStringOfCap(formatstr.len + a.len)
  2240. addf(result, formatstr, a)
  2241. {.pop.}
  2242. proc removeSuffix*(s: var string, chars: set[char] = Newlines) {.
  2243. rtl, extern: "nsuRemoveSuffixCharSet".} =
  2244. ## Removes all characters from `chars` from the end of the string `s`
  2245. ## (in-place).
  2246. runnableExamples:
  2247. var userInput = "Hello World!*~\r\n"
  2248. userInput.removeSuffix
  2249. doAssert userInput == "Hello World!*~"
  2250. userInput.removeSuffix({'~', '*'})
  2251. doAssert userInput == "Hello World!"
  2252. var otherInput = "Hello!?!"
  2253. otherInput.removeSuffix({'!', '?'})
  2254. doAssert otherInput == "Hello"
  2255. if s.len == 0: return
  2256. var last = s.high
  2257. while last > -1 and s[last] in chars: last -= 1
  2258. s.setLen(last + 1)
  2259. proc removeSuffix*(s: var string, c: char) {.
  2260. rtl, extern: "nsuRemoveSuffixChar".} =
  2261. ## Removes all occurrences of a single character (in-place) from the end
  2262. ## of a string.
  2263. ##
  2264. runnableExamples:
  2265. var table = "users"
  2266. table.removeSuffix('s')
  2267. doAssert table == "user"
  2268. var dots = "Trailing dots......."
  2269. dots.removeSuffix('.')
  2270. doAssert dots == "Trailing dots"
  2271. removeSuffix(s, chars = {c})
  2272. proc removeSuffix*(s: var string, suffix: string) {.
  2273. rtl, extern: "nsuRemoveSuffixString".} =
  2274. ## Remove the first matching suffix (in-place) from a string.
  2275. runnableExamples:
  2276. var answers = "yeses"
  2277. answers.removeSuffix("es")
  2278. doAssert answers == "yes"
  2279. var newLen = s.len
  2280. if s.endsWith(suffix):
  2281. newLen -= len(suffix)
  2282. s.setLen(newLen)
  2283. proc removePrefix*(s: var string, chars: set[char] = Newlines) {.
  2284. rtl, extern: "nsuRemovePrefixCharSet".} =
  2285. ## Removes all characters from `chars` from the start of the string `s`
  2286. ## (in-place).
  2287. ##
  2288. runnableExamples:
  2289. var userInput = "\r\n*~Hello World!"
  2290. userInput.removePrefix
  2291. doAssert userInput == "*~Hello World!"
  2292. userInput.removePrefix({'~', '*'})
  2293. doAssert userInput == "Hello World!"
  2294. var otherInput = "?!?Hello!?!"
  2295. otherInput.removePrefix({'!', '?'})
  2296. doAssert otherInput == "Hello!?!"
  2297. var start = 0
  2298. while start < s.len and s[start] in chars: start += 1
  2299. if start > 0: s.delete(0, start - 1)
  2300. proc removePrefix*(s: var string, c: char) {.
  2301. rtl, extern: "nsuRemovePrefixChar".} =
  2302. ## Removes all occurrences of a single character (in-place) from the start
  2303. ## of a string.
  2304. ##
  2305. runnableExamples:
  2306. var ident = "pControl"
  2307. ident.removePrefix('p')
  2308. doAssert ident == "Control"
  2309. removePrefix(s, chars = {c})
  2310. proc removePrefix*(s: var string, prefix: string) {.
  2311. rtl, extern: "nsuRemovePrefixString".} =
  2312. ## Remove the first matching prefix (in-place) from a string.
  2313. ##
  2314. runnableExamples:
  2315. var answers = "yesyes"
  2316. answers.removePrefix("yes")
  2317. doAssert answers == "yes"
  2318. if s.startsWith(prefix):
  2319. s.delete(0, prefix.len - 1)
  2320. proc stripLineEnd*(s: var string) =
  2321. ## Returns ``s`` stripped from one of these suffixes:
  2322. ## ``\r, \n, \r\n, \f, \v`` (at most once instance).
  2323. ## For example, can be useful in conjunction with ``osproc.execCmdEx``.
  2324. runnableExamples:
  2325. var s = "foo\n\n"
  2326. s.stripLineEnd
  2327. doAssert s == "foo\n"
  2328. s = "foo\r\n"
  2329. s.stripLineEnd
  2330. doAssert s == "foo"
  2331. if s.len > 0:
  2332. case s[^1]
  2333. of '\n':
  2334. if s.len > 1 and s[^2] == '\r':
  2335. s.setLen s.len-2
  2336. else:
  2337. s.setLen s.len-1
  2338. of '\r', '\v', '\f':
  2339. s.setLen s.len-1
  2340. else:
  2341. discard
  # Self-test section: only compiled when this file is built as the main module.
  # It defines two groups of `doAssert` checks and invokes them at the bottom.
  2342. when isMainModule:
  # Group 1: float/size/engineering formatting and `%` checks.
  # This group is only invoked as a plain call (never from the `static:` block below).
  2343. proc nonStaticTests =
  2344. doAssert formatBiggestFloat(1234.567, ffDecimal, -1) == "1234.567000"
  2345. when not defined(js):
  2346. doAssert formatBiggestFloat(1234.567, ffDecimal, 0) == "1235." # <=== bug 8242
  2347. doAssert formatBiggestFloat(1234.567, ffDecimal, 1) == "1234.6"
  2348. doAssert formatBiggestFloat(0.00000000001, ffDecimal, 11) == "0.00000000001"
  2349. doAssert formatBiggestFloat(0.00000000001, ffScientific, 1, ',') in
  2350. ["1,0e-11", "1,0e-011"]
  2351. # bug #6589
  2352. when not defined(js):
  2353. doAssert formatFloat(123.456, ffScientific, precision = -1) == "1.234560e+02"
  2354. doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c"
  2355. doAssert "${1}12 ${-1}$2" % ["a", "b"] == "a12 bb"
  2356. block: # formatSize tests
  2357. when not defined(js):
  2358. doAssert formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB" # <=== bug #8231
  2359. doAssert formatSize((2.234*1024*1024).int) == "2.234MiB"
  2360. doAssert formatSize(4096) == "4KiB"
  2361. doAssert formatSize(4096, prefix=bpColloquial, includeSpace=true) == "4 kB"
  2362. doAssert formatSize(4096, includeSpace=true) == "4 KiB"
  2363. doAssert formatSize(5_378_934, prefix=bpColloquial, decimalSep=',') == "5,13MB"
  2364. block: # formatEng tests
  2365. doAssert formatEng(0, 2, trim=false) == "0.00"
  2366. doAssert formatEng(0, 2) == "0"
  2367. doAssert formatEng(53, 2, trim=false) == "53.00"
  2368. doAssert formatEng(0.053, 2, trim=false) == "53.00e-3"
  2369. doAssert formatEng(0.053, 4, trim=false) == "53.0000e-3"
  2370. doAssert formatEng(0.053, 4, trim=true) == "53e-3"
  2371. doAssert formatEng(0.053, 0) == "53e-3"
  2372. doAssert formatEng(52731234) == "52.731234e6"
  2373. doAssert formatEng(-52731234) == "-52.731234e6"
  2374. doAssert formatEng(52731234, 1) == "52.7e6"
  2375. doAssert formatEng(-52731234, 1) == "-52.7e6"
  2376. doAssert formatEng(52731234, 1, decimalSep=',') == "52,7e6"
  2377. doAssert formatEng(-52731234, 1, decimalSep=',') == "-52,7e6"
  2378. doAssert formatEng(4100, siPrefix=true, unit="V") == "4.1 kV"
  2379. doAssert formatEng(4.1, siPrefix=true, unit="V", useUnitSpace=true) == "4.1 V"
  2380. doAssert formatEng(4.1, siPrefix=true) == "4.1" # Note lack of space
  2381. doAssert formatEng(4100, siPrefix=true) == "4.1 k"
  2382. doAssert formatEng(4.1, siPrefix=true, unit="", useUnitSpace=true) == "4.1 " # Includes space
  2383. doAssert formatEng(4100, siPrefix=true, unit="") == "4.1 k"
  2384. doAssert formatEng(4100) == "4.1e3"
  2385. doAssert formatEng(4100, unit="V", useUnitSpace=true) == "4.1e3 V"
  2386. doAssert formatEng(4100, unit="", useUnitSpace=true) == "4.1e3 "
  2387. # Don't use SI prefix as number is too big
  2388. doAssert formatEng(3.1e22, siPrefix=true, unit="a", useUnitSpace=true) == "31e21 a"
  2389. # Don't use SI prefix as number is too small
  2390. doAssert formatEng(3.1e-25, siPrefix=true, unit="A", useUnitSpace=true) == "310e-27 A"
  # Group 2: alignment, wrapping, replacing, splitting, character-class and
  # unindent checks. This group is invoked both as a plain call and from the
  # `static:` block at the end of the section.
  2391. proc staticTests =
  2392. doAssert align("abc", 4) == " abc"
  2393. doAssert align("a", 0) == "a"
  2394. doAssert align("1232", 6) == " 1232"
  2395. doAssert align("1232", 6, '#') == "##1232"
  2396. doAssert alignLeft("abc", 4) == "abc "
  2397. doAssert alignLeft("a", 0) == "a"
  2398. doAssert alignLeft("1232", 6) == "1232 "
  2399. doAssert alignLeft("1232", 6, '#') == "1232##"
  2400. let
  2401. inp = """ this is a long text -- muchlongerthan10chars and here
  2402. it goes"""
  2403. outp = " this is a\nlong text\n--\nmuchlongerthan10chars\nand here\nit goes"
  2404. doAssert wordWrap(inp, 10, false) == outp
  2405. let
  2406. longInp = """ThisIsOneVeryLongStringWhichWeWillSplitIntoEightSeparatePartsNow"""
  2407. longOutp = "ThisIsOn\neVeryLon\ngStringW\nhichWeWi\nllSplitI\nntoEight\nSeparate\nPartsNow"
  2408. doAssert wordWrap(longInp, 8, true) == longOutp
  2409. doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] ==
  2410. "The cat eats fish."
  2411. doAssert "-ld a-ldz -ld".replaceWord("-ld") == " a-ldz "
  2412. doAssert "-lda-ldz -ld abc".replaceWord("-ld") == "-lda-ldz abc"
  2413. doAssert "-lda-ldz -ld abc".replaceWord("") == "-lda-ldz -ld abc"
  2414. doAssert "oo".replace("", "abc") == "abcoabcoabc"
  2415. type MyEnum = enum enA, enB, enC, enuD, enE
  2416. doAssert parseEnum[MyEnum]("enu_D") == enuD
  2417. doAssert parseEnum("invalid enum value", enC) == enC
  2418. doAssert center("foo", 13) == " foo "
  2419. doAssert center("foo", 0) == "foo"
  2420. doAssert center("foo", 3, fillChar = 'a') == "foo"
  2421. doAssert center("foo", 10, fillChar = '\t') == "\t\t\tfoo\t\t\t\t"
  2422. doAssert count("foofoofoo", "foofoo") == 1
  2423. doAssert count("foofoofoo", "foofoo", overlapping = true) == 2
  2424. doAssert count("foofoofoo", 'f') == 3
  2425. doAssert count("foofoofoobar", {'f','b'}) == 4
  2426. doAssert strip(" foofoofoo ") == "foofoofoo"
  2427. doAssert strip("sfoofoofoos", chars = {'s'}) == "foofoofoo"
  2428. doAssert strip("barfoofoofoobar", chars = {'b', 'a', 'r'}) == "foofoofoo"
  2429. doAssert strip("stripme but don't strip this stripme",
  2430. chars = {'s', 't', 'r', 'i', 'p', 'm', 'e'}) ==
  2431. " but don't strip this "
  2432. doAssert strip("sfoofoofoos", leading = false, chars = {'s'}) == "sfoofoofoo"
  2433. doAssert strip("sfoofoofoos", trailing = false, chars = {'s'}) == "foofoofoos"
  2434. doAssert " foo\n bar".indent(4, "Q") == "QQQQ foo\nQQQQ bar"
  2435. doAssert "abba".multiReplace(("a", "b"), ("b", "a")) == "baab"
  2436. doAssert "Hello World.".multiReplace(("ello", "ELLO"), ("World.", "PEOPLE!")) == "HELLO PEOPLE!"
  2437. doAssert "aaaa".multiReplace(("a", "aa"), ("aa", "bb")) == "aaaaaaaa"
  2438. doAssert isAlphaAscii('r')
  2439. doAssert isAlphaAscii('A')
  2440. doAssert(not isAlphaAscii('$'))
  2441. doAssert isAlphaNumeric('3')
  2442. doAssert isAlphaNumeric('R')
  2443. doAssert(not isAlphaNumeric('!'))
  2444. doAssert isDigit('3')
  2445. doAssert(not isDigit('a'))
  2446. doAssert(not isDigit('%'))
  2447. doAssert isSpaceAscii('\t')
  2448. doAssert isSpaceAscii('\l')
  2449. doAssert(not isSpaceAscii('A'))
  2450. doAssert(isNilOrWhitespace(""))
  2451. doAssert(isNilOrWhitespace(" "))
  2452. doAssert(isNilOrWhitespace("\t\l \v\r\f"))
  2453. doAssert(not isNilOrWhitespace("ABc \td"))
  2454. doAssert isLowerAscii('a')
  2455. doAssert isLowerAscii('z')
  2456. doAssert(not isLowerAscii('A'))
  2457. doAssert(not isLowerAscii('5'))
  2458. doAssert(not isLowerAscii('&'))
  2459. doAssert(not isLowerAscii(' '))
  2460. doAssert isUpperAscii('A')
  2461. doAssert(not isUpperAscii('b'))
  2462. doAssert(not isUpperAscii('5'))
  2463. doAssert(not isUpperAscii('%'))
  2464. doAssert rsplit("foo bar", seps=Whitespace) == @["foo", "bar"]
  2465. doAssert rsplit(" foo bar", seps=Whitespace, maxsplit=1) == @[" foo", "bar"]
  2466. doAssert rsplit(" foo bar ", seps=Whitespace, maxsplit=1) == @[" foo bar", ""]
  2467. doAssert rsplit(":foo:bar", sep=':') == @["", "foo", "bar"]
  2468. doAssert rsplit(":foo:bar", sep=':', maxsplit=2) == @["", "foo", "bar"]
  2469. doAssert rsplit(":foo:bar", sep=':', maxsplit=3) == @["", "foo", "bar"]
  2470. doAssert rsplit("foothebar", sep="the") == @["foo", "bar"]
  2471. doAssert(unescape(r"\x013", "", "") == "\x013")
  2472. doAssert join(["foo", "bar", "baz"]) == "foobarbaz"
  2473. doAssert join(@["foo", "bar", "baz"], ", ") == "foo, bar, baz"
  2474. doAssert join([1, 2, 3]) == "123"
  2475. doAssert join(@[1, 2, 3], ", ") == "1, 2, 3"
  2476. doAssert """~~!!foo
  2477. ~~!!bar
  2478. ~~!!baz""".unindent(2, "~~!!") == "foo\nbar\nbaz"
  2479. doAssert """~~!!foo
  2480. ~~!!bar
  2481. ~~!!baz""".unindent(2, "~~!!aa") == "~~!!foo\n~~!!bar\n~~!!baz"
  2482. doAssert """~~foo
  2483. ~~ bar
  2484. ~~ baz""".unindent(4, "~") == "foo\n bar\n baz"
  2485. doAssert """foo
  2486. bar
  2487. baz
  2488. """.unindent(4) == "foo\nbar\nbaz\n"
  2489. doAssert """foo
  2490. bar
  2491. baz
  2492. """.unindent(2) == "foo\n bar\n baz\n"
  2493. doAssert """foo
  2494. bar
  2495. baz
  2496. """.unindent(100) == "foo\nbar\nbaz\n"
  2497. doAssert """foo
  2498. foo
  2499. bar
  2500. """.unindent() == "foo\nfoo\nbar\n"
  2501. let s = " this is an example "
  2502. let s2 = ":this;is;an:example;;"
  2503. doAssert s.split() == @["", "this", "is", "an", "example", "", ""]
  2504. doAssert s2.split(seps={':', ';'}) == @["", "this", "is", "an", "example", "", ""]
  2505. doAssert s.split(maxsplit=4) == @["", "this", "is", "an", "example "]
  2506. doAssert s.split(' ', maxsplit=1) == @["", "this is an example "]
  2507. doAssert s.split(" ", maxsplit=4) == @["", "this", "is", "an", "example "]
  2508. doAssert s.splitWhitespace() == @["this", "is", "an", "example"]
  2509. doAssert s.splitWhitespace(maxsplit=1) == @["this", "is an example "]
  2510. doAssert s.splitWhitespace(maxsplit=2) == @["this", "is", "an example "]
  2511. doAssert s.splitWhitespace(maxsplit=3) == @["this", "is", "an", "example "]
  2512. doAssert s.splitWhitespace(maxsplit=4) == @["this", "is", "an", "example"]
  2513. block: # startsWith / endsWith char tests
  2514. var s = "abcdef"
  2515. doAssert s.startsWith('a')
  2516. doAssert s.startsWith('b') == false
  2517. doAssert s.endsWith('f')
  2518. doAssert s.endsWith('a') == false
  2519. doAssert s.endsWith('\0') == false
  2520. #echo("strutils tests passed")
  # Invoke both groups as ordinary calls, then evaluate the second group
  # again inside a `static:` block, i.e. during compilation.
  2521. nonStaticTests()
  2522. staticTests()
  2523. static: staticTests()