strutils.nim 80 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module contains various string utility routines.
  10. ## See the module `re <re.html>`_ for regular expression support.
  11. ## See the module `pegs <pegs.html>`_ for PEG support.
  12. ## This module is available for the `JavaScript target
  13. ## <backends.html#the-javascript-target>`_.
  14. import parseutils
  15. from math import pow, round, floor, log10
  16. from algorithm import reverse
  17. {.deadCodeElim: on.}
  18. {.push debugger:off .} # the user does not want to trace a part
  19. # of the standard library!
  20. include "system/inclrtl"
  21. {.pop.}
  22. # Support old split with set[char]
  23. when defined(nimOldSplit):
  24. {.pragma: deprecatedSplit, deprecated.}
  25. else:
  26. {.pragma: deprecatedSplit.}
  27. type
  28. CharSet* {.deprecated.} = set[char] # for compatibility with Nim
  29. {.deprecated: [TCharSet: CharSet].}
  30. const
  31. Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'}
  32. ## All the characters that count as whitespace.
  33. Letters* = {'A'..'Z', 'a'..'z'}
  34. ## the set of letters
  35. Digits* = {'0'..'9'}
  36. ## the set of digits
  37. HexDigits* = {'0'..'9', 'A'..'F', 'a'..'f'}
  38. ## the set of hexadecimal digits
  39. IdentChars* = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
  40. ## the set of characters an identifier can consist of
  41. IdentStartChars* = {'a'..'z', 'A'..'Z', '_'}
  42. ## the set of characters an identifier can start with
  43. NewLines* = {'\13', '\10'}
  44. ## the set of characters a newline terminator can start with
  45. AllChars* = {'\x00'..'\xFF'}
  46. ## A set with all the possible characters.
  47. ##
  48. ## Not very useful by its own, you can use it to create *inverted* sets to
  49. ## make the `find() proc <#find,string,set[char],int>`_ find **invalid**
  50. ## characters in strings. Example:
  51. ##
  52. ## .. code-block:: nim
  53. ## let invalid = AllChars - Digits
  54. ## doAssert "01234".find(invalid) == -1
  55. ## doAssert "01A34".find(invalid) == 2
  56. proc isAlphaAscii*(c: char): bool {.noSideEffect, procvar,
  57. rtl, extern: "nsuIsAlphaAsciiChar".}=
  58. ## Checks whether or not `c` is alphabetical.
  59. ##
  60. ## This checks a-z, A-Z ASCII characters only.
  61. return c in Letters
  62. proc isAlphaNumeric*(c: char): bool {.noSideEffect, procvar,
  63. rtl, extern: "nsuIsAlphaNumericChar".}=
  64. ## Checks whether or not `c` is alphanumeric.
  65. ##
  66. ## This checks a-z, A-Z, 0-9 ASCII characters only.
  67. return c in Letters or c in Digits
  68. proc isDigit*(c: char): bool {.noSideEffect, procvar,
  69. rtl, extern: "nsuIsDigitChar".}=
  70. ## Checks whether or not `c` is a number.
  71. ##
  72. ## This checks 0-9 ASCII characters only.
  73. return c in Digits
  74. proc isSpaceAscii*(c: char): bool {.noSideEffect, procvar,
  75. rtl, extern: "nsuIsSpaceAsciiChar".}=
  76. ## Checks whether or not `c` is a whitespace character.
  77. return c in Whitespace
  78. proc isLowerAscii*(c: char): bool {.noSideEffect, procvar,
  79. rtl, extern: "nsuIsLowerAsciiChar".}=
  80. ## Checks whether or not `c` is a lower case character.
  81. ##
  82. ## This checks ASCII characters only.
  83. return c in {'a'..'z'}
  84. proc isUpperAscii*(c: char): bool {.noSideEffect, procvar,
  85. rtl, extern: "nsuIsUpperAsciiChar".}=
  86. ## Checks whether or not `c` is an upper case character.
  87. ##
  88. ## This checks ASCII characters only.
  89. return c in {'A'..'Z'}
  90. proc isAlphaAscii*(s: string): bool {.noSideEffect, procvar,
  91. rtl, extern: "nsuIsAlphaAsciiStr".}=
  92. ## Checks whether or not `s` is alphabetical.
  93. ##
  94. ## This checks a-z, A-Z ASCII characters only.
  95. ## Returns true if all characters in `s` are
  96. ## alphabetic and there is at least one character
  97. ## in `s`.
  98. if s.len() == 0:
  99. return false
  100. result = true
  101. for c in s:
  102. result = c.isAlphaAscii() and result
  103. proc isAlphaNumeric*(s: string): bool {.noSideEffect, procvar,
  104. rtl, extern: "nsuIsAlphaNumericStr".}=
  105. ## Checks whether or not `s` is alphanumeric.
  106. ##
  107. ## This checks a-z, A-Z, 0-9 ASCII characters only.
  108. ## Returns true if all characters in `s` are
  109. ## alpanumeric and there is at least one character
  110. ## in `s`.
  111. if s.len() == 0:
  112. return false
  113. result = true
  114. for c in s:
  115. result = c.isAlphaNumeric() and result
  116. proc isDigit*(s: string): bool {.noSideEffect, procvar,
  117. rtl, extern: "nsuIsDigitStr".}=
  118. ## Checks whether or not `s` is a numeric value.
  119. ##
  120. ## This checks 0-9 ASCII characters only.
  121. ## Returns true if all characters in `s` are
  122. ## numeric and there is at least one character
  123. ## in `s`.
  124. if s.len() == 0:
  125. return false
  126. result = true
  127. for c in s:
  128. result = c.isDigit() and result
  129. proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar,
  130. rtl, extern: "nsuIsSpaceAsciiStr".}=
  131. ## Checks whether or not `s` is completely whitespace.
  132. ##
  133. ## Returns true if all characters in `s` are whitespace
  134. ## characters and there is at least one character in `s`.
  135. if s.len() == 0:
  136. return false
  137. result = true
  138. for c in s:
  139. if not c.isSpaceAscii():
  140. return false
  141. proc isLowerAscii*(s: string): bool {.noSideEffect, procvar,
  142. rtl, extern: "nsuIsLowerAsciiStr".}=
  143. ## Checks whether or not `s` contains all lower case characters.
  144. ##
  145. ## This checks ASCII characters only.
  146. ## Returns true if all characters in `s` are lower case
  147. ## and there is at least one character in `s`.
  148. if s.len() == 0:
  149. return false
  150. for c in s:
  151. if not c.isLowerAscii():
  152. return false
  153. true
  154. proc isUpperAscii*(s: string): bool {.noSideEffect, procvar,
  155. rtl, extern: "nsuIsUpperAsciiStr".}=
  156. ## Checks whether or not `s` contains all upper case characters.
  157. ##
  158. ## This checks ASCII characters only.
  159. ## Returns true if all characters in `s` are upper case
  160. ## and there is at least one character in `s`.
  161. if s.len() == 0:
  162. return false
  163. for c in s:
  164. if not c.isUpperAscii():
  165. return false
  166. true
  167. proc toLowerAscii*(c: char): char {.noSideEffect, procvar,
  168. rtl, extern: "nsuToLowerAsciiChar".} =
  169. ## Converts `c` into lower case.
  170. ##
  171. ## This works only for the letters ``A-Z``. See `unicode.toLower
  172. ## <unicode.html#toLower>`_ for a version that works for any Unicode
  173. ## character.
  174. if c in {'A'..'Z'}:
  175. result = chr(ord(c) + (ord('a') - ord('A')))
  176. else:
  177. result = c
  178. proc toLowerAscii*(s: string): string {.noSideEffect, procvar,
  179. rtl, extern: "nsuToLowerAsciiStr".} =
  180. ## Converts `s` into lower case.
  181. ##
  182. ## This works only for the letters ``A-Z``. See `unicode.toLower
  183. ## <unicode.html#toLower>`_ for a version that works for any Unicode
  184. ## character.
  185. result = newString(len(s))
  186. for i in 0..len(s) - 1:
  187. result[i] = toLowerAscii(s[i])
  188. proc toUpperAscii*(c: char): char {.noSideEffect, procvar,
  189. rtl, extern: "nsuToUpperAsciiChar".} =
  190. ## Converts `c` into upper case.
  191. ##
  192. ## This works only for the letters ``A-Z``. See `unicode.toUpper
  193. ## <unicode.html#toUpper>`_ for a version that works for any Unicode
  194. ## character.
  195. if c in {'a'..'z'}:
  196. result = chr(ord(c) - (ord('a') - ord('A')))
  197. else:
  198. result = c
  199. proc toUpperAscii*(s: string): string {.noSideEffect, procvar,
  200. rtl, extern: "nsuToUpperAsciiStr".} =
  201. ## Converts `s` into upper case.
  202. ##
  203. ## This works only for the letters ``A-Z``. See `unicode.toUpper
  204. ## <unicode.html#toUpper>`_ for a version that works for any Unicode
  205. ## character.
  206. result = newString(len(s))
  207. for i in 0..len(s) - 1:
  208. result[i] = toUpperAscii(s[i])
  209. proc capitalizeAscii*(s: string): string {.noSideEffect, procvar,
  210. rtl, extern: "nsuCapitalizeAscii".} =
  211. ## Converts the first character of `s` into upper case.
  212. ##
  213. ## This works only for the letters ``A-Z``.
  214. result = toUpperAscii(s[0]) & substr(s, 1)
  215. proc isSpace*(c: char): bool {.noSideEffect, procvar,
  216. rtl, deprecated, extern: "nsuIsSpaceChar".}=
  217. ## Checks whether or not `c` is a whitespace character.
  218. ##
  219. ## **Deprecated since version 0.15.0**: use ``isSpaceAscii`` instead.
  220. isSpaceAscii(c)
  221. proc isLower*(c: char): bool {.noSideEffect, procvar,
  222. rtl, deprecated, extern: "nsuIsLowerChar".}=
  223. ## Checks whether or not `c` is a lower case character.
  224. ##
  225. ## This checks ASCII characters only.
  226. ##
  227. ## **Deprecated since version 0.15.0**: use ``isLowerAscii`` instead.
  228. isLowerAscii(c)
  229. proc isUpper*(c: char): bool {.noSideEffect, procvar,
  230. rtl, deprecated, extern: "nsuIsUpperChar".}=
  231. ## Checks whether or not `c` is an upper case character.
  232. ##
  233. ## This checks ASCII characters only.
  234. ##
  235. ## **Deprecated since version 0.15.0**: use ``isUpperAscii`` instead.
  236. isUpperAscii(c)
  237. proc isAlpha*(c: char): bool {.noSideEffect, procvar,
  238. rtl, deprecated, extern: "nsuIsAlphaChar".}=
  239. ## Checks whether or not `c` is alphabetical.
  240. ##
  241. ## This checks a-z, A-Z ASCII characters only.
  242. ##
  243. ## **Deprecated since version 0.15.0**: use ``isAlphaAscii`` instead.
  244. isAlphaAscii(c)
  245. proc isAlpha*(s: string): bool {.noSideEffect, procvar,
  246. rtl, deprecated, extern: "nsuIsAlphaStr".}=
  247. ## Checks whether or not `s` is alphabetical.
  248. ##
  249. ## This checks a-z, A-Z ASCII characters only.
  250. ## Returns true if all characters in `s` are
  251. ## alphabetic and there is at least one character
  252. ## in `s`.
  253. ##
  254. ## **Deprecated since version 0.15.0**: use ``isAlphaAscii`` instead.
  255. isAlphaAscii(s)
  256. proc isSpace*(s: string): bool {.noSideEffect, procvar,
  257. rtl, deprecated, extern: "nsuIsSpaceStr".}=
  258. ## Checks whether or not `s` is completely whitespace.
  259. ##
  260. ## Returns true if all characters in `s` are whitespace
  261. ## characters and there is at least one character in `s`.
  262. ##
  263. ## **Deprecated since version 0.15.0**: use ``isSpaceAscii`` instead.
  264. isSpaceAscii(s)
  265. proc isLower*(s: string): bool {.noSideEffect, procvar,
  266. rtl, deprecated, extern: "nsuIsLowerStr".}=
  267. ## Checks whether or not `s` contains all lower case characters.
  268. ##
  269. ## This checks ASCII characters only.
  270. ## Returns true if all characters in `s` are lower case
  271. ## and there is at least one character in `s`.
  272. ##
  273. ## **Deprecated since version 0.15.0**: use ``isLowerAscii`` instead.
  274. isLowerAscii(s)
  275. proc isUpper*(s: string): bool {.noSideEffect, procvar,
  276. rtl, deprecated, extern: "nsuIsUpperStr".}=
  277. ## Checks whether or not `s` contains all upper case characters.
  278. ##
  279. ## This checks ASCII characters only.
  280. ## Returns true if all characters in `s` are upper case
  281. ## and there is at least one character in `s`.
  282. ##
  283. ## **Deprecated since version 0.15.0**: use ``isUpperAscii`` instead.
  284. isUpperAscii(s)
  285. proc toLower*(c: char): char {.noSideEffect, procvar,
  286. rtl, deprecated, extern: "nsuToLowerChar".} =
  287. ## Converts `c` into lower case.
  288. ##
  289. ## This works only for the letters ``A-Z``. See `unicode.toLower
  290. ## <unicode.html#toLower>`_ for a version that works for any Unicode
  291. ## character.
  292. ##
  293. ## **Deprecated since version 0.15.0**: use ``toLowerAscii`` instead.
  294. toLowerAscii(c)
  295. proc toLower*(s: string): string {.noSideEffect, procvar,
  296. rtl, deprecated, extern: "nsuToLowerStr".} =
  297. ## Converts `s` into lower case.
  298. ##
  299. ## This works only for the letters ``A-Z``. See `unicode.toLower
  300. ## <unicode.html#toLower>`_ for a version that works for any Unicode
  301. ## character.
  302. ##
  303. ## **Deprecated since version 0.15.0**: use ``toLowerAscii`` instead.
  304. toLowerAscii(s)
  305. proc toUpper*(c: char): char {.noSideEffect, procvar,
  306. rtl, deprecated, extern: "nsuToUpperChar".} =
  307. ## Converts `c` into upper case.
  308. ##
  309. ## This works only for the letters ``A-Z``. See `unicode.toUpper
  310. ## <unicode.html#toUpper>`_ for a version that works for any Unicode
  311. ## character.
  312. ##
  313. ## **Deprecated since version 0.15.0**: use ``toUpperAscii`` instead.
  314. toUpperAscii(c)
  315. proc toUpper*(s: string): string {.noSideEffect, procvar,
  316. rtl, deprecated, extern: "nsuToUpperStr".} =
  317. ## Converts `s` into upper case.
  318. ##
  319. ## This works only for the letters ``A-Z``. See `unicode.toUpper
  320. ## <unicode.html#toUpper>`_ for a version that works for any Unicode
  321. ## character.
  322. ##
  323. ## **Deprecated since version 0.15.0**: use ``toUpperAscii`` instead.
  324. toUpperAscii(s)
  325. proc capitalize*(s: string): string {.noSideEffect, procvar,
  326. rtl, deprecated, extern: "nsuCapitalize".} =
  327. ## Converts the first character of `s` into upper case.
  328. ##
  329. ## This works only for the letters ``A-Z``.
  330. ##
  331. ## **Deprecated since version 0.15.0**: use ``capitalizeAscii`` instead.
  332. capitalizeAscii(s)
  333. proc normalize*(s: string): string {.noSideEffect, procvar,
  334. rtl, extern: "nsuNormalize".} =
  335. ## Normalizes the string `s`.
  336. ##
  337. ## That means to convert it to lower case and remove any '_'. This is needed
  338. ## for Nim identifiers for example.
  339. result = newString(s.len)
  340. var j = 0
  341. for i in 0..len(s) - 1:
  342. if s[i] in {'A'..'Z'}:
  343. result[j] = chr(ord(s[i]) + (ord('a') - ord('A')))
  344. inc j
  345. elif s[i] != '_':
  346. result[j] = s[i]
  347. inc j
  348. if j != s.len: setLen(result, j)
  349. proc cmpIgnoreCase*(a, b: string): int {.noSideEffect,
  350. rtl, extern: "nsuCmpIgnoreCase", procvar.} =
  351. ## Compares two strings in a case insensitive manner. Returns:
  352. ##
  353. ## | 0 iff a == b
  354. ## | < 0 iff a < b
  355. ## | > 0 iff a > b
  356. var i = 0
  357. var m = min(a.len, b.len)
  358. while i < m:
  359. result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i]))
  360. if result != 0: return
  361. inc(i)
  362. result = a.len - b.len
  363. {.push checks: off, line_trace: off .} # this is a hot-spot in the compiler!
  364. # thus we compile without checks here
  365. proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect,
  366. rtl, extern: "nsuCmpIgnoreStyle", procvar.} =
  367. ## Compares two strings normalized (i.e. case and
  368. ## underscores do not matter). Returns:
  369. ##
  370. ## | 0 iff a == b
  371. ## | < 0 iff a < b
  372. ## | > 0 iff a > b
  373. var i = 0
  374. var j = 0
  375. while true:
  376. while a[i] == '_': inc(i)
  377. while b[j] == '_': inc(j) # BUGFIX: typo
  378. var aa = toLowerAscii(a[i])
  379. var bb = toLowerAscii(b[j])
  380. result = ord(aa) - ord(bb)
  381. if result != 0 or aa == '\0': break
  382. inc(i)
  383. inc(j)
  384. proc strip*(s: string, leading = true, trailing = true,
  385. chars: set[char] = Whitespace): string
  386. {.noSideEffect, rtl, extern: "nsuStrip".} =
  387. ## Strips `chars` from `s` and returns the resulting string.
  388. ##
  389. ## If `leading` is true, leading `chars` are stripped.
  390. ## If `trailing` is true, trailing `chars` are stripped.
  391. var
  392. first = 0
  393. last = len(s)-1
  394. if leading:
  395. while s[first] in chars: inc(first)
  396. if trailing:
  397. while last >= 0 and s[last] in chars: dec(last)
  398. result = substr(s, first, last)
  399. proc toOctal*(c: char): string {.noSideEffect, rtl, extern: "nsuToOctal".} =
  400. ## Converts a character `c` to its octal representation.
  401. ##
  402. ## The resulting string may not have a leading zero. Its length is always
  403. ## exactly 3.
  404. result = newString(3)
  405. var val = ord(c)
  406. for i in countdown(2, 0):
  407. result[i] = chr(val mod 8 + ord('0'))
  408. val = val div 8
  409. proc isNilOrEmpty*(s: string): bool {.noSideEffect, procvar, rtl, extern: "nsuIsNilOrEmpty".} =
  410. ## Checks if `s` is nil or empty.
  411. result = len(s) == 0
  412. proc isNilOrWhitespace*(s: string): bool {.noSideEffect, procvar, rtl, extern: "nsuIsNilOrWhitespace".} =
  413. ## Checks if `s` is nil or consists entirely of whitespace characters.
  414. if len(s) == 0:
  415. return true
  416. result = true
  417. for c in s:
  418. if not c.isSpaceAscii():
  419. return false
  420. proc substrEq(s: string, pos: int, substr: string): bool =
  421. var i = 0
  422. var length = substr.len
  423. while i < length and s[pos+i] == substr[i]:
  424. inc i
  425. return i == length
  426. # --------- Private templates for different split separators -----------
  427. template stringHasSep(s: string, index: int, seps: set[char]): bool =
  428. s[index] in seps
  429. template stringHasSep(s: string, index: int, sep: char): bool =
  430. s[index] == sep
  431. template stringHasSep(s: string, index: int, sep: string): bool =
  432. s.substrEq(index, sep)
  433. template splitCommon(s, sep, maxsplit, sepLen) =
  434. ## Common code for split procedures
  435. var last = 0
  436. var splits = maxsplit
  437. if len(s) > 0:
  438. while last <= len(s):
  439. var first = last
  440. while last < len(s) and not stringHasSep(s, last, sep):
  441. inc(last)
  442. if splits == 0: last = len(s)
  443. yield substr(s, first, last-1)
  444. if splits == 0: break
  445. dec(splits)
  446. inc(last, sepLen)
  447. template oldSplit(s, seps, maxsplit) =
  448. var last = 0
  449. var splits = maxsplit
  450. assert(not ('\0' in seps))
  451. while last < len(s):
  452. while s[last] in seps: inc(last)
  453. var first = last
  454. while last < len(s) and s[last] notin seps: inc(last)
  455. if first <= last-1:
  456. if splits == 0: last = len(s)
  457. yield substr(s, first, last-1)
  458. if splits == 0: break
  459. dec(splits)
  460. iterator split*(s: string, seps: set[char] = Whitespace,
  461. maxsplit: int = -1): string =
  462. ## Splits the string `s` into substrings using a group of separators.
  463. ##
  464. ## Substrings are separated by a substring containing only `seps`.
  465. ##
  466. ## .. code-block:: nim
  467. ## for word in split("this\lis an\texample"):
  468. ## writeLine(stdout, word)
  469. ##
  470. ## ...generates this output:
  471. ##
  472. ## .. code-block::
  473. ## "this"
  474. ## "is"
  475. ## "an"
  476. ## "example"
  477. ##
  478. ## And the following code:
  479. ##
  480. ## .. code-block:: nim
  481. ## for word in split("this:is;an$example", {';', ':', '$'}):
  482. ## writeLine(stdout, word)
  483. ##
  484. ## ...produces the same output as the first example. The code:
  485. ##
  486. ## .. code-block:: nim
  487. ## let date = "2012-11-20T22:08:08.398990"
  488. ## let separators = {' ', '-', ':', 'T'}
  489. ## for number in split(date, separators):
  490. ## writeLine(stdout, number)
  491. ##
  492. ## ...results in:
  493. ##
  494. ## .. code-block::
  495. ## "2012"
  496. ## "11"
  497. ## "20"
  498. ## "22"
  499. ## "08"
  500. ## "08.398990"
  501. ##
  502. when defined(nimOldSplit):
  503. oldSplit(s, seps, maxsplit)
  504. else:
  505. splitCommon(s, seps, maxsplit, 1)
  506. iterator splitWhitespace*(s: string): string =
  507. ## Splits at whitespace.
  508. oldSplit(s, Whitespace, -1)
  509. proc splitWhitespace*(s: string): seq[string] {.noSideEffect,
  510. rtl, extern: "nsuSplitWhitespace".} =
  511. ## The same as the `splitWhitespace <#splitWhitespace.i,string>`_
  512. ## iterator, but is a proc that returns a sequence of substrings.
  513. accumulateResult(splitWhitespace(s))
  514. iterator split*(s: string, sep: char, maxsplit: int = -1): string =
  515. ## Splits the string `s` into substrings using a single separator.
  516. ##
  517. ## Substrings are separated by the character `sep`.
  518. ## The code:
  519. ##
  520. ## .. code-block:: nim
  521. ## for word in split(";;this;is;an;;example;;;", ';'):
  522. ## writeLine(stdout, word)
  523. ##
  524. ## Results in:
  525. ##
  526. ## .. code-block::
  527. ## ""
  528. ## ""
  529. ## "this"
  530. ## "is"
  531. ## "an"
  532. ## ""
  533. ## "example"
  534. ## ""
  535. ## ""
  536. ## ""
  537. ##
  538. splitCommon(s, sep, maxsplit, 1)
  539. iterator split*(s: string, sep: string, maxsplit: int = -1): string =
  540. ## Splits the string `s` into substrings using a string separator.
  541. ##
  542. ## Substrings are separated by the string `sep`.
  543. ## The code:
  544. ##
  545. ## .. code-block:: nim
  546. ## for word in split("thisDATAisDATAcorrupted", "DATA"):
  547. ## writeLine(stdout, word)
  548. ##
  549. ## Results in:
  550. ##
  551. ## .. code-block::
  552. ## "this"
  553. ## "is"
  554. ## "corrupted"
  555. ##
  556. splitCommon(s, sep, maxsplit, sep.len)
  557. template rsplitCommon(s, sep, maxsplit, sepLen) =
  558. ## Common code for rsplit functions
  559. var
  560. last = s.len - 1
  561. first = last
  562. splits = maxsplit
  563. startPos = 0
  564. if len(s) > 0:
  565. # go to -1 in order to get separators at the beginning
  566. while first >= -1:
  567. while first >= 0 and not stringHasSep(s, first, sep):
  568. dec(first)
  569. if splits == 0:
  570. # No more splits means set first to the beginning
  571. first = -1
  572. if first == -1:
  573. startPos = 0
  574. else:
  575. startPos = first + sepLen
  576. yield substr(s, startPos, last)
  577. if splits == 0:
  578. break
  579. dec(splits)
  580. dec(first)
  581. last = first
  582. iterator rsplit*(s: string, seps: set[char] = Whitespace,
  583. maxsplit: int = -1): string =
  584. ## Splits the string `s` into substrings from the right using a
  585. ## string separator. Works exactly the same as `split iterator
  586. ## <#split.i,string,char>`_ except in reverse order.
  587. ##
  588. ## .. code-block:: nim
  589. ## for piece in "foo bar".rsplit(WhiteSpace):
  590. ## echo piece
  591. ##
  592. ## Results in:
  593. ##
  594. ## .. code-block:: nim
  595. ## "bar"
  596. ## "foo"
  597. ##
  598. ## Substrings are separated from the right by the set of chars `seps`
  599. rsplitCommon(s, seps, maxsplit, 1)
  600. iterator rsplit*(s: string, sep: char,
  601. maxsplit: int = -1): string =
  602. ## Splits the string `s` into substrings from the right using a
  603. ## string separator. Works exactly the same as `split iterator
  604. ## <#split.i,string,char>`_ except in reverse order.
  605. ##
  606. ## .. code-block:: nim
  607. ## for piece in "foo:bar".rsplit(':'):
  608. ## echo piece
  609. ##
  610. ## Results in:
  611. ##
  612. ## .. code-block:: nim
  613. ## "bar"
  614. ## "foo"
  615. ##
  616. ## Substrings are separated from the right by the char `sep`
  617. rsplitCommon(s, sep, maxsplit, 1)
  618. iterator rsplit*(s: string, sep: string, maxsplit: int = -1,
  619. keepSeparators: bool = false): string =
  620. ## Splits the string `s` into substrings from the right using a
  621. ## string separator. Works exactly the same as `split iterator
  622. ## <#split.i,string,string>`_ except in reverse order.
  623. ##
  624. ## .. code-block:: nim
  625. ## for piece in "foothebar".rsplit("the"):
  626. ## echo piece
  627. ##
  628. ## Results in:
  629. ##
  630. ## .. code-block:: nim
  631. ## "bar"
  632. ## "foo"
  633. ##
  634. ## Substrings are separated from the right by the string `sep`
  635. rsplitCommon(s, sep, maxsplit, sep.len)
  636. iterator splitLines*(s: string): string =
  637. ## Splits the string `s` into its containing lines.
  638. ##
  639. ## Every `character literal <manual.html#character-literals>`_ newline
  640. ## combination (CR, LF, CR-LF) is supported. The result strings contain no
  641. ## trailing ``\n``.
  642. ##
  643. ## Example:
  644. ##
  645. ## .. code-block:: nim
  646. ## for line in splitLines("\nthis\nis\nan\n\nexample\n"):
  647. ## writeLine(stdout, line)
  648. ##
  649. ## Results in:
  650. ##
  651. ## .. code-block:: nim
  652. ## ""
  653. ## "this"
  654. ## "is"
  655. ## "an"
  656. ## ""
  657. ## "example"
  658. ## ""
  659. var first = 0
  660. var last = 0
  661. while true:
  662. while s[last] notin {'\0', '\c', '\l'}: inc(last)
  663. yield substr(s, first, last-1)
  664. # skip newlines:
  665. if s[last] == '\l': inc(last)
  666. elif s[last] == '\c':
  667. inc(last)
  668. if s[last] == '\l': inc(last)
  669. else: break # was '\0'
  670. first = last
  671. proc splitLines*(s: string): seq[string] {.noSideEffect,
  672. rtl, extern: "nsuSplitLines".} =
  673. ## The same as the `splitLines <#splitLines.i,string>`_ iterator, but is a
  674. ## proc that returns a sequence of substrings.
  675. accumulateResult(splitLines(s))
  676. proc countLines*(s: string): int {.noSideEffect,
  677. rtl, extern: "nsuCountLines".} =
  678. ## Returns the number of lines in the string `s`.
  679. ##
  680. ## This is the same as ``len(splitLines(s))``, but much more efficient
  681. ## because it doesn't modify the string creating temporal objects. Every
  682. ## `character literal <manual.html#character-literals>`_ newline combination
  683. ## (CR, LF, CR-LF) is supported.
  684. ##
  685. ## In this context, a line is any string seperated by a newline combination.
  686. ## A line can be an empty string.
  687. result = 1
  688. var i = 0
  689. while i < s.len:
  690. case s[i]
  691. of '\c':
  692. if s[i+1] == '\l': inc i
  693. inc result
  694. of '\l': inc result
  695. else: discard
  696. inc i
  697. proc split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[string] {.
  698. noSideEffect, rtl, extern: "nsuSplitCharSet".} =
  699. ## The same as the `split iterator <#split.i,string,set[char]>`_, but is a
  700. ## proc that returns a sequence of substrings.
  701. accumulateResult(split(s, seps, maxsplit))
  702. proc split*(s: string, sep: char, maxsplit: int = -1): seq[string] {.noSideEffect,
  703. rtl, extern: "nsuSplitChar".} =
  704. ## The same as the `split iterator <#split.i,string,char>`_, but is a proc
  705. ## that returns a sequence of substrings.
  706. accumulateResult(split(s, sep, maxsplit))
  707. proc split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.noSideEffect,
  708. rtl, extern: "nsuSplitString".} =
  709. ## Splits the string `s` into substrings using a string separator.
  710. ##
  711. ## Substrings are separated by the string `sep`. This is a wrapper around the
  712. ## `split iterator <#split.i,string,string>`_.
  713. doAssert(sep.len > 0)
  714. accumulateResult(split(s, sep, maxsplit))
  715. proc rsplit*(s: string, seps: set[char] = Whitespace,
  716. maxsplit: int = -1): seq[string]
  717. {.noSideEffect, rtl, extern: "nsuRSplitCharSet".} =
  718. ## The same as the `rsplit iterator <#rsplit.i,string,set[char]>`_, but is a
  719. ## proc that returns a sequence of substrings.
  720. ##
  721. ## A possible common use case for `rsplit` is path manipulation,
  722. ## particularly on systems that don't use a common delimiter.
  723. ##
  724. ## For example, if a system had `#` as a delimiter, you could
  725. ## do the following to get the tail of the path:
  726. ##
  727. ## .. code-block:: nim
  728. ## var tailSplit = rsplit("Root#Object#Method#Index", {'#'}, maxsplit=1)
  729. ##
  730. ## Results in `tailSplit` containing:
  731. ##
  732. ## .. code-block:: nim
  733. ## @["Root#Object#Method", "Index"]
  734. ##
  735. accumulateResult(rsplit(s, seps, maxsplit))
  736. result.reverse()
  737. proc rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string]
  738. {.noSideEffect, rtl, extern: "nsuRSplitChar".} =
  739. ## The same as the `split iterator <#rsplit.i,string,char>`_, but is a proc
  740. ## that returns a sequence of substrings.
  741. ##
  742. ## A possible common use case for `rsplit` is path manipulation,
  743. ## particularly on systems that don't use a common delimiter.
  744. ##
  745. ## For example, if a system had `#` as a delimiter, you could
  746. ## do the following to get the tail of the path:
  747. ##
  748. ## .. code-block:: nim
  749. ## var tailSplit = rsplit("Root#Object#Method#Index", '#', maxsplit=1)
  750. ##
  751. ## Results in `tailSplit` containing:
  752. ##
  753. ## .. code-block:: nim
  754. ## @["Root#Object#Method", "Index"]
  755. ##
  756. accumulateResult(rsplit(s, sep, maxsplit))
  757. result.reverse()
  758. proc rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string]
  759. {.noSideEffect, rtl, extern: "nsuRSplitString".} =
  760. ## The same as the `split iterator <#rsplit.i,string,string>`_, but is a proc
  761. ## that returns a sequence of substrings.
  762. ##
  763. ## A possible common use case for `rsplit` is path manipulation,
  764. ## particularly on systems that don't use a common delimiter.
  765. ##
  766. ## For example, if a system had `#` as a delimiter, you could
  767. ## do the following to get the tail of the path:
  768. ##
  769. ## .. code-block:: nim
  770. ## var tailSplit = rsplit("Root#Object#Method#Index", "#", maxsplit=1)
  771. ##
  772. ## Results in `tailSplit` containing:
  773. ##
  774. ## .. code-block:: nim
  775. ## @["Root#Object#Method", "Index"]
  776. ##
  777. accumulateResult(rsplit(s, sep, maxsplit))
  778. result.reverse()
  779. proc toHex*(x: BiggestInt, len: Positive): string {.noSideEffect,
  780. rtl, extern: "nsuToHex".} =
  781. ## Converts `x` to its hexadecimal representation.
  782. ##
  783. ## The resulting string will be exactly `len` characters long. No prefix like
  784. ## ``0x`` is generated. `x` is treated as an unsigned value.
  785. const
  786. HexChars = "0123456789ABCDEF"
  787. var
  788. n = x
  789. result = newString(len)
  790. for j in countdown(len-1, 0):
  791. result[j] = HexChars[int(n and 0xF)]
  792. n = n shr 4
  793. # handle negative overflow
  794. if n == 0 and x < 0: n = -1
  795. proc toHex*[T](x: T): string =
  796. ## Shortcut for ``toHex(x, T.sizeOf * 2)``
  797. toHex(BiggestInt(x), T.sizeOf * 2)
  798. proc intToStr*(x: int, minchars: Positive = 1): string {.noSideEffect,
  799. rtl, extern: "nsuIntToStr".} =
  800. ## Converts `x` to its decimal representation.
  801. ##
  802. ## The resulting string will be minimally `minchars` characters long. This is
  803. ## achieved by adding leading zeros.
  804. result = $abs(x)
  805. for i in 1 .. minchars - len(result):
  806. result = '0' & result
  807. if x < 0:
  808. result = '-' & result
  809. proc parseInt*(s: string): int {.noSideEffect, procvar,
  810. rtl, extern: "nsuParseInt".} =
  811. ## Parses a decimal integer value contained in `s`.
  812. ##
  813. ## If `s` is not a valid integer, `ValueError` is raised.
  814. var L = parseutils.parseInt(s, result, 0)
  815. if L != s.len or L == 0:
  816. raise newException(ValueError, "invalid integer: " & s)
  817. proc parseBiggestInt*(s: string): BiggestInt {.noSideEffect, procvar,
  818. rtl, extern: "nsuParseBiggestInt".} =
  819. ## Parses a decimal integer value contained in `s`.
  820. ##
  821. ## If `s` is not a valid integer, `ValueError` is raised.
  822. var L = parseutils.parseBiggestInt(s, result, 0)
  823. if L != s.len or L == 0:
  824. raise newException(ValueError, "invalid integer: " & s)
  825. proc parseUInt*(s: string): uint {.noSideEffect, procvar,
  826. rtl, extern: "nsuParseUInt".} =
  827. ## Parses a decimal unsigned integer value contained in `s`.
  828. ##
  829. ## If `s` is not a valid integer, `ValueError` is raised.
  830. var L = parseutils.parseUInt(s, result, 0)
  831. if L != s.len or L == 0:
  832. raise newException(ValueError, "invalid unsigned integer: " & s)
  833. proc parseBiggestUInt*(s: string): BiggestUInt {.noSideEffect, procvar,
  834. rtl, extern: "nsuParseBiggestUInt".} =
  835. ## Parses a decimal unsigned integer value contained in `s`.
  836. ##
  837. ## If `s` is not a valid integer, `ValueError` is raised.
  838. var L = parseutils.parseBiggestUInt(s, result, 0)
  839. if L != s.len or L == 0:
  840. raise newException(ValueError, "invalid unsigned integer: " & s)
  841. proc parseFloat*(s: string): float {.noSideEffect, procvar,
  842. rtl, extern: "nsuParseFloat".} =
  843. ## Parses a decimal floating point value contained in `s`. If `s` is not
  844. ## a valid floating point number, `ValueError` is raised. ``NAN``,
  845. ## ``INF``, ``-INF`` are also supported (case insensitive comparison).
  846. var L = parseutils.parseFloat(s, result, 0)
  847. if L != s.len or L == 0:
  848. raise newException(ValueError, "invalid float: " & s)
  849. proc parseHexInt*(s: string): int {.noSideEffect, procvar,
  850. rtl, extern: "nsuParseHexInt".} =
  851. ## Parses a hexadecimal integer value contained in `s`.
  852. ##
  853. ## If `s` is not a valid integer, `ValueError` is raised. `s` can have one
  854. ## of the following optional prefixes: ``0x``, ``0X``, ``#``. Underscores
  855. ## within `s` are ignored.
  856. var i = 0
  857. if s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2)
  858. elif s[i] == '#': inc(i)
  859. while true:
  860. case s[i]
  861. of '_': inc(i)
  862. of '0'..'9':
  863. result = result shl 4 or (ord(s[i]) - ord('0'))
  864. inc(i)
  865. of 'a'..'f':
  866. result = result shl 4 or (ord(s[i]) - ord('a') + 10)
  867. inc(i)
  868. of 'A'..'F':
  869. result = result shl 4 or (ord(s[i]) - ord('A') + 10)
  870. inc(i)
  871. of '\0': break
  872. else: raise newException(ValueError, "invalid integer: " & s)
  873. proc parseBool*(s: string): bool =
  874. ## Parses a value into a `bool`.
  875. ##
  876. ## If ``s`` is one of the following values: ``y, yes, true, 1, on``, then
  877. ## returns `true`. If ``s`` is one of the following values: ``n, no, false,
  878. ## 0, off``, then returns `false`. If ``s`` is something else a
  879. ## ``ValueError`` exception is raised.
  880. case normalize(s)
  881. of "y", "yes", "true", "1", "on": result = true
  882. of "n", "no", "false", "0", "off": result = false
  883. else: raise newException(ValueError, "cannot interpret as a bool: " & s)
  884. proc parseEnum*[T: enum](s: string): T =
  885. ## Parses an enum ``T``.
  886. ##
  887. ## Raises ``ValueError`` for an invalid value in `s`. The comparison is
  888. ## done in a style insensitive way.
  889. for e in low(T)..high(T):
  890. if cmpIgnoreStyle(s, $e) == 0:
  891. return e
  892. raise newException(ValueError, "invalid enum value: " & s)
  893. proc parseEnum*[T: enum](s: string, default: T): T =
  894. ## Parses an enum ``T``.
  895. ##
  896. ## Uses `default` for an invalid value in `s`. The comparison is done in a
  897. ## style insensitive way.
  898. for e in low(T)..high(T):
  899. if cmpIgnoreStyle(s, $e) == 0:
  900. return e
  901. result = default
  902. proc repeat*(c: char, count: Natural): string {.noSideEffect,
  903. rtl, extern: "nsuRepeatChar".} =
  904. ## Returns a string of length `count` consisting only of
  905. ## the character `c`. You can use this proc to left align strings. Example:
  906. ##
  907. ## .. code-block:: nim
  908. ## proc tabexpand(indent: int, text: string, tabsize: int = 4) =
  909. ## echo '\t'.repeat(indent div tabsize), ' '.repeat(indent mod tabsize),
  910. ## text
  911. ##
  912. ## tabexpand(4, "At four")
  913. ## tabexpand(5, "At five")
  914. ## tabexpand(6, "At six")
  915. result = newString(count)
  916. for i in 0..count-1: result[i] = c
  917. proc repeat*(s: string, n: Natural): string {.noSideEffect,
  918. rtl, extern: "nsuRepeatStr".} =
  919. ## Returns String `s` concatenated `n` times. Example:
  920. ##
  921. ## .. code-block:: nim
  922. ## echo "+++ STOP ".repeat(4), "+++"
  923. result = newStringOfCap(n * s.len)
  924. for i in 1..n: result.add(s)
  925. template spaces*(n: Natural): string = repeat(' ', n)
  926. ## Returns a String with `n` space characters. You can use this proc
  927. ## to left align strings. Example:
  928. ##
  929. ## .. code-block:: nim
  930. ## let
  931. ## width = 15
  932. ## text1 = "Hello user!"
  933. ## text2 = "This is a very long string"
  934. ## echo text1 & spaces(max(0, width - text1.len)) & "|"
  935. ## echo text2 & spaces(max(0, width - text2.len)) & "|"
  936. proc repeatChar*(count: Natural, c: char = ' '): string {.deprecated.} =
  937. ## deprecated: use repeat() or spaces()
  938. repeat(c, count)
  939. proc repeatStr*(count: Natural, s: string): string {.deprecated.} =
  940. ## deprecated: use repeat(string, count) or string.repeat(count)
  941. repeat(s, count)
  942. proc align*(s: string, count: Natural, padding = ' '): string {.
  943. noSideEffect, rtl, extern: "nsuAlignString".} =
  944. ## Aligns a string `s` with `padding`, so that it is of length `count`.
  945. ##
  946. ## `padding` characters (by default spaces) are added before `s` resulting in
  947. ## right alignment. If ``s.len >= count``, no spaces are added and `s` is
  948. ## returned unchanged. If you need to left align a string use the `repeatChar
  949. ## proc <#repeatChar>`_. Example:
  950. ##
  951. ## .. code-block:: nim
  952. ## assert align("abc", 4) == " abc"
  953. ## assert align("a", 0) == "a"
  954. ## assert align("1232", 6) == " 1232"
  955. ## assert align("1232", 6, '#') == "##1232"
  956. if s.len < count:
  957. result = newString(count)
  958. let spaces = count - s.len
  959. for i in 0..spaces-1: result[i] = padding
  960. for i in spaces..count-1: result[i] = s[i-spaces]
  961. else:
  962. result = s
  963. iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[
  964. token: string, isSep: bool] =
  965. ## Tokenizes the string `s` into substrings.
  966. ##
  967. ## Substrings are separated by a substring containing only `seps`.
  968. ## Examples:
  969. ##
  970. ## .. code-block:: nim
  971. ## for word in tokenize(" this is an example "):
  972. ## writeLine(stdout, word)
  973. ##
  974. ## Results in:
  975. ##
  976. ## .. code-block:: nim
  977. ## (" ", true)
  978. ## ("this", false)
  979. ## (" ", true)
  980. ## ("is", false)
  981. ## (" ", true)
  982. ## ("an", false)
  983. ## (" ", true)
  984. ## ("example", false)
  985. ## (" ", true)
  986. var i = 0
  987. while true:
  988. var j = i
  989. var isSep = s[j] in seps
  990. while j < s.len and (s[j] in seps) == isSep: inc(j)
  991. if j > i:
  992. yield (substr(s, i, j-1), isSep)
  993. else:
  994. break
  995. i = j
  996. proc wordWrap*(s: string, maxLineWidth = 80,
  997. splitLongWords = true,
  998. seps: set[char] = Whitespace,
  999. newLine = "\n"): string {.
  1000. noSideEffect, rtl, extern: "nsuWordWrap".} =
  1001. ## Word wraps `s`.
  1002. result = newStringOfCap(s.len + s.len shr 6)
  1003. var spaceLeft = maxLineWidth
  1004. var lastSep = ""
  1005. for word, isSep in tokenize(s, seps):
  1006. if isSep:
  1007. lastSep = word
  1008. spaceLeft = spaceLeft - len(word)
  1009. continue
  1010. if len(word) > spaceLeft:
  1011. if splitLongWords and len(word) > maxLineWidth:
  1012. result.add(substr(word, 0, spaceLeft-1))
  1013. var w = spaceLeft+1
  1014. var wordLeft = len(word) - spaceLeft
  1015. while wordLeft > 0:
  1016. result.add(newLine)
  1017. var L = min(maxLineWidth, wordLeft)
  1018. spaceLeft = maxLineWidth - L
  1019. result.add(substr(word, w, w+L-1))
  1020. inc(w, L)
  1021. dec(wordLeft, L)
  1022. else:
  1023. spaceLeft = maxLineWidth - len(word)
  1024. result.add(newLine)
  1025. result.add(word)
  1026. else:
  1027. spaceLeft = spaceLeft - len(word)
  1028. result.add(lastSep & word)
  1029. lastSep.setLen(0)
  1030. proc indent*(s: string, count: Natural, padding: string = " "): string
  1031. {.noSideEffect, rtl, extern: "nsuIndent".} =
  1032. ## Indents each line in ``s`` by ``count`` amount of ``padding``.
  1033. ##
  1034. ## **Note:** This does not preserve the new line characters used in ``s``.
  1035. result = ""
  1036. var i = 0
  1037. for line in s.splitLines():
  1038. if i != 0:
  1039. result.add("\n")
  1040. for j in 1..count:
  1041. result.add(padding)
  1042. result.add(line)
  1043. i.inc
  1044. proc unindent*(s: string, count: Natural, padding: string = " "): string
  1045. {.noSideEffect, rtl, extern: "nsuUnindent".} =
  1046. ## Unindents each line in ``s`` by ``count`` amount of ``padding``.
  1047. ##
  1048. ## **Note:** This does not preserve the new line characters used in ``s``.
  1049. result = ""
  1050. var i = 0
  1051. for line in s.splitLines():
  1052. if i != 0:
  1053. result.add("\n")
  1054. var indentCount = 0
  1055. for j in 0..<count.int:
  1056. indentCount.inc
  1057. if line[j .. j + <padding.len] != padding:
  1058. indentCount = j
  1059. break
  1060. result.add(line[indentCount*padding.len .. ^1])
  1061. i.inc
  1062. proc unindent*(s: string): string
  1063. {.noSideEffect, rtl, extern: "nsuUnindentAll".} =
  1064. ## Removes all indentation composed of whitespace from each line in ``s``.
  1065. ##
  1066. ## For example:
  1067. ##
  1068. ## .. code-block:: nim
  1069. ## const x = """
  1070. ## Hello
  1071. ## There
  1072. ## """.unindent()
  1073. ##
  1074. ## doAssert x == "Hello\nThere\n"
  1075. unindent(s, 1000) # TODO: Passing a 1000 is a bit hackish.
  1076. proc startsWith*(s, prefix: string): bool {.noSideEffect,
  1077. rtl, extern: "nsuStartsWith".} =
  1078. ## Returns true iff ``s`` starts with ``prefix``.
  1079. ##
  1080. ## If ``prefix == ""`` true is returned.
  1081. var i = 0
  1082. while true:
  1083. if prefix[i] == '\0': return true
  1084. if s[i] != prefix[i]: return false
  1085. inc(i)
  1086. proc startsWith*(s: string, prefix: char): bool {.noSideEffect, inline.} =
  1087. ## Returns true iff ``s`` starts with ``prefix``.
  1088. result = s[0] == prefix
  1089. proc endsWith*(s, suffix: string): bool {.noSideEffect,
  1090. rtl, extern: "nsuEndsWith".} =
  1091. ## Returns true iff ``s`` ends with ``suffix``.
  1092. ##
  1093. ## If ``suffix == ""`` true is returned.
  1094. var i = 0
  1095. var j = len(s) - len(suffix)
  1096. while i+j <% s.len:
  1097. if s[i+j] != suffix[i]: return false
  1098. inc(i)
  1099. if suffix[i] == '\0': return true
  1100. proc endsWith*(s: string, suffix: char): bool {.noSideEffect, inline.} =
  1101. ## Returns true iff ``s`` ends with ``suffix``.
  1102. result = s[s.high] == suffix
  1103. proc continuesWith*(s, substr: string, start: Natural): bool {.noSideEffect,
  1104. rtl, extern: "nsuContinuesWith".} =
  1105. ## Returns true iff ``s`` continues with ``substr`` at position ``start``.
  1106. ##
  1107. ## If ``substr == ""`` true is returned.
  1108. var i = 0
  1109. while true:
  1110. if substr[i] == '\0': return true
  1111. if s[i+start] != substr[i]: return false
  1112. inc(i)
  1113. proc addSep*(dest: var string, sep = ", ", startLen: Natural = 0)
  1114. {.noSideEffect, inline.} =
  1115. ## Adds a separator to `dest` only if its length is bigger than `startLen`.
  1116. ##
  1117. ## A shorthand for:
  1118. ##
  1119. ## .. code-block:: nim
  1120. ## if dest.len > startLen: add(dest, sep)
  1121. ##
  1122. ## This is often useful for generating some code where the items need to
  1123. ## be *separated* by `sep`. `sep` is only added if `dest` is longer than
  1124. ## `startLen`. The following example creates a string describing
  1125. ## an array of integers:
  1126. ##
  1127. ## .. code-block:: nim
  1128. ## var arr = "["
  1129. ## for x in items([2, 3, 5, 7, 11]):
  1130. ## addSep(arr, startLen=len("["))
  1131. ## add(arr, $x)
  1132. ## add(arr, "]")
  1133. if dest.len > startLen: add(dest, sep)
  1134. proc allCharsInSet*(s: string, theSet: set[char]): bool =
  1135. ## Returns true iff each character of `s` is in the set `theSet`.
  1136. for c in items(s):
  1137. if c notin theSet: return false
  1138. return true
  1139. proc abbrev*(s: string, possibilities: openArray[string]): int =
  1140. ## Returns the index of the first item in `possibilities` if not ambiguous.
  1141. ##
  1142. ## Returns -1 if no item has been found and -2 if multiple items match.
  1143. result = -1 # none found
  1144. for i in 0..possibilities.len-1:
  1145. if possibilities[i].startsWith(s):
  1146. if possibilities[i] == s:
  1147. # special case: exact match shouldn't be ambiguous
  1148. return i
  1149. if result >= 0: return -2 # ambiguous
  1150. result = i
  1151. # ---------------------------------------------------------------------------
  1152. proc join*(a: openArray[string], sep: string = ""): string {.
  1153. noSideEffect, rtl, extern: "nsuJoinSep".} =
  1154. ## Concatenates all strings in `a` separating them with `sep`.
  1155. if len(a) > 0:
  1156. var L = sep.len * (a.len-1)
  1157. for i in 0..high(a): inc(L, a[i].len)
  1158. result = newStringOfCap(L)
  1159. add(result, a[0])
  1160. for i in 1..high(a):
  1161. add(result, sep)
  1162. add(result, a[i])
  1163. else:
  1164. result = ""
  1165. proc join*[T: not string](a: openArray[T], sep: string = ""): string {.
  1166. noSideEffect, rtl.} =
  1167. ## Converts all elements in `a` to strings using `$` and concatenates them
  1168. ## with `sep`.
  1169. result = ""
  1170. for i, x in a:
  1171. if i > 0:
  1172. add(result, sep)
  1173. add(result, $x)
  1174. type
  1175. SkipTable = array[char, int]
  1176. {.push profiler: off.}
  1177. proc preprocessSub(sub: string, a: var SkipTable) =
  1178. var m = len(sub)
  1179. for i in 0..0xff: a[chr(i)] = m+1
  1180. for i in 0..m-1: a[sub[i]] = m-i
  1181. {.pop.}
  1182. proc findAux(s, sub: string, start, last: int, a: SkipTable): int =
  1183. # Fast "quick search" algorithm:
  1184. var
  1185. m = len(sub)
  1186. n = last + 1
  1187. # search:
  1188. var j = start
  1189. while j <= n - m:
  1190. block match:
  1191. for k in 0..m-1:
  1192. if sub[k] != s[k+j]: break match
  1193. return j
  1194. inc(j, a[s[j+m]])
  1195. return -1
  1196. when not (defined(js) or defined(nimdoc) or defined(nimscript)):
  1197. proc c_memchr(cstr: pointer, c: char, n: csize): pointer {.
  1198. importc: "memchr", header: "<string.h>" .}
  1199. const hasCStringBuiltin = true
  1200. else:
  1201. const hasCStringBuiltin = false
  1202. proc find*(s, sub: string, start: Natural = 0, last: Natural = 0): int {.noSideEffect,
  1203. rtl, extern: "nsuFindStr".} =
  1204. ## Searches for `sub` in `s` inside range `start`..`last`.
  1205. ## If `last` is unspecified, it defaults to `s.high`.
  1206. ##
  1207. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1208. var a {.noinit.}: SkipTable
  1209. let last = if last==0: s.high else: last
  1210. preprocessSub(sub, a)
  1211. result = findAux(s, sub, start, last, a)
  1212. proc find*(s: string, sub: char, start: Natural = 0, last: Natural = 0): int {.noSideEffect,
  1213. rtl, extern: "nsuFindChar".} =
  1214. ## Searches for `sub` in `s` inside range `start`..`last`.
  1215. ## If `last` is unspecified, it defaults to `s.high`.
  1216. ##
  1217. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1218. let last = if last==0: s.high else: last
  1219. when nimvm:
  1220. for i in start..last:
  1221. if sub == s[i]: return i
  1222. else:
  1223. when hasCStringBuiltin:
  1224. let found = c_memchr(s[start].unsafeAddr, sub, last-start+1)
  1225. if not found.isNil:
  1226. return cast[ByteAddress](found) -% cast[ByteAddress](s.cstring)
  1227. else:
  1228. for i in start..last:
  1229. if sub == s[i]: return i
  1230. return -1
  1231. proc find*(s: string, chars: set[char], start: Natural = 0, last: Natural = 0): int {.noSideEffect,
  1232. rtl, extern: "nsuFindCharSet".} =
  1233. ## Searches for `chars` in `s` inside range `start`..`last`.
  1234. ## If `last` is unspecified, it defaults to `s.high`.
  1235. ##
  1236. ## If `s` contains none of the characters in `chars`, -1 is returned.
  1237. let last = if last==0: s.high else: last
  1238. for i in start..last:
  1239. if s[i] in chars: return i
  1240. return -1
  1241. proc rfind*(s, sub: string, start: int = -1): int {.noSideEffect.} =
  1242. ## Searches for `sub` in `s` in reverse, starting at `start` and going
  1243. ## backwards to 0.
  1244. ##
  1245. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1246. let realStart = if start == -1: s.len else: start
  1247. for i in countdown(realStart-sub.len, 0):
  1248. for j in 0..sub.len-1:
  1249. result = i
  1250. if sub[j] != s[i+j]:
  1251. result = -1
  1252. break
  1253. if result != -1: return
  1254. return -1
  1255. proc rfind*(s: string, sub: char, start: int = -1): int {.noSideEffect,
  1256. rtl.} =
  1257. ## Searches for `sub` in `s` in reverse starting at position `start`.
  1258. ##
  1259. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1260. let realStart = if start == -1: s.len-1 else: start
  1261. for i in countdown(realStart, 0):
  1262. if sub == s[i]: return i
  1263. return -1
  1264. proc rfind*(s: string, chars: set[char], start: int = -1): int {.noSideEffect.} =
  1265. ## Searches for `chars` in `s` in reverse starting at position `start`.
  1266. ##
  1267. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1268. let realStart = if start == -1: s.len-1 else: start
  1269. for i in countdown(realStart, 0):
  1270. if s[i] in chars: return i
  1271. return -1
  1272. proc center*(s: string, width: int, fillChar: char = ' '): string {.
  1273. noSideEffect, rtl, extern: "nsuCenterString".} =
  1274. ## Return the contents of `s` centered in a string `width` long using
  1275. ## `fillChar` as padding.
  1276. ##
  1277. ## The original string is returned if `width` is less than or equal
  1278. ## to `s.len`.
  1279. if width <= s.len:
  1280. return s
  1281. result = newString(width)
  1282. # Left padding will be one fillChar
  1283. # smaller if there are an odd number
  1284. # of characters
  1285. let
  1286. charsLeft = (width - s.len)
  1287. leftPadding = charsLeft div 2
  1288. for i in 0 ..< width:
  1289. if i >= leftPadding and i < leftPadding + s.len:
  1290. # we are where the string should be located
  1291. result[i] = s[i-leftPadding]
  1292. else:
  1293. # we are either before or after where
  1294. # the string s should go
  1295. result[i] = fillChar
  1296. proc count*(s: string, sub: string, overlapping: bool = false): int {.
  1297. noSideEffect, rtl, extern: "nsuCountString".} =
  1298. ## Count the occurrences of a substring `sub` in the string `s`.
  1299. ## Overlapping occurrences of `sub` only count when `overlapping`
  1300. ## is set to true.
  1301. var i = 0
  1302. while true:
  1303. i = s.find(sub, i)
  1304. if i < 0:
  1305. break
  1306. if overlapping:
  1307. inc i
  1308. else:
  1309. i += sub.len
  1310. inc result
  1311. proc count*(s: string, sub: char): int {.noSideEffect,
  1312. rtl, extern: "nsuCountChar".} =
  1313. ## Count the occurrences of the character `sub` in the string `s`.
  1314. for c in s:
  1315. if c == sub:
  1316. inc result
  1317. proc count*(s: string, subs: set[char]): int {.noSideEffect,
  1318. rtl, extern: "nsuCountCharSet".} =
  1319. ## Count the occurrences of the group of character `subs` in the string `s`.
  1320. for c in s:
  1321. if c in subs:
  1322. inc result
  1323. proc quoteIfContainsWhite*(s: string): string {.deprecated.} =
  1324. ## Returns ``'"' & s & '"'`` if `s` contains a space and does not
  1325. ## start with a quote, else returns `s`.
  1326. ##
  1327. ## **DEPRECATED** as it was confused for shell quoting function. For this
  1328. ## application use `osproc.quoteShell <osproc.html#quoteShell>`_.
  1329. if find(s, {' ', '\t'}) >= 0 and s[0] != '"':
  1330. result = '"' & s & '"'
  1331. else:
  1332. result = s
  1333. proc contains*(s: string, c: char): bool {.noSideEffect.} =
  1334. ## Same as ``find(s, c) >= 0``.
  1335. return find(s, c) >= 0
  1336. proc contains*(s, sub: string): bool {.noSideEffect.} =
  1337. ## Same as ``find(s, sub) >= 0``.
  1338. return find(s, sub) >= 0
  1339. proc contains*(s: string, chars: set[char]): bool {.noSideEffect.} =
  1340. ## Same as ``find(s, chars) >= 0``.
  1341. return find(s, chars) >= 0
  1342. proc replace*(s, sub: string, by = ""): string {.noSideEffect,
  1343. rtl, extern: "nsuReplaceStr".} =
  1344. ## Replaces `sub` in `s` by the string `by`.
  1345. var a {.noinit.}: SkipTable
  1346. result = ""
  1347. preprocessSub(sub, a)
  1348. let last = s.high
  1349. var i = 0
  1350. while true:
  1351. var j = findAux(s, sub, i, last, a)
  1352. if j < 0: break
  1353. add result, substr(s, i, j - 1)
  1354. add result, by
  1355. i = j + len(sub)
  1356. # copy the rest:
  1357. add result, substr(s, i)
  1358. proc replace*(s: string, sub, by: char): string {.noSideEffect,
  1359. rtl, extern: "nsuReplaceChar".} =
  1360. ## Replaces `sub` in `s` by the character `by`.
  1361. ##
  1362. ## Optimized version of `replace <#replace,string,string>`_ for characters.
  1363. result = newString(s.len)
  1364. var i = 0
  1365. while i < s.len:
  1366. if s[i] == sub: result[i] = by
  1367. else: result[i] = s[i]
  1368. inc(i)
  1369. proc replaceWord*(s, sub: string, by = ""): string {.noSideEffect,
  1370. rtl, extern: "nsuReplaceWord".} =
  1371. ## Replaces `sub` in `s` by the string `by`.
  1372. ##
  1373. ## Each occurrence of `sub` has to be surrounded by word boundaries
  1374. ## (comparable to ``\\w`` in regular expressions), otherwise it is not
  1375. ## replaced.
  1376. const wordChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\128'..'\255'}
  1377. var a {.noinit.}: SkipTable
  1378. result = ""
  1379. preprocessSub(sub, a)
  1380. var i = 0
  1381. let last = s.high
  1382. while true:
  1383. var j = findAux(s, sub, i, last, a)
  1384. if j < 0: break
  1385. # word boundary?
  1386. if (j == 0 or s[j-1] notin wordChars) and
  1387. (j+sub.len >= s.len or s[j+sub.len] notin wordChars):
  1388. add result, substr(s, i, j - 1)
  1389. add result, by
  1390. i = j + len(sub)
  1391. else:
  1392. add result, substr(s, i, j)
  1393. i = j + 1
  1394. # copy the rest:
  1395. add result, substr(s, i)
  1396. proc multiReplace*(s: string, replacements: varargs[(string, string)]): string {.noSideEffect.} =
  1397. ## Same as replace, but specialized for doing multiple replacements in a single
  1398. ## pass through the input string.
  1399. ##
  1400. ## Calling replace multiple times after each other is inefficient and result in too many allocations
  1401. ## follwed by immediate deallocations as portions of the string gets replaced.
  1402. ## multiReplace performs all replacements in a single pass.
  1403. ##
  1404. ## If the resulting string is not longer than the original input string, only a single
  1405. ## memory allocation is required.
  1406. ##
  1407. ## The order of the replacements does matter. Earlier replacements are preferred over later
  1408. ## replacements in the argument list.
  1409. result = newStringOfCap(s.len)
  1410. var i = 0
  1411. var fastChk: set[char] = {}
  1412. for tup in replacements: fastChk.incl(tup[0][0]) # Include first character of all replacements
  1413. while i < s.len:
  1414. block sIteration:
  1415. # Assume most chars in s are not candidates for any replacement operation
  1416. if s[i] in fastChk:
  1417. for tup in replacements:
  1418. if s.continuesWith(tup[0], i):
  1419. add result, tup[1]
  1420. inc(i, tup[0].len)
  1421. break sIteration
  1422. # No matching replacement found
  1423. # copy current character from s
  1424. add result, s[i]
  1425. inc(i)
  1426. proc delete*(s: var string, first, last: int) {.noSideEffect,
  1427. rtl, extern: "nsuDelete".} =
  1428. ## Deletes in `s` the characters at position `first` .. `last`.
  1429. ##
  1430. ## This modifies `s` itself, it does not return a copy.
  1431. var i = first
  1432. var j = last+1
  1433. var newLen = len(s)-j+i
  1434. while i < newLen:
  1435. s[i] = s[j]
  1436. inc(i)
  1437. inc(j)
  1438. setLen(s, newLen)
  1439. proc parseOctInt*(s: string): int {.noSideEffect,
  1440. rtl, extern: "nsuParseOctInt".} =
  1441. ## Parses an octal integer value contained in `s`.
  1442. ##
  1443. ## If `s` is not a valid integer, `ValueError` is raised. `s` can have one
  1444. ## of the following optional prefixes: ``0o``, ``0O``. Underscores within
  1445. ## `s` are ignored.
  1446. var i = 0
  1447. if s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2)
  1448. while true:
  1449. case s[i]
  1450. of '_': inc(i)
  1451. of '0'..'7':
  1452. result = result shl 3 or (ord(s[i]) - ord('0'))
  1453. inc(i)
  1454. of '\0': break
  1455. else: raise newException(ValueError, "invalid integer: " & s)
  1456. proc toOct*(x: BiggestInt, len: Positive): string {.noSideEffect,
  1457. rtl, extern: "nsuToOct".} =
  1458. ## Converts `x` into its octal representation.
  1459. ##
  1460. ## The resulting string is always `len` characters long. No leading ``0o``
  1461. ## prefix is generated.
  1462. var
  1463. mask: BiggestInt = 7
  1464. shift: BiggestInt = 0
  1465. assert(len > 0)
  1466. result = newString(len)
  1467. for j in countdown(len-1, 0):
  1468. result[j] = chr(int((x and mask) shr shift) + ord('0'))
  1469. shift = shift + 3
  1470. mask = mask shl 3
  1471. proc toBin*(x: BiggestInt, len: Positive): string {.noSideEffect,
  1472. rtl, extern: "nsuToBin".} =
  1473. ## Converts `x` into its binary representation.
  1474. ##
  1475. ## The resulting string is always `len` characters long. No leading ``0b``
  1476. ## prefix is generated.
  1477. var
  1478. mask: BiggestInt = 1
  1479. shift: BiggestInt = 0
  1480. assert(len > 0)
  1481. result = newString(len)
  1482. for j in countdown(len-1, 0):
  1483. result[j] = chr(int((x and mask) shr shift) + ord('0'))
  1484. shift = shift + 1
  1485. mask = mask shl 1
  1486. proc insertSep*(s: string, sep = '_', digits = 3): string {.noSideEffect,
  1487. rtl, extern: "nsuInsertSep".} =
  1488. ## Inserts the separator `sep` after `digits` digits from right to left.
  1489. ##
  1490. ## Even though the algorithm works with any string `s`, it is only useful
  1491. ## if `s` contains a number.
  1492. ## Example: ``insertSep("1000000") == "1_000_000"``
  1493. var L = (s.len-1) div digits + s.len
  1494. result = newString(L)
  1495. var j = 0
  1496. dec(L)
  1497. for i in countdown(len(s)-1, 0):
  1498. if j == digits:
  1499. result[L] = sep
  1500. dec(L)
  1501. j = 0
  1502. result[L] = s[i]
  1503. inc(j)
  1504. dec(L)
  1505. proc escape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
  1506. rtl, extern: "nsuEscape".} =
  1507. ## Escapes a string `s`.
  1508. ##
  1509. ## This does these operations (at the same time):
  1510. ## * replaces any ``\`` by ``\\``
  1511. ## * replaces any ``'`` by ``\'``
  1512. ## * replaces any ``"`` by ``\"``
  1513. ## * replaces any other character in the set ``{'\0'..'\31', '\127'..'\255'}``
  1514. ## by ``\xHH`` where ``HH`` is its hexadecimal value.
  1515. ## The procedure has been designed so that its output is usable for many
  1516. ## different common syntaxes. The resulting string is prefixed with
  1517. ## `prefix` and suffixed with `suffix`. Both may be empty strings.
  1518. ## **Note**: This is not correct for producing Ansi C code!
  1519. result = newStringOfCap(s.len + s.len shr 2)
  1520. result.add(prefix)
  1521. for c in items(s):
  1522. case c
  1523. of '\0'..'\31', '\127'..'\255':
  1524. add(result, "\\x")
  1525. add(result, toHex(ord(c), 2))
  1526. of '\\': add(result, "\\\\")
  1527. of '\'': add(result, "\\'")
  1528. of '\"': add(result, "\\\"")
  1529. else: add(result, c)
  1530. add(result, suffix)
  1531. proc unescape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
  1532. rtl, extern: "nsuUnescape".} =
  1533. ## Unescapes a string `s`.
  1534. ##
  1535. ## This complements `escape <#escape>`_ as it performs the opposite
  1536. ## operations.
  1537. ##
  1538. ## If `s` does not begin with ``prefix`` and end with ``suffix`` a
  1539. ## ValueError exception will be raised.
  1540. result = newStringOfCap(s.len)
  1541. var i = prefix.len
  1542. if not s.startsWith(prefix):
  1543. raise newException(ValueError,
  1544. "String does not start with a prefix of: " & prefix)
  1545. while true:
  1546. if i == s.len-suffix.len: break
  1547. case s[i]
  1548. of '\\':
  1549. case s[i+1]:
  1550. of 'x':
  1551. inc i, 2
  1552. var c: int
  1553. i += parseutils.parseHex(s, c, i, maxLen=2)
  1554. result.add(chr(c))
  1555. dec i, 2
  1556. of '\\':
  1557. result.add('\\')
  1558. of '\'':
  1559. result.add('\'')
  1560. of '\"':
  1561. result.add('\"')
  1562. else: result.add("\\" & s[i+1])
  1563. inc(i)
  1564. of '\0': break
  1565. else:
  1566. result.add(s[i])
  1567. inc(i)
  1568. if not s.endsWith(suffix):
  1569. raise newException(ValueError,
  1570. "String does not end with a suffix of: " & suffix)
  1571. proc validIdentifier*(s: string): bool {.noSideEffect,
  1572. rtl, extern: "nsuValidIdentifier".} =
  1573. ## Returns true if `s` is a valid identifier.
  1574. ##
  1575. ## A valid identifier starts with a character of the set `IdentStartChars`
  1576. ## and is followed by any number of characters of the set `IdentChars`.
  1577. if s[0] in IdentStartChars:
  1578. for i in 1..s.len-1:
  1579. if s[i] notin IdentChars: return false
  1580. return true
proc editDistance*(a, b: string): int {.noSideEffect,
  rtl, extern: "nsuEditDistance".} =
  ## Returns the edit distance between `a` and `b`.
  ##
  ## This uses the `Levenshtein`:idx: distance algorithm with only a linear
  ## memory overhead. This implementation is highly optimized!
  ##
  ## The shared prefix and suffix of both strings are stripped first; only
  ## the differing middle parts are compared using a single row of integers.
  var len1 = a.len
  var len2 = b.len
  if len1 > len2:
    # make `b` the longer string
    return editDistance(b, a)

  # strip common prefix:
  # `s` is the offset of the first differing character. The `'\0'` test stops
  # the scan at the string terminator when `a` is a prefix of `b`.
  var s = 0
  while a[s] == b[s] and a[s] != '\0':
    inc(s)
    dec(len1)
    dec(len2)
  # strip common suffix:
  while len1 > 0 and len2 > 0 and a[s+len1-1] == b[s+len2-1]:
    dec(len1)
    dec(len2)
  # trivial cases:
  # one remainder is empty, so the distance is the length of the other one
  if len1 == 0: return len2
  if len2 == 0: return len1

  # another special case:
  # a single remaining character in `a`: if it occurs anywhere in the rest of
  # `b` one edit is saved, otherwise every character of `b` costs an edit.
  if len1 == 1:
    for j in s..s+len2-1:
      if a[s] == b[j]: return len2 - 1
    return len2

  # From here on both lengths include one extra row/column of the matrix.
  inc(len1)
  inc(len2)
  var half = len1 shr 1
  # initialize first row:
  #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2*sizeof(int)))
  var row: seq[int]
  newSeq(row, len2)
  var e = s + len2 - 1 # end marker
  for i in 1..len2 - half - 1: row[i] = i
  row[0] = len1 - half - 1
  # One iteration per remaining character of `a`; `row` is updated in place.
  for i in 1 .. len1 - 1:
    var char1 = a[i + s - 1]  # character of `a` handled in this iteration
    var char2p: int           # offset (relative to `s`) of the current char of `b`
    var D, x: int             # running cost values carried along the row
    var p: int                # current index into `row`
    if i >= len1 - half:
      # skip the upper triangle:
      var offset = i - len1 + half
      char2p = offset
      p = offset
      var c3 = row[p] + ord(char1 != b[s + char2p])
      inc(p)
      inc(char2p)
      x = row[p] + 1
      D = x
      if x > c3: x = c3
      row[p] = x
      inc(p)
    else:
      p = 1
      char2p = 0
      D = i
      x = i
    if i <= half + 1:
      # skip the lower triangle:
      e = len2 + i - half - 2
    # main:
    # `x` becomes the minimum of three candidates: `c3` (previous diagonal
    # value plus 1 if the characters differ), the value just written to the
    # left plus 1, and the old value of this cell plus 1.
    while p <= e:
      dec(D)
      var c3 = D + ord(char1 != b[char2p + s])
      inc(char2p)
      inc(x)
      if x > c3: x = c3
      D = row[p] + 1
      if x > D: x = D
      row[p] = x
      inc(p)
    # lower triangle sentinel:
    if i <= half:
      dec(D)
      var c3 = D + ord(char1 != b[char2p + s])
      inc(x)
      if x > c3: x = c3
      row[p] = x
  # The cell at the end marker holds the distance of the stripped strings.
  result = row[e]
  #dealloc(row)
# floating point formatting:
when not defined(js):
  # Binding to C's variadic `sprintf` from <stdio.h>; not declared for the
  # JavaScript target.
  proc c_sprintf(buf, frmt: cstring): cint {.header: "<stdio.h>",
                                     importc: "sprintf", varargs, noSideEffect.}

type
  FloatFormatMode* = enum ## the different modes of floating point formatting
    ffDefault,         ## use the shorter floating point notation
    ffDecimal,         ## use decimal floating point notation
    ffScientific       ## use scientific notation (using ``e`` character)

# Keeps the old type name `TFloatFormat` usable as a deprecated alias.
{.deprecated: [TFloatFormat: FloatFormatMode].}
proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault,
                         precision: range[0..32] = 16;
                         decimalSep = '.'): string {.
                         noSideEffect, rtl, extern: "nsu$1".} =
  ## Converts a floating point value `f` to a string.
  ##
  ## If ``format == ffDecimal`` then precision is the number of digits to
  ## be printed after the decimal point.
  ## If ``format == ffScientific`` then precision is the maximum number
  ## of significant digits to be printed.
  ## `precision`'s default value is the maximum number of meaningful digits
  ## after the decimal point for Nim's ``biggestFloat`` type.
  ##
  ## If ``precision == 0``, it tries to format it nicely.
  ##
  ## Every ``'.'`` or ``','`` in the formatted text is replaced by
  ## `decimalSep`.
  when defined(js):
    # JavaScript target: delegate to the methods of the JS number.
    var res: cstring
    case format
    of ffDefault:
      {.emit: "`res` = `f`.toString();".}
    of ffDecimal:
      {.emit: "`res` = `f`.toFixed(`precision`);".}
    of ffScientific:
      {.emit: "`res` = `f`.toExponential(`precision`);".}
    result = $res
    for i in 0 ..< result.len:
      # Depending on the locale either dot or comma is produced,
      # but nothing else is possible:
      if result[i] in {'.', ','}: result[i] = decimalsep
  else:
    # Native targets: build a printf format string by hand and call sprintf.
    # The conversion character is selected by `format`.
    const floatFormatToChar: array[FloatFormatMode, char] = ['g', 'f', 'e']
    var
      frmtstr {.noinit.}: array[0..5, char]
      buf {.noinit.}: array[0..2500, char]
      L: cint
    frmtstr[0] = '%'
    if precision > 0:
      # Format is "%#.*<c>": the precision is passed as an extra argument.
      # NOTE(review): `precision` goes through C varargs for the `*` field,
      # which expects a C `int` -- confirm the argument width matches.
      frmtstr[1] = '#'
      frmtstr[2] = '.'
      frmtstr[3] = '*'
      frmtstr[4] = floatFormatToChar[format]
      frmtstr[5] = '\0'
      L = c_sprintf(buf, frmtstr, precision, f)
    else:
      # Format is "%<c>": no explicit precision is given.
      frmtstr[1] = floatFormatToChar[format]
      frmtstr[2] = '\0'
      L = c_sprintf(buf, frmtstr, f)
    # `L` is the value returned by sprintf; it is used as the result length.
    result = newString(L)
    for i in 0 ..< L:
      # Depending on the locale either dot or comma is produced,
      # but nothing else is possible:
      if buf[i] in {'.', ','}: result[i] = decimalsep
      else: result[i] = buf[i]
  1728. proc formatFloat*(f: float, format: FloatFormatMode = ffDefault,
  1729. precision: range[0..32] = 16; decimalSep = '.'): string {.
  1730. noSideEffect, rtl, extern: "nsu$1".} =
  1731. ## Converts a floating point value `f` to a string.
  1732. ##
  1733. ## If ``format == ffDecimal`` then precision is the number of digits to
  1734. ## be printed after the decimal point.
  1735. ## If ``format == ffScientific`` then precision is the maximum number
  1736. ## of significant digits to be printed.
  1737. ## `precision`'s default value is the maximum number of meaningful digits
  1738. ## after the decimal point for Nim's ``float`` type.
  1739. ##
  1740. ## If ``precision == 0``, it tries to format it nicely.
  1741. result = formatBiggestFloat(f, format, precision, decimalSep)
  1742. proc trimZeros*(x: var string) {.noSideEffect.} =
  1743. ## Trim trailing zeros from a formatted floating point
  1744. ## value (`x`). Modifies the passed value.
  1745. var spl: seq[string]
  1746. if x.contains('.') or x.contains(','):
  1747. if x.contains('e'):
  1748. spl= x.split('e')
  1749. x = spl[0]
  1750. while x[x.high] == '0':
  1751. x.setLen(x.len-1)
  1752. if x[x.high] in [',', '.']:
  1753. x.setLen(x.len-1)
  1754. if spl.len > 0:
  1755. x &= "e" & spl[1]
type
  BinaryPrefixMode* = enum ## the different names for binary prefixes
    bpIEC,       ## use the IEC/ISO standard prefixes such as kibi
    bpColloquial ## use the colloquial kilo, mega etc
  1760. proc formatSize*(bytes: int64,
  1761. decimalSep = '.',
  1762. prefix = bpIEC,
  1763. includeSpace = false): string {.noSideEffect.} =
  1764. ## Rounds and formats `bytes`.
  1765. ##
  1766. ## By default, uses the IEC/ISO standard binary prefixes, so 1024 will be
  1767. ## formatted as 1KiB. Set prefix to `bpColloquial` to use the colloquial
  1768. ## names from the SI standard (e.g. k for 1000 being reused as 1024).
  1769. ##
  1770. ## `includeSpace` can be set to true to include the (SI preferred) space
  1771. ## between the number and the unit (e.g. 1 KiB).
  1772. ##
  1773. ## Examples:
  1774. ##
  1775. ## .. code-block:: nim
  1776. ##
  1777. ## formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB"
  1778. ## formatSize((2.234*1024*1024).int) == "2.234MiB"
  1779. ## formatSize(4096, includeSpace=true) == "4 KiB"
  1780. ## formatSize(4096, prefix=bpColloquial, includeSpace=true) == "4 kB"
  1781. ## formatSize(4096) == "4KiB"
  1782. ## formatSize(5_378_934, prefix=bpColloquial, decimalSep=',') == "5,13MB"
  1783. ##
  1784. const iecPrefixes = ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"]
  1785. const collPrefixes = ["", "k", "M", "G", "T", "P", "E", "Z", "Y"]
  1786. var
  1787. xb: int64 = bytes
  1788. fbytes: float
  1789. last_xb: int64 = bytes
  1790. matchedIndex: int
  1791. prefixes: array[9, string]
  1792. if prefix == bpColloquial:
  1793. prefixes = collPrefixes
  1794. else:
  1795. prefixes = iecPrefixes
  1796. # Iterate through prefixes seeing if value will be greater than
  1797. # 0 in each case
  1798. for index in 1..<prefixes.len:
  1799. last_xb = xb
  1800. xb = bytes div (1'i64 shl (index*10))
  1801. matchedIndex = index
  1802. if xb == 0:
  1803. xb = last_xb
  1804. matchedIndex = index - 1
  1805. break
  1806. # xb has the integer number for the latest value; index should be correct
  1807. fbytes = bytes.float / (1'i64 shl (matchedIndex*10)).float
  1808. result = formatFloat(fbytes, format=ffDecimal, precision=3, decimalSep=decimalSep)
  1809. result.trimZeros()
  1810. if includeSpace:
  1811. result &= " "
  1812. result &= prefixes[matchedIndex]
  1813. result &= "B"
proc formatEng*(f: BiggestFloat,
                precision: range[0..32] = 10,
                trim: bool = true,
                siPrefix: bool = false,
                unit: string = nil,
                decimalSep = '.'): string {.noSideEffect.} =
  ## Converts a floating point value `f` to a string using engineering notation.
  ##
  ## Numbers in the range -1000.0<f<1000.0 will be formatted without an
  ## exponent. Numbers outside of this range will be formatted as a
  ## significand in the range -1000.0<f<1000.0 and an exponent that will always
  ## be an integer multiple of 3, corresponding with the SI prefix scale k, M,
  ## G, T etc for numbers with an absolute value greater than 1 and m, μ, n, p
  ## etc for numbers with an absolute value less than 1.
  ##
  ## The default configuration (`trim=true` and `precision=10`) shows the
  ## **shortest** form that precisely (up to a maximum of 10 decimal places)
  ## displays the value. For example, 4.100000 will be displayed as 4.1 (which
  ## is mathematically identical) whereas 4.1000003 will be displayed as
  ## 4.1000003.
  ##
  ## If `trim` is set to true, trailing zeros will be removed; if false, the
  ## number of digits specified by `precision` will always be shown.
  ##
  ## `precision` can be used to set the number of digits to be shown after the
  ## decimal point or (if `trim` is true) the maximum number of digits to be
  ## shown.
  ##
  ## .. code-block:: nim
  ##
  ##    formatEng(0, 2, trim=false) == "0.00"
  ##    formatEng(0, 2) == "0"
  ##    formatEng(0.053, 0) == "53e-3"
  ##    formatEng(52731234, 2) == "52.73e6"
  ##    formatEng(-52731234, 2) == "-52.73e6"
  ##
  ## If `siPrefix` is set to true, the number will be displayed with the SI
  ## prefix corresponding to the exponent. For example 4100 will be displayed
  ## as "4.1 k" instead of "4.1e3". Note that `u` is used for micro- in place
  ## of the greek letter mu (μ) as per ISO 2955. Numbers with an absolute
  ## value outside of the range 1e-18<f<1000e18 (1a<f<1000E) will be displayed
  ## with an exponent rather than an SI prefix, regardless of whether
  ## `siPrefix` is true.
  ##
  ## If `unit` is not nil, the provided unit will be appended to the string
  ## (with a space as required by the SI standard). This behaviour is slightly
  ## different to appending the unit to the result as the location of the space
  ## is altered depending on whether there is an exponent.
  ##
  ## .. code-block:: nim
  ##
  ##    formatEng(4100, siPrefix=true, unit="V") == "4.1 kV"
  ##    formatEng(4.1, siPrefix=true, unit="V") == "4.1 V"
  ##    formatEng(4.1, siPrefix=true) == "4.1" # Note lack of space
  ##    formatEng(4100, siPrefix=true) == "4.1 k"
  ##    formatEng(4.1, siPrefix=true, unit="") == "4.1 " # Space with unit=""
  ##    formatEng(4100, siPrefix=true, unit="") == "4.1 k"
  ##    formatEng(4100) == "4.1e3"
  ##    formatEng(4100, unit="V") == "4.1e3 V"
  ##    formatEng(4100, unit="") == "4.1e3 " # Space with unit=""
  ##
  ## `decimalSep` is used as the decimal separator
  var
    absolute: BiggestFloat
    significand: BiggestFloat
    fexponent: BiggestFloat
    exponent: int
    splitResult: seq[string]
    suffix: string = ""
  proc getPrefix(exp: int): char =
    ## Get the SI prefix for a given exponent
    ##
    ## Assumes exponent is a multiple of 3; returns ' ' if no prefix found
    const siPrefixes = ['a','f','p','n','u','m',' ','k','M','G','T','P','E']
    # Index 6 is the "no prefix" slot, which corresponds to exponent 0.
    var index: int = (exp div 3) + 6
    result = ' '
    if index in low(siPrefixes)..high(siPrefixes):
      result = siPrefixes[index]

  # Most of the work is done with the sign ignored, so get the absolute value
  absolute = abs(f)
  significand = f

  if absolute == 0.0:
    # Simple case: just format it and force the exponent to 0
    exponent = 0
    result = significand.formatBiggestFloat(ffDecimal, precision, decimalSep='.')
  else:
    # Find the best exponent that's a multiple of 3
    fexponent = round(floor(log10(absolute)))
    fexponent = 3.0 * round(floor(fexponent / 3.0))
    # Adjust the significand for the new exponent
    significand /= pow(10.0, fexponent)

    # Round the significand and check whether it has affected
    # the exponent
    significand = round(significand, precision)
    absolute = abs(significand)
    if absolute >= 1000.0:
      significand *= 0.001
      fexponent += 3
    # Components of the result:
    result = significand.formatBiggestFloat(ffDecimal, precision, decimalSep='.')
    exponent = fexponent.int()

  # The number was formatted with '.' above so that it can be split here;
  # the requested `decimalSep` is inserted when the parts are joined again.
  splitResult = result.split('.')
  result = splitResult[0]
  # result should have at most one decimal character
  if splitResult.len() > 1:
    # If trim is set, we get rid of trailing zeros.  Don't use trimZeros here as
    # we can be a bit more efficient through knowledge that there will never be
    # an exponent in this part.
    if trim:
      while splitResult[1].endsWith("0"):
        # Trim last character
        splitResult[1].setLen(splitResult[1].len-1)
      # Only add the separator if any fractional digits are left.
      if splitResult[1].len() > 0:
        result &= decimalSep & splitResult[1]
    else:
      result &= decimalSep & splitResult[1]

  # Combine the results accordingly
  if siPrefix and exponent != 0:
    var p = getPrefix(exponent)
    if p != ' ':
      suffix = " " & p
      exponent = 0 # Exponent replaced by SI prefix
  # A unit is separated from the number by a space unless an SI prefix has
  # already added one.
  if suffix == "" and unit != nil:
    suffix = " "
  if unit != nil:
    suffix &= unit
  if exponent != 0:
    result &= "e" & $exponent
  result &= suffix
  1943. proc findNormalized(x: string, inArray: openArray[string]): int =
  1944. var i = 0
  1945. while i < high(inArray):
  1946. if cmpIgnoreStyle(x, inArray[i]) == 0: return i
  1947. inc(i, 2) # incrementing by 1 would probably lead to a
  1948. # security hole...
  1949. return -1
  1950. proc invalidFormatString() {.noinline.} =
  1951. raise newException(ValueError, "invalid format string")
proc addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {.
  noSideEffect, rtl, extern: "nsuAddf".} =
  ## The same as ``add(s, formatstr % a)``, but more efficient.
  ##
  ## The expanded text is appended to `s`. `ValueError` is raised (through
  ## `invalidFormatString`) if `formatstr` is ill-formed or refers to an
  ## argument that does not exist.
  const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'}
  var i = 0    # current position in `formatstr`
  var num = 0  # index of the argument the next ``$#`` refers to
  while i < len(formatstr):
    if formatstr[i] == '$':
      case formatstr[i+1] # again we use the fact that strings
                          # are zero-terminated here
      of '#':
        # ``$#``: the next argument in sequence. `>%` compares as unsigned,
        # so a negative index is rejected as well.
        if num >% a.high: invalidFormatString()
        add s, a[num]
        inc i, 2
        inc num
      of '$':
        # ``$$``: a verbatim dollar sign
        add s, '$'
        inc(i, 2)
      of '1'..'9', '-':
        # ``$N`` is the N-th argument (1-based); ``$-N`` counts from the end.
        var j = 0
        inc(i) # skip $
        var negative = formatstr[i] == '-'
        if negative: inc i
        while formatstr[i] in Digits:
          j = j * 10 + ord(formatstr[i]) - ord('0')
          inc(i)
        let idx = if not negative: j-1 else: a.len-j
        if idx >% a.high: invalidFormatString()
        add s, a[idx]
      of '{':
        # ``${name}``: the text between the braces is looked up as a key and
        # the argument following the matching key is inserted.
        var j = i+1
        while formatstr[j] notin {'\0', '}'}: inc(j)
        var x = findNormalized(substr(formatstr, i+2, j-1), a)
        if x >= 0 and x < high(a): add s, a[x+1]
        else: invalidFormatString()
        i = j+1
      of 'a'..'z', 'A'..'Z', '\128'..'\255', '_':
        # ``$name``: like ``${name}``; the name extends over all following
        # characters that are in `PatternChars`.
        var j = i+1
        while formatstr[j] in PatternChars: inc(j)
        var x = findNormalized(substr(formatstr, i+1, j-1), a)
        if x >= 0 and x < high(a): add s, a[x+1]
        else: invalidFormatString()
        i = j
      else:
        invalidFormatString()
    else:
      # ordinary character: copied verbatim
      add s, formatstr[i]
      inc(i)
  2000. proc `%` *(formatstr: string, a: openArray[string]): string {.noSideEffect,
  2001. rtl, extern: "nsuFormatOpenArray".} =
  2002. ## Interpolates a format string with the values from `a`.
  2003. ##
  2004. ## The `substitution`:idx: operator performs string substitutions in
  2005. ## `formatstr` and returns a modified `formatstr`. This is often called
  2006. ## `string interpolation`:idx:.
  2007. ##
  2008. ## This is best explained by an example:
  2009. ##
  2010. ## .. code-block:: nim
  2011. ## "$1 eats $2." % ["The cat", "fish"]
  2012. ##
  2013. ## Results in:
  2014. ##
  2015. ## .. code-block:: nim
  2016. ## "The cat eats fish."
  2017. ##
  2018. ## The substitution variables (the thing after the ``$``) are enumerated
  2019. ## from 1 to ``a.len``.
  2020. ## To produce a verbatim ``$``, use ``$$``.
  2021. ## The notation ``$#`` can be used to refer to the next substitution
  2022. ## variable:
  2023. ##
  2024. ## .. code-block:: nim
  2025. ## "$# eats $#." % ["The cat", "fish"]
  2026. ##
  2027. ## Substitution variables can also be words (that is
  2028. ## ``[A-Za-z_]+[A-Za-z0-9_]*``) in which case the arguments in `a` with even
  2029. ## indices are keys and with odd indices are the corresponding values.
  2030. ## An example:
  2031. ##
  2032. ## .. code-block:: nim
  2033. ## "$animal eats $food." % ["animal", "The cat", "food", "fish"]
  2034. ##
  2035. ## Results in:
  2036. ##
  2037. ## .. code-block:: nim
  2038. ## "The cat eats fish."
  2039. ##
  2040. ## The variables are compared with `cmpIgnoreStyle`. `ValueError` is
  2041. ## raised if an ill-formed format string has been passed to the `%` operator.
  2042. result = newStringOfCap(formatstr.len + a.len shl 4)
  2043. addf(result, formatstr, a)
  2044. proc `%` *(formatstr, a: string): string {.noSideEffect,
  2045. rtl, extern: "nsuFormatSingleElem".} =
  2046. ## This is the same as ``formatstr % [a]``.
  2047. result = newStringOfCap(formatstr.len + a.len)
  2048. addf(result, formatstr, [a])
  2049. proc format*(formatstr: string, a: varargs[string, `$`]): string {.noSideEffect,
  2050. rtl, extern: "nsuFormatVarargs".} =
  2051. ## This is the same as ``formatstr % a`` except that it supports
  2052. ## auto stringification.
  2053. result = newStringOfCap(formatstr.len + a.len)
  2054. addf(result, formatstr, a)
  2055. {.pop.}
  2056. proc removeSuffix*(s: var string, chars: set[char] = Newlines) {.
  2057. rtl, extern: "nsuRemoveSuffixCharSet".} =
  2058. ## Removes the first matching character from the string (in-place) given a
  2059. ## set of characters. If the set of characters is only equal to `Newlines`
  2060. ## then it will remove both the newline and return feed.
  2061. ## .. code-block:: nim
  2062. ## var
  2063. ## userInput = "Hello World!\r\n"
  2064. ## otherInput = "Hello!?!"
  2065. ## userInput.removeSuffix
  2066. ## userInput == "Hello World!"
  2067. ## userInput.removeSuffix({'!', '?'})
  2068. ## userInput == "Hello World"
  2069. ## otherInput.removeSuffix({'!', '?'})
  2070. ## otherInput == "Hello!?"
  2071. if s.len == 0: return
  2072. var last = len(s) - 1
  2073. if chars == Newlines:
  2074. if s[last] == '\10':
  2075. last -= 1
  2076. if s[last] == '\13':
  2077. last -= 1
  2078. else:
  2079. if s[last] in chars:
  2080. last -= 1
  2081. s.setLen(last + 1)
  2082. proc removeSuffix*(s: var string, c: char) {.
  2083. rtl, extern: "nsuRemoveSuffixChar".} =
  2084. ## Removes a single character (in-place) from a string.
  2085. ## .. code-block:: nim
  2086. ## var
  2087. ## table = "users"
  2088. ## table.removeSuffix('s')
  2089. ## table == "user"
  2090. removeSuffix(s, chars = {c})
  2091. proc removeSuffix*(s: var string, suffix: string) {.
  2092. rtl, extern: "nsuRemoveSuffixString".} =
  2093. ## Remove the first matching suffix (in-place) from a string.
  2094. ## .. code-block:: nim
  2095. ## var
  2096. ## answers = "yeses"
  2097. ## answers.removeSuffix("es")
  2098. ## answers == "yes"
  2099. var newLen = s.len
  2100. if s.endsWith(suffix):
  2101. newLen -= len(suffix)
  2102. s.setLen(newLen)
  2103. when isMainModule:
  2104. doAssert align("abc", 4) == " abc"
  2105. doAssert align("a", 0) == "a"
  2106. doAssert align("1232", 6) == " 1232"
  2107. doAssert align("1232", 6, '#') == "##1232"
  2108. let
  2109. inp = """ this is a long text -- muchlongerthan10chars and here
  2110. it goes"""
  2111. outp = " this is a\nlong text\n--\nmuchlongerthan10chars\nand here\nit goes"
  2112. doAssert wordWrap(inp, 10, false) == outp
  2113. doAssert formatBiggestFloat(0.00000000001, ffDecimal, 11) == "0.00000000001"
  2114. doAssert formatBiggestFloat(0.00000000001, ffScientific, 1, ',') in
  2115. ["1,0e-11", "1,0e-011"]
  2116. doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c"
  2117. block: # formatSize tests
  2118. doAssert formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB"
  2119. doAssert formatSize((2.234*1024*1024).int) == "2.234MiB"
  2120. doAssert formatSize(4096) == "4KiB"
  2121. doAssert formatSize(4096, prefix=bpColloquial, includeSpace=true) == "4 kB"
  2122. doAssert formatSize(4096, includeSpace=true) == "4 KiB"
  2123. doAssert formatSize(5_378_934, prefix=bpColloquial, decimalSep=',') == "5,13MB"
  2124. doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] ==
  2125. "The cat eats fish."
  2126. doAssert "-ld a-ldz -ld".replaceWord("-ld") == " a-ldz "
  2127. doAssert "-lda-ldz -ld abc".replaceWord("-ld") == "-lda-ldz abc"
  2128. type MyEnum = enum enA, enB, enC, enuD, enE
  2129. doAssert parseEnum[MyEnum]("enu_D") == enuD
  2130. doAssert parseEnum("invalid enum value", enC) == enC
  2131. doAssert center("foo", 13) == " foo "
  2132. doAssert center("foo", 0) == "foo"
  2133. doAssert center("foo", 3, fillChar = 'a') == "foo"
  2134. doAssert center("foo", 10, fillChar = '\t') == "\t\t\tfoo\t\t\t\t"
  2135. doAssert count("foofoofoo", "foofoo") == 1
  2136. doAssert count("foofoofoo", "foofoo", overlapping = true) == 2
  2137. doAssert count("foofoofoo", 'f') == 3
  2138. doAssert count("foofoofoobar", {'f','b'}) == 4
  2139. doAssert strip(" foofoofoo ") == "foofoofoo"
  2140. doAssert strip("sfoofoofoos", chars = {'s'}) == "foofoofoo"
  2141. doAssert strip("barfoofoofoobar", chars = {'b', 'a', 'r'}) == "foofoofoo"
  2142. doAssert strip("stripme but don't strip this stripme",
  2143. chars = {'s', 't', 'r', 'i', 'p', 'm', 'e'}) ==
  2144. " but don't strip this "
  2145. doAssert strip("sfoofoofoos", leading = false, chars = {'s'}) == "sfoofoofoo"
  2146. doAssert strip("sfoofoofoos", trailing = false, chars = {'s'}) == "foofoofoos"
  2147. doAssert " foo\n bar".indent(4, "Q") == "QQQQ foo\nQQQQ bar"
  2148. doAssert "abba".multiReplace(("a", "b"), ("b", "a")) == "baab"
  2149. doAssert "Hello World.".multiReplace(("ello", "ELLO"), ("World.", "PEOPLE!")) == "HELLO PEOPLE!"
  2150. doAssert "aaaa".multiReplace(("a", "aa"), ("aa", "bb")) == "aaaaaaaa"
  2151. doAssert isAlphaAscii('r')
  2152. doAssert isAlphaAscii('A')
  2153. doAssert(not isAlphaAscii('$'))
  2154. doAssert isAlphaAscii("Rasp")
  2155. doAssert isAlphaAscii("Args")
  2156. doAssert(not isAlphaAscii("$Tomato"))
  2157. doAssert isAlphaNumeric('3')
  2158. doAssert isAlphaNumeric('R')
  2159. doAssert(not isAlphaNumeric('!'))
  2160. doAssert isAlphaNumeric("34ABc")
  2161. doAssert isAlphaNumeric("Rad")
  2162. doAssert isAlphaNumeric("1234")
  2163. doAssert(not isAlphaNumeric("@nose"))
  2164. doAssert isDigit('3')
  2165. doAssert(not isDigit('a'))
  2166. doAssert(not isDigit('%'))
  2167. doAssert isDigit("12533")
  2168. doAssert(not isDigit("12.33"))
  2169. doAssert(not isDigit("A45b"))
  2170. doAssert isSpaceAscii('\t')
  2171. doAssert isSpaceAscii('\l')
  2172. doAssert(not isSpaceAscii('A'))
  2173. doAssert isSpaceAscii("\t\l \v\r\f")
  2174. doAssert isSpaceAscii(" ")
  2175. doAssert(not isSpaceAscii("ABc \td"))
  # isNilOrEmpty: true for the empty string and for nil only; a single space
  # still counts as content.
  doAssert isNilOrEmpty("")
  doAssert isNilOrEmpty(nil)
  doAssert not isNilOrEmpty("test")
  doAssert not isNilOrEmpty(" ")

  # isNilOrWhitespace: additionally true for strings made of whitespace only.
  doAssert isNilOrWhitespace("")
  doAssert isNilOrWhitespace(nil)
  doAssert isNilOrWhitespace(" ")
  doAssert isNilOrWhitespace("\t\l \v\r\f")
  doAssert not isNilOrWhitespace("ABc \td")
  # isLowerAscii: lower-case letters only; upper case, digits and symbols fail.
  for ch in ['a', 'z']:
    doAssert(isLowerAscii(ch))
  for ch in ['A', '5', '&']:
    doAssert(not isLowerAscii(ch))
  doAssert(isLowerAscii("abcd"))
  for word in ["abCD", "33aa"]:
    doAssert(not isLowerAscii(word))

  # isUpperAscii: the mirror image of the checks above.
  doAssert(isUpperAscii('A'))
  for ch in ['b', '5', '%']:
    doAssert(not isUpperAscii(ch))
  doAssert(isUpperAscii("ABC"))
  for word in ["AAcc", "A#$"]:
    doAssert(not isUpperAscii(word))
  # rsplit on a character set: with maxsplit = 1 only the right-most
  # separator is used.
  doAssert("foo bar".rsplit(seps = Whitespace) == @["foo", "bar"])
  doAssert(" foo bar".rsplit(seps = Whitespace, maxsplit = 1) == @[" foo", "bar"])
  doAssert(" foo bar ".rsplit(seps = Whitespace, maxsplit = 1) == @[" foo bar", ""])

  # rsplit on a single character: a maxsplit at or above the number of
  # separators present gives the same result as no limit.
  doAssert(":foo:bar".rsplit(sep = ':') == @["", "foo", "bar"])
  doAssert(":foo:bar".rsplit(sep = ':', maxsplit = 2) == @["", "foo", "bar"])
  doAssert(":foo:bar".rsplit(sep = ':', maxsplit = 3) == @["", "foo", "bar"])

  # rsplit on a multi-character separator.
  doAssert("foothebar".rsplit(sep = "the") == @["foo", "bar"])

  # unescape with empty prefix and suffix: the \x01 escape is decoded and the
  # following '3' is kept as a literal character.
  doAssert unescape(r"\x013", "", "") == "\x013"

  # join: arrays and seqs, strings and integers, with and without separator.
  doAssert(["foo", "bar", "baz"].join() == "foobarbaz")
  doAssert(@["foo", "bar", "baz"].join(", ") == "foo, bar, baz")
  doAssert([1, 2, 3].join() == "123")
  doAssert(@[1, 2, 3].join(", ") == "1, 2, 3")
  # unindent with a multi-character padding: the padding is stripped from the
  # start of every line.
  doAssert """~~!!foo
~~!!bar
~~!!baz""".unindent(2, "~~!!") == "foo\nbar\nbaz"

  # A padding string that never matches leaves the text untouched.
  doAssert """~~!!foo
~~!!bar
~~!!baz""".unindent(2, "~~!!aa") == "~~!!foo\n~~!!bar\n~~!!baz"

  # At most `count` paddings are removed; lines carrying fewer lose only what
  # they have.
  doAssert """~~foo
~~ bar
~~ baz""".unindent(4, "~") == "foo\n bar\n baz"

  # Default padding (a single space). The continuation lines below are
  # indented by four spaces and the closing line by two, INSIDE the literal,
  # so that each call really has indentation to remove.
  doAssert """foo
    bar
    baz
  """.unindent(4) == "foo\nbar\nbaz\n"

  # Removing only two of the four leading spaces keeps the other two.
  doAssert """foo
    bar
    baz
  """.unindent(2) == "foo\n  bar\n  baz\n"

  # A count larger than the available indentation strips all of it.
  doAssert """foo
    bar
    baz
  """.unindent(100) == "foo\nbar\nbaz\n"

  # Without a count all leading whitespace is removed.
  doAssert """foo
    foo
    bar
  """.unindent() == "foo\nfoo\nbar\n"
  # The sample deliberately ends in TWO spaces: splitting on whitespace then
  # produces two trailing empty items, and the maxsplit variants must return
  # the unsplit remainder with both spaces intact.
  let s = " this is an example  "
  let s2 = ":this;is;an:example;;"

  # Unlimited splitting on whitespace and on a custom separator set.
  doAssert s.split() == @["", "this", "is", "an", "example", "", ""]
  doAssert s2.split(seps = {':', ';'}) == @["", "this", "is", "an", "example", "", ""]

  # maxsplit caps the number of cuts; the last item holds the untouched rest.
  doAssert s.split(maxsplit = 4) == @["", "this", "is", "an", "example  "]
  doAssert s.split(' ', maxsplit = 1) == @["", "this is an example  "]
  doAssert s.split(" ", maxsplit = 4) == @["", "this", "is", "an", "example  "]
  block: # formatEng tests
    # Fixed precision, with and without trimming of trailing zeros.
    doAssert(formatEng(0, 2, trim = false) == "0.00")
    doAssert(formatEng(0, 2) == "0")
    doAssert(formatEng(53, 2, trim = false) == "53.00")
    doAssert(formatEng(0.053, 2, trim = false) == "53.00e-3")
    doAssert(formatEng(0.053, 4, trim = false) == "53.0000e-3")
    doAssert(formatEng(0.053, 4, trim = true) == "53e-3")
    doAssert(formatEng(0.053, 0) == "53e-3")

    # Sign handling, default versus explicit precision, custom decimal separator.
    doAssert(formatEng(52731234) == "52.731234e6")
    doAssert(formatEng(-52731234) == "-52.731234e6")
    doAssert(formatEng(52731234, 1) == "52.7e6")
    doAssert(formatEng(-52731234, 1) == "-52.7e6")
    doAssert(formatEng(52731234, 1, decimalSep = ',') == "52,7e6")
    doAssert(formatEng(-52731234, 1, decimalSep = ',') == "-52,7e6")

    # SI prefixes and units.
    doAssert(formatEng(4100, siPrefix = true, unit = "V") == "4.1 kV")
    doAssert(formatEng(4.1, siPrefix = true, unit = "V") == "4.1 V")
    doAssert(formatEng(4.1, siPrefix = true) == "4.1") # no unit, no prefix: no space
    doAssert(formatEng(4100, siPrefix = true) == "4.1 k")
    doAssert(formatEng(4.1, siPrefix = true, unit = "") == "4.1 ") # explicit empty unit keeps the space
    doAssert(formatEng(4100, siPrefix = true, unit = "") == "4.1 k")

    # Units without SI prefixes.
    doAssert(formatEng(4100) == "4.1e3")
    doAssert(formatEng(4100, unit = "V") == "4.1e3 V")
    doAssert(formatEng(4100, unit = "") == "4.1e3 ") # explicit empty unit keeps the space

    # Magnitude too large for an SI prefix: exponent notation is used instead.
    doAssert(formatEng(3.1e22, siPrefix = true, unit = "a") == "31e21 a")
    # Magnitude too small for an SI prefix: exponent notation is used instead.
    doAssert(formatEng(3.1e-25, siPrefix = true, unit = "A") == "310e-27 A")
  block: # startsWith / endsWith char tests
    # Single-character overloads; the sample is never modified.
    let sample = "abcdef"
    doAssert(sample.startsWith('a'))
    doAssert(not sample.startsWith('b'))
    doAssert(sample.endsWith('f'))
    doAssert(not sample.endsWith('a'))
    # A NUL character must not be reported as the last character.
    doAssert(not sample.endsWith('\0'))
  2278. #echo("strutils tests passed")