wordwrap.nim 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2018 Nim contributors
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module contains an algorithm to wordwrap a Unicode string.
  10. import strutils, unicode
  11. proc olen(s: string; start, lastExclusive: int): int =
  12. var i = start
  13. result = 0
  14. while i < lastExclusive:
  15. inc result
  16. let L = graphemeLen(s, i)
  17. inc i, L
  18. proc wrapWords*(s: string, maxLineWidth = 80,
  19. splitLongWords = true,
  20. seps: set[char] = Whitespace,
  21. newLine = "\n"): string {.noSideEffect.} =
  22. ## Word wraps `s`.
  23. runnableExamples:
  24. doAssert "12345678901234567890".wrapWords() == "12345678901234567890"
  25. doAssert "123456789012345678901234567890".wrapWords(20) == "12345678901234567890\n1234567890"
  26. doAssert "Hello Bob. Hello John.".wrapWords(13, false) == "Hello Bob.\nHello John."
  27. doAssert "Hello Bob. Hello John.".wrapWords(13, true, {';'}) == "Hello Bob. He\nllo John."
  28. result = newStringOfCap(s.len + s.len shr 6)
  29. var spaceLeft = maxLineWidth
  30. var lastSep = ""
  31. var i = 0
  32. while true:
  33. var j = i
  34. let isSep = j < s.len and s[j] in seps
  35. while j < s.len and (s[j] in seps) == isSep: inc(j)
  36. if j <= i: break
  37. #yield (substr(s, i, j-1), isSep)
  38. if isSep:
  39. lastSep.setLen 0
  40. for k in i..<j:
  41. if s[k] notin {'\L', '\C'}: lastSep.add s[k]
  42. if lastSep.len == 0:
  43. lastSep.add ' '
  44. dec spaceLeft
  45. else:
  46. spaceLeft = spaceLeft - olen(lastSep, 0, lastSep.len)
  47. else:
  48. let wlen = olen(s, i, j)
  49. if wlen > spaceLeft:
  50. if splitLongWords and wlen > maxLineWidth:
  51. var k = 0
  52. while k < j - i:
  53. if spaceLeft <= 0:
  54. spaceLeft = maxLineWidth
  55. result.add newLine
  56. dec spaceLeft
  57. let L = graphemeLen(s, k+i)
  58. for m in 0 ..< L: result.add s[i+k+m]
  59. inc k, L
  60. else:
  61. spaceLeft = maxLineWidth - wlen
  62. result.add(newLine)
  63. for k in i..<j: result.add(s[k])
  64. else:
  65. spaceLeft = spaceLeft - wlen
  66. result.add(lastSep)
  67. for k in i..<j: result.add(s[k])
  68. #lastSep.setLen(0)
  69. i = j
  70. when isMainModule:
  71. when true:
  72. let
  73. inp = """ this is a long text -- muchlongerthan10chars and here
  74. it goes"""
  75. outp = " this is a\nlong text\n--\nmuchlongerthan10chars\nand here\nit goes"
  76. doAssert wrapWords(inp, 10, false) == outp
  77. let
  78. longInp = """ThisIsOneVeryLongStringWhichWeWillSplitIntoEightSeparatePartsNow"""
  79. longOutp = "ThisIsOn\neVeryLon\ngStringW\nhichWeWi\nllSplitI\nntoEight\nSeparate\nPartsNow"
  80. doAssert wrapWords(longInp, 8, true) == longOutp
  81. # test we don't break Umlauts into invalid bytes:
  82. let fies = "äöüöäöüöäöüöäöüööäöüöäößßßßüöäößßßßßß"
  83. let fiesRes = "ä\nö\nü\nö\nä\nö\nü\nö\nä\nö\nü\nö\nä\nö\nü\nö\nö\nä\nö\nü\nö\nä\nö\nß\nß\nß\nß\nü\nö\nä\nö\nß\nß\nß\nß\nß\nß"
  84. doAssert wrapWords(fies, 1, true) == fiesRes
  85. let longlongword = """abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüö
  86. äzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüüöäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiqfglwcßqfgxvlcwgtfhiaoen
  87. rsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocfqclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdrtnaetdr
  88. iaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχ
  89. ξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"""
  90. let longlongwordRes = """
  91. abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp
  92. psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüöäzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüü
  93. öäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiq
  94. fglwcßqfgxvlcwgtfhiaoenrsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocf
  95. qclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdr
  96. tnaetdriaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψ
  97. ρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχ
  98. ξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"""
  99. doAssert wrapWords(longlongword) == longlongwordRes
  100. # bug #14579
  101. const input60 = """
  102. This string is wrapped to 60 characters. If we call
  103. wrapwords on it it will be re-wrapped to 80 characters.
  104. """
  105. const input60Res = """This string is wrapped to 60 characters. If we call wrapwords on it it will be
  106. re-wrapped to 80 characters."""
  107. doAssert wrapWords(input60) == input60Res