# find.nim
  1. import unittest, sequtils
  2. import nre except toSeq
  3. import optional_nonstrict
  4. import times, strutils
  5. suite "find":
  6. test "find text":
  7. check("3213a".find(re"[a-z]").match == "a")
  8. check(toSeq(findIter("1 2 3 4 5 6 7 8 ", re" ")).map(
  9. proc (a: RegexMatch): string = a.match
  10. ) == @[" ", " ", " ", " ", " ", " ", " ", " "])
  11. test "find bounds":
  12. check(toSeq(findIter("1 2 3 4 5 ", re" ")).map(
  13. proc (a: RegexMatch): Slice[int] = a.matchBounds
  14. ) == @[1..1, 3..3, 5..5, 7..7, 9..9])
  15. test "overlapping find":
  16. check("222".findAll(re"22") == @["22"])
  17. check("2222".findAll(re"22") == @["22", "22"])
  18. test "len 0 find":
  19. check("".findAll(re"\ ") == newSeq[string]())
  20. check("".findAll(re"") == @[""])
  21. check("abc".findAll(re"") == @["", "", "", ""])
  22. check("word word".findAll(re"\b") == @["", "", "", ""])
  23. check("word\r\lword".findAll(re"(*ANYCRLF)(?m)$") == @["", ""])
  24. check("слово слово".findAll(re"(*U)\b") == @["", "", "", ""])
  25. test "bail early":
  26. ## we expect nothing to be found and we should be bailing out early which means that
  27. ## the timing difference between searching in small and large data should be well
  28. ## within a tolerance margin
  29. const small = 10
  30. const large = 1000
  31. var smallData = repeat("url.sequence = \"http://whatever.com/jwhrejrhrjrhrjhrrjhrjrhrjrh\" ", small)
  32. var largeData = repeat("url.sequence = \"http://whatever.com/jwhrejrhrjrhrjhrrjhrjrhrjrh\" ", large)
  33. var expression = re"^url.* = "(.*?)""
  34. check(smallData.findAll(expression) == newSeq[string]())
  35. check(largeData.findAll(expression) == newSeq[string]())