concurrency.nim 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. discard """
  2. action: compile
  3. """
  4. # See this page for info about the format https://wikitech.wikimedia.org/wiki/Analytics/Data/Pagecounts-all-sites
  5. import tables, parseutils, strutils, threadpool
  6. const filename = "pagecounts-20160101-050000"
  7. type
  8. Stats = ref object
  9. projectName, pageTitle: string
  10. requests, contentSize: int
  11. proc `$`(stats: Stats): string =
  12. "(projectName: $#, pageTitle: $#, requests: $#, contentSize: $#)" % [
  13. stats.projectName, stats.pageTitle, $stats.requests, $stats.contentSize
  14. ]
  15. proc parse(chunk: string): Stats =
  16. # Each line looks like: en Main_Page 242332 4737756101
  17. result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0)
  18. var projectName = ""
  19. var pageTitle = ""
  20. var requests = ""
  21. var contentSize = ""
  22. for line in chunk.splitLines:
  23. var i = 0
  24. projectName.setLen(0)
  25. i.inc parseUntil(line, projectName, Whitespace, i)
  26. i.inc skipWhitespace(line, i)
  27. pageTitle.setLen(0)
  28. i.inc parseUntil(line, pageTitle, Whitespace, i)
  29. i.inc skipWhitespace(line, i)
  30. requests.setLen(0)
  31. i.inc parseUntil(line, requests, Whitespace, i)
  32. i.inc skipWhitespace(line, i)
  33. contentSize.setLen(0)
  34. i.inc parseUntil(line, contentSize, Whitespace, i)
  35. i.inc skipWhitespace(line, i)
  36. if requests.len == 0 or contentSize.len == 0:
  37. # Ignore lines with either of the params that are empty.
  38. continue
  39. let requestsInt = requests.parseInt
  40. if requestsInt > result.requests and projectName == "en":
  41. result = Stats(
  42. projectName: projectName,
  43. pageTitle: pageTitle,
  44. requests: requestsInt,
  45. contentSize: contentSize.parseInt
  46. )
  47. proc readChunks(filename: string, chunksize = 1000000): Stats =
  48. result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0)
  49. var file = open(filename)
  50. var responses = newSeq[FlowVar[Stats]]()
  51. var buffer = newString(chunksize)
  52. var oldBufferLen = 0
  53. while not endOfFile(file):
  54. let readSize = file.readChars(buffer, oldBufferLen, chunksize - oldBufferLen) + oldBufferLen
  55. var chunkLen = readSize
  56. while chunkLen >= 0 and buffer[chunkLen - 1] notin NewLines:
  57. # Find where the last line ends
  58. chunkLen.dec
  59. responses.add(spawn parse(buffer[0 ..< chunkLen]))
  60. oldBufferLen = readSize - chunkLen
  61. buffer[0 ..< oldBufferLen] = buffer[readSize - oldBufferLen .. ^1]
  62. for resp in responses:
  63. let statistic = ^resp
  64. if statistic.requests > result.requests:
  65. result = statistic
  66. file.close()
  67. when true:
  68. echo readChunks(filename)