1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- discard """
- action: compile
- """
- import os, parseutils, threadpool, strutils
type
  Stats = ref object
    ## One page-count record; also used to carry the best record found so far.
    domainCode: string  ## first field of a record (compared against "en")
    pageTitle: string   ## second field of a record
    countViews: int     ## third field; the value records are ranked by
    totalSize: int      ## fourth field (presumably a size in bytes - confirm)
proc newStats(): Stats =
  ## Allocates a `Stats` with empty strings and zero counters.
  new(result)
  result.domainCode = ""
  result.pageTitle = ""
  result.countViews = 0
  result.totalSize = 0
proc `$`(stats: Stats): string =
  ## Renders all four fields of `stats` as a single parenthesised line.
  let views = $stats.countViews
  let size = $stats.totalSize
  result = format(
    "(domainCode: $#, pageTitle: $#, countViews: $#, totalSize: $#)",
    stats.domainCode, stats.pageTitle, views, size)
proc parse(line: string, domainCode, pageTitle: var string,
           countViews, totalSize: var int) =
  ## Splits one space-separated record into its four fields:
  ## `domainCode pageTitle countViews totalSize`.
  ##
  ## An empty `line` returns immediately and leaves every output untouched.
  ## Otherwise each output is reset before its field is scanned, so a field
  ## that fails to parse ends up as `""` or `0`.
  if line.len == 0:
    return

  # `pos` is the scan cursor; the `+ 1` steps over the separating space.
  var pos = 0

  domainCode.setLen(0)
  pos += parseUntil(line, domainCode, {' '}, pos) + 1

  pageTitle.setLen(0)
  pos += parseUntil(line, pageTitle, {' '}, pos) + 1

  countViews = 0
  pos += parseInt(line, countViews, pos) + 1

  totalSize = 0
  # Last field: the number of characters consumed is not needed any more.
  discard parseInt(line, totalSize, pos)
proc parseChunk(chunk: string): Stats =
  ## Scans every line of `chunk` and returns the record with domain code
  ## "en" that has the highest `countViews`. When no such record has more
  ## than zero views, the blank `Stats` from `newStats()` is returned.
  result = newStats()

  # Scratch outputs handed to `parse` for every line.
  var
    code = ""
    title = ""
    views = 0
    size = 0

  for line in chunk.splitLines:
    parse(line, code, title, views, size)
    # Skip anything that is not English or does not beat the current best.
    if code != "en" or views <= result.countViews:
      continue
    result = Stats(domainCode: code, pageTitle: title,
                   countViews: views, totalSize: size)
proc readPageCounts(filename: string, chunkSize = 1_000_000) =
  ## Reads `filename` in pieces of roughly `chunkSize` bytes, hands every
  ## piece that ends on a line boundary to `parseChunk` via `spawn`, and
  ## echoes the record with the highest `countViews` over all pieces.
  ##
  ## * `filename`  - path of the page-counts file to read.
  ## * `chunkSize` - initial size of the read buffer in bytes; must be > 0.
  ##   The buffer grows when a single line does not fit into it.
  ##
  ## Raises `IOError` when the file cannot be opened.
  doAssert chunkSize > 0, "chunkSize must be positive"

  var file = open(filename)
  defer: file.close()  # the handle is released on every exit path

  var responses = newSeq[FlowVar[Stats]]()
  var buffer = newString(chunkSize)
  # Number of bytes at the front of `buffer` that belong to a line whose
  # end has not been read yet.
  var oldBufferLen = 0

  while not endOfFile(file):
    let reqSize = buffer.len - oldBufferLen
    let readSize = file.readChars(buffer, oldBufferLen, reqSize) + oldBufferLen

    # Walk back to the last line terminator so that a chunk never ends in
    # the middle of a line. `> 0` keeps the index `chunkLen - 1` valid.
    var chunkLen = readSize
    while chunkLen > 0 and buffer[chunkLen - 1] notin NewLines:
      chunkLen.dec

    if chunkLen == 0:
      # No complete line in the buffer yet: keep everything and read more.
      # When the buffer is already full, enlarge it, otherwise the next
      # read would request zero bytes and the loop would never progress.
      oldBufferLen = readSize
      if readSize == buffer.len:
        buffer.setLen(buffer.len * 2)
      continue

    responses.add(spawn parseChunk(buffer[0 ..< chunkLen]))

    # Move the unfinished tail to the front of the buffer. A forward,
    # element-wise copy is safe because the source index is always ahead
    # of the destination index, and it never changes the buffer's length
    # (a slice assignment with unequal lengths would splice instead).
    oldBufferLen = readSize - chunkLen
    for i in 0 ..< oldBufferLen:
      buffer[i] = buffer[chunkLen + i]

  # A last line without a trailing newline is still sitting in the buffer.
  if oldBufferLen > 0:
    responses.add(spawn parseChunk(buffer[0 ..< oldBufferLen]))

  var mostPopular = newStats()
  for resp in responses:
    let statistic = ^resp  # blocks until this chunk has been parsed
    if statistic.countViews > mostPopular.countViews:
      mostPopular = statistic

  echo("Most popular is: ", mostPopular)
when true:
  # Entry point: process the page-counts dump that is expected to sit in
  # the current working directory.
  const file = "pagecounts-20160101-050000"
  let filename = joinPath(getCurrentDir(), file)
  readPageCounts(filename)
|