12345678910111213141516171819202122232425262728 |
- import unittest
- import sys
- import string
- from operator import itemgetter
- import re
- import collections
- import os
- sys.path.append("../dataparsers")
- import parse_google_ngram
class testNgramParser(unittest.TestCase):
    """Tests for the ``parse_google_ngram`` module using the mock 1-gram data."""

    def test1gramParser(self):
        """Parse the mock 1-gram file and check the word list that is built.

        The test feeds ``mockdata/unlikely1gram.txt`` through
        ``process_ngram_file``, sorts the collected entries by their value in
        descending order, and passes them to ``build_word_list``.  It then
        checks that the output file was written and that the returned list
        matches the expected words.  Paths are relative to the current
        working directory.
        """
        ngramFile = "mockdata/unlikely1gram.txt"
        generatedFile = "mockdata/frequencyfile.txt"
        minLength = 1
        maxWords = 4
        expectedGeneratedList = ['alcock', 'alberico', 'alkalinizing', "al'cock"]

        # Use unittest assertions rather than bare ``assert`` so the check
        # survives ``python -O`` and reports a readable message.
        self.assertTrue(os.path.isfile(ngramFile),
                        "missing fixture file: " + ngramFile)

        # process_ngram_file is handed an empty dict and the dict is read
        # afterwards, so it is presumably filled in place (word -> count;
        # confirm against parse_google_ngram).
        word_list = {}
        parse_google_ngram.process_ngram_file(ngramFile, word_list)

        # ``items()`` works on both Python 2 and 3 (``iteritems`` is 2-only).
        sorted_list = sorted(word_list.items(), key=itemgetter(1), reverse=True)

        try:
            generatedList = parse_google_ngram.build_word_list(
                sorted_list, generatedFile, maxWords, minLength)
            self.assertTrue(os.path.isfile(generatedFile),
                            "output file was not created: " + generatedFile)
            self.assertEqual(generatedList, expectedGeneratedList)
        finally:
            # Always remove the generated file so a failing run does not
            # leave stale output behind for the next run.
            if os.path.isfile(generatedFile):
                os.remove(generatedFile)
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|