test_ngramparser.py 1007 B

12345678910111213141516171819202122232425262728
  1. import unittest
  2. import sys
  3. import string
  4. from operator import itemgetter
  5. import re
  6. import collections
  7. import os
  8. sys.path.append("../dataparsers")
  9. import parse_google_ngram
  10. class testNgramParser(unittest.TestCase):
  11. def test1gramParser(self):
  12. ngramFile = "mockdata/unlikely1gram.txt"
  13. generatedFile = "mockdata/frequencyfile.txt"
  14. startYear = 1980
  15. minLength = 1
  16. maxWords = 4
  17. expectedGeneratedList = ['alcock', 'alberico', 'alkalinizing', "al'cock"]
  18. word_list = {}
  19. assert os.path.isfile(ngramFile)
  20. parse_google_ngram.process_ngram_file(ngramFile, word_list)
  21. sorted_list = sorted(word_list.iteritems(), key=itemgetter(1), reverse=True)
  22. generatedList=parse_google_ngram.build_word_list(sorted_list, generatedFile, maxWords, minLength)
  23. assert os.path.isfile(generatedFile)
  24. self.assertEqual(generatedList,expectedGeneratedList)
  25. os.remove(generatedFile)
  26. if __name__ == '__main__':
  27. unittest.main()