# easy.py 2.5 KB

  1. #!/usr/bin/env python
  2. import sys
  3. import os
  4. from subprocess import *
  5. if len(sys.argv) <= 1:
  6. print 'Usage: %s training_file [testing_file]' % sys.argv[0]
  7. raise SystemExit
  8. # svm, grid, and gnuplot executable files
  9. is_win32 = (sys.platform == 'win32')
  10. if not is_win32:
  11. svmscale_exe = "../svm-scale"
  12. svmtrain_exe = "../svm-train"
  13. svmpredict_exe = "../svm-predict"
  14. grid_py = "./grid.py"
  15. gnuplot_exe = "/usr/bin/gnuplot"
  16. else:
  17. # example for windows
  18. svmscale_exe = r"..\windows\svm-scale.exe"
  19. svmtrain_exe = r"..\windows\svm-train.exe"
  20. svmpredict_exe = r"..\windows\svm-predict.exe"
  21. gnuplot_exe = r"c:\tmp\gnuplot\bin\pgnuplot.exe"
  22. grid_py = r".\grid.py"
  23. assert os.path.exists(svmscale_exe),"svm-scale executable not found"
  24. assert os.path.exists(svmtrain_exe),"svm-train executable not found"
  25. assert os.path.exists(svmpredict_exe),"svm-predict executable not found"
  26. assert os.path.exists(gnuplot_exe),"gnuplot executable not found"
  27. assert os.path.exists(grid_py),"grid.py not found"
  28. train_pathname = sys.argv[1]
  29. assert os.path.exists(train_pathname),"training file not found"
  30. file_name = os.path.split(train_pathname)[1]
  31. scaled_file = file_name + ".scale"
  32. model_file = file_name + ".model"
  33. range_file = file_name + ".range"
  34. if len(sys.argv) > 2:
  35. test_pathname = sys.argv[2]
  36. file_name = os.path.split(test_pathname)[1]
  37. assert os.path.exists(test_pathname),"testing file not found"
  38. scaled_test_file = file_name + ".scale"
  39. predict_test_file = file_name + ".predict"
  40. cmd = '%s -s "%s" "%s" > "%s"' % (svmscale_exe, range_file, train_pathname, scaled_file)
  41. print 'Scaling training data...'
  42. call(cmd, shell = True)
  43. cmd = '%s -svmtrain "%s" -gnuplot "%s" "%s"' % (grid_py, svmtrain_exe, gnuplot_exe, scaled_file)
  44. print 'Cross validation...'
  45. f = Popen(cmd, shell = True, stdout = PIPE).stdout
  46. line = ''
  47. while True:
  48. last_line = line
  49. line = f.readline()
  50. if not line: break
  51. c,g,rate = map(float,last_line.split())
  52. print 'Best c=%s, g=%s CV rate=%s' % (c,g,rate)
  53. cmd = '%s -c %s -g %s "%s" "%s"' % (svmtrain_exe,c,g,scaled_file,model_file)
  54. print 'Training...'
  55. call(cmd, shell = True, stdout = PIPE)
  56. print 'Output model: %s' % model_file
  57. if len(sys.argv) > 2:
  58. cmd = '%s -r "%s" "%s" > "%s"' % (svmscale_exe, range_file, test_pathname, scaled_test_file)
  59. print 'Scaling testing data...'
  60. call(cmd, shell = True)
  61. cmd = '%s "%s" "%s" "%s"' % (svmpredict_exe, scaled_test_file, model_file, predict_test_file)
  62. print 'Testing...'
  63. call(cmd, shell = True)
  64. print 'Output prediction: %s' % predict_test_file