AnimalSorter.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. #
  4. # animal-sorter.py
  5. #
  6. # Sorts the animal pictures into folders based on a trained tensorflow model.
  7. #
  8. # Copyright 2022 Stephen Stengel <stephen.stengel@cwu.edu> and friends
  9. #
  10. import os
  11. import shutil
  12. import tensorflow as tf
  13. import time
  14. from skimage.io import imsave
  15. from skimage.util import img_as_ubyte, img_as_float
  16. import numpy as np
  17. # ~ from models import currentBestModel
  18. #Current plan:
  19. #start with making the sorting function work.
  20. #Add the function to the run button with default folder values for testing.
  21. #Then:
  22. #Open up a splash screen when user clicks on the exe
  23. #Load up the imports required
  24. #delete the splash screen with gtk.clear or something.
  25. #load up the actual program window
  26. #In the main window, use the file paths as the inputs to the sorting function.
  27. CLASS_BOBCAT = 0
  28. CLASS_COYOTE = 1
  29. CLASS_DEER = 2
  30. CLASS_ELK = 3
  31. CLASS_HUMAN = 4
  32. CLASS_NOT_INTERESTING = 5
  33. CLASS_RACCOON = 6
  34. CLASS_WEASEL = 7
  35. CLASS_BOBCAT_STRING = "bobcat"
  36. CLASS_COYOTE_STRING = "coyote"
  37. CLASS_DEER_STRING = "deer"
  38. CLASS_ELK_STRING = "elk"
  39. CLASS_HUMAN_STRING = "human"
  40. CLASS_RACCOON_STRING = "raccoon"
  41. CLASS_WEASEL_STRING = "weasel"
  42. CLASS_NOT_INTERESTING_STRING = "not"
  43. CLASS_NAMES_LIST_INT = [CLASS_BOBCAT, CLASS_COYOTE, CLASS_DEER, CLASS_ELK, CLASS_HUMAN, CLASS_NOT_INTERESTING, CLASS_RACCOON, CLASS_WEASEL]
  44. CLASS_NAMES_LIST_STR = [CLASS_BOBCAT_STRING, CLASS_COYOTE_STRING, CLASS_DEER_STRING, CLASS_ELK_STRING, CLASS_HUMAN_STRING, CLASS_NOT_INTERESTING_STRING, CLASS_RACCOON_STRING, CLASS_WEASEL_STRING]
  45. ## We really should make a config file so that we don't need all these globals. ##
  46. IMG_WIDTH = 100
  47. IMG_HEIGHT = 100
  48. IMG_CHANNELS = 3
  49. IMG_SHAPE_TUPPLE = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)
  50. CHECKPOINT_FOLDER = os.path.normpath("./checkpoint/")
  51. print("Checkpoint folder: " + CHECKPOINT_FOLDER)
  52. #Testing print
  53. PRESENT_DIRECTORY = os.path.normpath(os.path.dirname(__file__) )
  54. print("Present directory: " + PRESENT_DIRECTORY)
  55. def sortAnimalsIntoFolders(sourceStr, destStr, progress_bar):
  56. print("Testing! WOW!")
  57. print("Source dir: " + str(sourceStr))
  58. print("Destenation dir: " + str(destStr))
  59. progress_bar.pulse()
  60. #create the folder structure within the destination directory.
  61. print("Setting up output directories...")
  62. foldersToCreate = createOutputFoldernames(CLASS_NAMES_LIST_STR, destStr)
  63. makeDirectories(foldersToCreate)
  64. progress_bar.pulse()
  65. #Load the model from models.py. This is currently blocking the gui. (can't update window till done)
  66. #maybe this should be a part of the splash screen step?
  67. #just pass the model in as a parameter.
  68. #maybe change this to a class object and save as a field.? more work tho.
  69. #another solution: a separate button to load the model, keep the run button disabled until it is readied.
  70. # ~ print("Loading model...")
  71. # ~ theModel = currentBestModel(IMG_SHAPE_TUPPLE)
  72. # ~ print("printing summary...")
  73. # ~ theModel.summary()
  74. #Load the checkpoint weights.
  75. # ~ print("Loading weights...")
  76. # ~ theModel.load_weights(os.path.abspath(CHECKPOINT_FOLDER))
  77. #Turn the input images into a dataset?
  78. print("Loading the dataset...")
  79. images_ds, originalFullNames = createDatasetFromImages(sourceStr)
  80. progress_bar.pulse()
  81. #normalize file paths for all operating systems
  82. originalFullNames = normalizeAllNames(originalFullNames)
  83. #strip base path from original names
  84. originalNames = stripBasepathFromFilenames(originalFullNames)
  85. progress_bar.pulse()
  86. #It might be faster to load the model and weights separately. need testing.
  87. print("Loading model...")
  88. # ~ print("COMENTED OUT FOR TESTING!")
  89. theModel = tf.keras.models.load_model(CHECKPOINT_FOLDER)
  90. theModel.summary()
  91. progress_bar.pulse()
  92. #Get a list of predictions
  93. print("Making predictions...")
  94. startTime = time.time()
  95. predictionsArray = theModel.predict( \
  96. images_ds,
  97. verbose = 2, #shows a line? If we can print this to file, we can use it to inform our status bar.
  98. steps = 2, #only predict two batches of 32 pictures to test faster.
  99. )
  100. elapsedTime = time.time() - startTime
  101. print(str(predictionsArray))
  102. print("Prediction took: " + str(elapsedTime) + " seconds.")
  103. progress_bar.pulse()
  104. #For each prediction, put image into correct folder.
  105. # ~ sortPredictions(images_ds, predictionsArray, sourceStr, destStr, CLASS_NAMES_LIST_INT, CLASS_NAMES_LIST_STR)
  106. copyPredictions(originalFullNames, originalNames, predictionsArray, destStr, CLASS_NAMES_LIST_INT, CLASS_NAMES_LIST_STR)
  107. progress_bar.pulse()
  108. print("Done!")
  109. def normalizeAllNames(originalFullNames):
  110. outList = []
  111. for name in originalFullNames:
  112. outList.append( os.path.normpath(name) )
  113. return outList
  114. def copyPredictions(originalFullNames, originalNames, predictionsArray, destStr, classNamesListInt, classNamesListStr):
  115. #Get predicted labels in integer form.
  116. labelsListInt = getPredictedLabels(predictionsArray)
  117. #get all predicted labels in string form for use as output folders
  118. labelsListStr = getAllOutFoldersStr(classNamesListStr, labelsListInt)
  119. for i in range(len(predictionsArray)):
  120. thisOutClassFolder = labelsListStr[i] #get this out folder
  121. thisOutName = originalNames[i]
  122. #get full path of output name
  123. thisOutputFolder = os.path.join(destStr, thisOutClassFolder)
  124. #copy original to destination
  125. thisFullOriginalName = originalFullNames[i]
  126. try:
  127. shutil.copy2( thisFullOriginalName, thisOutputFolder)
  128. except:
  129. print("copy skipping: " + str(thisName))
  130. #Takes a list of file paths and returns a tensorflow dataset object.
  131. #Resize to the same size as the model was trained on.
  132. #NOTE: If we instead load each image individually with: tf.keras.preprocessing.image.load_img()
  133. #We can save them in a numpy array
  134. #then we can use a for loop to predict on each image individually
  135. #this way we can update a loading bar to show how much time is left.
  136. #OR EVEN BETTER:
  137. #To save RAM on their low-spec government workstations, we can load each
  138. #image individually, and then predict that one image right away.
  139. #That way there is only ever one image in ram. The downside is that there
  140. #will probably be no batching or multiprocessing.? We'll have to test and see!
  141. #OR we could combine the two methods:
  142. #We could load BATCH_NUM images from files, and combine them into a numpy array,
  143. #then predict on each image in the array and sort, then continue with
  144. #the rest of the dataset in this way.
  145. def createDatasetFromImages(sourceFolderStr):
  146. out_ds = tf.keras.preprocessing.image_dataset_from_directory( \
  147. sourceFolderStr,
  148. labels = None,
  149. label_mode = None,
  150. color_mode = "rgb",
  151. image_size = (IMG_HEIGHT, IMG_WIDTH), #triple check it is (h, w). Pillow files are (w, h) and need conversion to numpy/tensorflow by swapping height and width dimensions. (transpose?)
  152. batch_size = 32, #this might need tweaking depending on how much ram their computers have. 32 is default.
  153. shuffle = False,
  154. interpolation = "bilinear", #default is bilinear
  155. )
  156. # Found undocumented filename return lol
  157. # https://stackoverflow.com/questions/62166588/how-to-obtain-filenames-during-prediction-while-using-tf-keras-preprocessing-ima
  158. # ~ fnames = out_ds.file_paths
  159. # ~ for name in fnames:
  160. # ~ print(name)
  161. outNames = out_ds.file_paths
  162. AUTOTUNE = tf.data.AUTOTUNE
  163. normalization_layer = tf.keras.layers.Rescaling(1./255) #for newer versions of tensorflow
  164. out_ds = out_ds.map(lambda x: normalization_layer(x), num_parallel_calls=AUTOTUNE)
  165. return out_ds, outNames
  166. def createOutputFoldernames(namesList, destStr):
  167. outFNames = []
  168. destNormalized = os.path.normpath(destStr)
  169. for name in namesList:
  170. outFNames.append( os.path.join(destNormalized, name) )
  171. return outFNames
  172. # Creates the necessary directories.
  173. def makeDirectories(listOfFoldersToCreate):
  174. for folder in listOfFoldersToCreate:
  175. if not os.path.isdir(folder):
  176. os.makedirs(folder)
  177. #deletes each dir within a list
  178. def deleteDirectories(listDirsToDelete):
  179. for folder in listDirsToDelete:
  180. if os.path.isdir(folder):
  181. shutil.rmtree(folder, ignore_errors = True)
  182. #Need to preserve filenames with os.walk (thats how tensorflow loads them when not shuffled.)
  183. #Investigate! Make sure that the images have the right names when they come out! A small class like weasel should be good for that.
  184. def sortPredictions(images_ds, predictionsArray, sourceStr, destStr, classNamesListInt, classNamesListStr):
  185. #Get list of predictions in int form
  186. labelsListInt = getPredictedLabels(predictionsArray)
  187. #get list of output names given the input names using os.walk (without a base path)
  188. outNamesList = getListOfFilenames(sourceStr)
  189. #Put images in the correct places.
  190. i = 0
  191. for batch in images_ds:
  192. batchArr = np.asarray(batch)
  193. while batchArr is not None: #HUHUHUHU
  194. for j in range(len(batchArr)):
  195. thisImg = img_as_ubyte( batchArr[j] )
  196. thisFolderStr = getOutFolderNameStr(classNamesListStr, labelsListInt[i])
  197. thisFileName = outNamesList[i]
  198. fnameStr = os.path.join(destStr, thisFolderStr, thisFileName)
  199. imsave(fnameStr, thisImg)
  200. i += 1
  201. # ~ for i in range(len(images_ds)):
  202. #could we do for i in range len(_ds) * batch_size .. for j in range len (_ds)??
  203. def getAllOutFoldersStr(classNamesListStr, labelsListInt):
  204. outFoldersList = []
  205. for labelInt in labelsListInt:
  206. outFoldersList.append( getOutFolderNameStr(classNamesListStr, labelInt ) )
  207. return outFoldersList
  208. def getOutFolderNameStr(classNamesListStr, classInt):
  209. return classNamesListStr[ classInt ]
  210. #Returns a list of filenames from the input directory
  211. #uses os.walk, so it should be the same order as tensorflow loads them!!!!!!!!!!!!!!!!!!!!!!!!!!!maybe
  212. #Currently not getting the same filenames as tensorflow :(
  213. def getListOfFilenames(baseDirectory, include_base = False):
  214. myNames = []
  215. for (root, dirNames, fileNames) in os.walk(baseDirectory):
  216. for aFile in fileNames:
  217. if include_base:
  218. myNames.append( os.path.join( root, aFile ) )
  219. else:
  220. myNames.append(aFile)
  221. return myNames
  222. def stripBasepathFromFilenames(inList):
  223. outList = []
  224. for name in inList:
  225. outList.append( os.path.basename(name) )
  226. return outList
  227. #Transform scores array into predicted labels.
  228. def getPredictedLabels(predictedScores):
  229. outList = []
  230. for score in predictedScores:
  231. outList.append(np.argmax(score))
  232. return np.asarray(outList)