AnimalSorter.py 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. #
  4. # animal-sorter.py
  5. #
  6. # Sorts the animal pictures into folders based on a trained tensorflow model.
  7. #
  8. # Copyright 2022 Stephen Stengel <stephen.stengel@cwu.edu> and friends
  9. #
  10. import os
  11. # ~ from models import currentBestModel
  12. #Current plan:
  13. #start with making the sorting function work.
  14. #Add the function to the run button with default folder values for testing.
  15. #Then:
  16. #Open up a splash screen when user clicks on the exe
  17. #Load up the imports required
  18. #delete the splash screen with gtk.clear or something.
  19. #load up the actual program window
  20. #In the main window, use the file paths as the inputs to the sorting function.
  21. CLASS_BOBCAT = 0
  22. CLASS_COYOTE = 1
  23. CLASS_DEER = 2
  24. CLASS_ELK = 3
  25. CLASS_HUMAN = 4
  26. CLASS_NOT_INTERESTING = 5
  27. CLASS_RACCOON = 6
  28. CLASS_WEASEL = 7
  29. CLASS_BOBCAT_STRING = "bobcat"
  30. CLASS_COYOTE_STRING = "coyote"
  31. CLASS_DEER_STRING = "deer"
  32. CLASS_ELK_STRING = "elk"
  33. CLASS_HUMAN_STRING = "human"
  34. CLASS_RACCOON_STRING = "raccoon"
  35. CLASS_WEASEL_STRING = "weasel"
  36. CLASS_NOT_INTERESTING_STRING = "not"
  37. CLASS_NAMES_LIST_INT = [CLASS_BOBCAT, CLASS_COYOTE, CLASS_DEER, CLASS_ELK, CLASS_HUMAN, CLASS_NOT_INTERESTING, CLASS_RACCOON, CLASS_WEASEL]
  38. CLASS_NAMES_LIST_STR = [CLASS_BOBCAT_STRING, CLASS_COYOTE_STRING, CLASS_DEER_STRING, CLASS_ELK_STRING, CLASS_HUMAN_STRING, CLASS_NOT_INTERESTING_STRING, CLASS_RACCOON_STRING, CLASS_WEASEL_STRING]
  39. ## We really should make a config file so that we don't need all these globals. ##
  40. IMG_WIDTH = 100
  41. IMG_HEIGHT = 100
  42. IMG_CHANNELS = 3
  43. IMG_SHAPE_TUPPLE = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)
  44. CHECKPOINT_FOLDER = os.path.normpath("./checkpoint/")
  45. print("Checkpoint folder: " + CHECKPOINT_FOLDER)
  46. #Testing print
  47. PRESENT_DIRECTORY = os.path.normpath(os.path.dirname(__file__) )
  48. print("Present directory: " + PRESENT_DIRECTORY)
  49. def sortAnimalsIntoFolders(sourceStr, destStr):
  50. import time
  51. print("Source dir: " + str(sourceStr))
  52. print("Destenation dir: " + str(destStr))
  53. #create the folder structure within the destination directory.
  54. print("Setting up output directories...")
  55. foldersToCreate = createOutputFoldernames(CLASS_NAMES_LIST_STR, destStr)
  56. makeDirectories(foldersToCreate)
  57. #Load the model from models.py. This is currently blocking the gui. (can't update window till done)
  58. #maybe this should be a part of the splash screen step?
  59. #just pass the model in as a parameter.
  60. #maybe change this to a class object and save as a field.? more work tho.
  61. #another solution: a separate button to load the model, keep the run button disabled until it is readied.
  62. # ~ print("Loading model...")
  63. # ~ theModel = currentBestModel(IMG_SHAPE_TUPPLE)
  64. # ~ print("printing summary...")
  65. # ~ theModel.summary()
  66. #Load the checkpoint weights.
  67. # ~ print("Loading weights...")
  68. # ~ theModel.load_weights(os.path.abspath(CHECKPOINT_FOLDER))
  69. #Turn the input images into a dataset?
  70. print("Loading the dataset...")
  71. images_ds, originalFullNames = createDatasetFromImages(sourceStr)
  72. #normalize file paths for all operating systems
  73. originalFullNames = normalizeAllNames(originalFullNames)
  74. #strip base path from original names
  75. originalNames = stripBasepathFromFilenames(originalFullNames)
  76. #It might be faster to load the model and weights separately. need testing.
  77. startModelLoadTime = time.time()
  78. print("Loading model...")
  79. # ~ print("COMENTED OUT FOR TESTING!")
  80. from tensorflow.keras.models import load_model
  81. theModel = load_model(CHECKPOINT_FOLDER)
  82. theModel.summary()
  83. mLoadElapsed = time.time() - startModelLoadTime
  84. print("Loading model complete in: " + str(mLoadElapsed) + " seconds.")
  85. #Get a list of predictions
  86. print("Making predictions...")
  87. startTime = time.time()
  88. predictionsArray = theModel.predict( \
  89. images_ds,
  90. verbose = 1,
  91. # ~ steps = 2, #only predict two batches of 32 pictures to test faster.
  92. )
  93. elapsedTime = time.time() - startTime
  94. print(str(predictionsArray))
  95. print("Prediction took: " + str(elapsedTime) + " seconds.")
  96. print("Copying files...")
  97. copyPredictions(originalFullNames, originalNames, predictionsArray, destStr, CLASS_NAMES_LIST_INT, CLASS_NAMES_LIST_STR)
  98. print("Done!")
  99. def normalizeAllNames(originalFullNames):
  100. outList = []
  101. for name in originalFullNames:
  102. outList.append( os.path.normpath(name) )
  103. return outList
  104. def copyPredictions(originalFullNames, originalNames, predictionsArray, destStr, classNamesListInt, classNamesListStr):
  105. import shutil
  106. #Get predicted labels in integer form.
  107. labelsListInt = getPredictedLabels(predictionsArray)
  108. #get all predicted labels in string form for use as output folders
  109. labelsListStr = getAllOutFoldersStr(classNamesListStr, labelsListInt)
  110. for i in range(len(predictionsArray)):
  111. thisOutClassFolder = labelsListStr[i] #get this out folder
  112. thisOutName = originalNames[i]
  113. #get full path of output name
  114. thisOutputFolder = os.path.join(destStr, thisOutClassFolder)
  115. #copy original to destination
  116. thisFullOriginalName = originalFullNames[i]
  117. try:
  118. shutil.copy2( thisFullOriginalName, thisOutputFolder)
  119. except:
  120. print("copy skipping: " + str(thisName))
  121. #Takes a list of file paths and returns a tensorflow dataset object.
  122. #Resize to the same size as the model was trained on.
  123. #NOTE: If we instead load each image individually with: tf.keras.preprocessing.image.load_img()
  124. #We can save them in a numpy array
  125. #then we can use a for loop to predict on each image individually
  126. #this way we can update a loading bar to show how much time is left.
  127. #OR EVEN BETTER:
  128. #To save RAM on their resource-constrained government workstations, we can load each
  129. #image individually, and then predict on that one image right away.
  130. #That way there is only ever one image in ram. The downside is that there
  131. #will probably be no batching or multiprocessing.? We'll have to test and see!
  132. #OR we could combine the two methods:
  133. #We could load BATCH_NUM images from files, and combine them into a numpy array,
  134. #then predict on each image in the array and sort, then continue with
  135. #the rest of the dataset in this way.
  136. def createDatasetFromImages(sourceFolderStr):
  137. from tensorflow.keras.preprocessing import image_dataset_from_directory
  138. out_ds = image_dataset_from_directory( \
  139. sourceFolderStr,
  140. labels = None,
  141. label_mode = None,
  142. color_mode = "rgb",
  143. image_size = (IMG_HEIGHT, IMG_WIDTH), #triple check it is (h, w). Pillow files are (w, h) and need conversion to numpy/tensorflow by swapping height and width dimensions. (transpose?)
  144. batch_size = 32, #this might need tweaking depending on how much ram their computers have. 32 is default.
  145. shuffle = False,
  146. interpolation = "bilinear", #default is bilinear
  147. )
  148. # Found undocumented filename return lol
  149. # https://stackoverflow.com/questions/62166588/how-to-obtain-filenames-during-prediction-while-using-tf-keras-preprocessing-ima
  150. # ~ fnames = out_ds.file_paths
  151. # ~ for name in fnames:
  152. # ~ print(name)
  153. outNames = out_ds.file_paths
  154. from tensorflow import data
  155. AUTOTUNE = data.AUTOTUNE
  156. from tensorflow.keras.layers import Rescaling
  157. normalization_layer = Rescaling(1./255) #for newer versions of tensorflow
  158. out_ds = out_ds.map(lambda x: normalization_layer(x), num_parallel_calls=AUTOTUNE)
  159. return out_ds, outNames
  160. def createOutputFoldernames(namesList, destStr):
  161. outFNames = []
  162. destNormalized = os.path.normpath(destStr)
  163. for name in namesList:
  164. outFNames.append( os.path.join(destNormalized, name) )
  165. return outFNames
  166. # Creates the necessary directories.
  167. def makeDirectories(listOfFoldersToCreate):
  168. for folder in listOfFoldersToCreate:
  169. if not os.path.isdir(folder):
  170. os.makedirs(folder)
  171. #deletes each dir within a list
  172. def deleteDirectories(listDirsToDelete):
  173. import shutil
  174. for folder in listDirsToDelete:
  175. if os.path.isdir(folder):
  176. shutil.rmtree(folder, ignore_errors = True)
  177. #Need to preserve filenames with os.walk (thats how tensorflow loads them when not shuffled.)
  178. #Investigate! Make sure that the images have the right names when they come out! A small class like weasel should be good for that.
  179. def sortPredictions(images_ds, predictionsArray, sourceStr, destStr, classNamesListInt, classNamesListStr):
  180. from skimage.io import imsave
  181. from skimage.util import img_as_ubyte
  182. import numpy as np
  183. #Get list of predictions in int form
  184. labelsListInt = getPredictedLabels(predictionsArray)
  185. #get list of output names given the input names using os.walk (without a base path)
  186. outNamesList = getListOfFilenames(sourceStr)
  187. #Put images in the correct places.
  188. i = 0
  189. for batch in images_ds:
  190. batchArr = np.asarray(batch)
  191. while batchArr is not None: #HUHUHUHU
  192. for j in range(len(batchArr)):
  193. thisImg = img_as_ubyte( batchArr[j] )
  194. thisFolderStr = getOutFolderNameStr(classNamesListStr, labelsListInt[i])
  195. thisFileName = outNamesList[i]
  196. fnameStr = os.path.join(destStr, thisFolderStr, thisFileName)
  197. imsave(fnameStr, thisImg)
  198. i += 1
  199. # ~ for i in range(len(images_ds)):
  200. #could we do for i in range len(_ds) * batch_size .. for j in range len (_ds)??
  201. def getAllOutFoldersStr(classNamesListStr, labelsListInt):
  202. outFoldersList = []
  203. for labelInt in labelsListInt:
  204. outFoldersList.append( getOutFolderNameStr(classNamesListStr, labelInt ) )
  205. return outFoldersList
  206. def getOutFolderNameStr(classNamesListStr, classInt):
  207. return classNamesListStr[ classInt ]
  208. #Returns a list of filenames from the input directory
  209. #uses os.walk, so it should be the same order as tensorflow loads them!!!!!!!!!!!!!!!!!!!!!!!!!!!maybe
  210. #Currently not getting the same filenames as tensorflow :(
  211. def getListOfFilenames(baseDirectory, include_base = False):
  212. myNames = []
  213. for (root, dirNames, fileNames) in os.walk(baseDirectory):
  214. for aFile in fileNames:
  215. if include_base:
  216. myNames.append( os.path.join( root, aFile ) )
  217. else:
  218. myNames.append(aFile)
  219. return myNames
  220. def stripBasepathFromFilenames(inList):
  221. outList = []
  222. for name in inList:
  223. outList.append( os.path.basename(name) )
  224. return outList
  225. #Transform scores array into predicted labels.
  226. def getPredictedLabels(predictedScores):
  227. import numpy as np
  228. outList = []
  229. for score in predictedScores:
  230. outList.append(np.argmax(score))
  231. return np.asarray(outList)