#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# animal-sorter.py
#
# Sorts the animal pictures into folders based on a trained tensorflow model.
#
# Copyright 2022 Stephen Stengel <stephen.stengel@cwu.edu> and friends
#
  10. import os
  11. #Not currently needed. We load a saved version of the whole model.
  12. #We would need this if we switched to loading weights
  13. # ~ from models import currentBestModel
  14. CLASS_NAMES_LIST_INT = []
  15. CLASS_NAMES_LIST_STR = []
  16. IMG_WIDTH = None
  17. IMG_HEIGHT = None
  18. IMG_CHANNELS = None
  19. IMG_SHAPE_TUPPLE = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)
  20. BATCH_SIZE = None
  21. CHECKPOINT_FOLDER = None
  22. DEBUG_MODE = False
  23. def sortAnimalsIntoFolders(sourceStr, destStr):
  24. import time
  25. settingsFileName = os.path.normpath("settings.ini")
  26. settingsDict = getSettingsFromFile(settingsFileName)
  27. updateGlobalsFromSettings(settingsDict)
  28. if DEBUG_MODE:
  29. print("Settings retrieved from " + settingsFileName)
  30. print("IMG_WIDTH: " + str(IMG_WIDTH))
  31. print("IMG_HEIGHT: " + str(IMG_HEIGHT))
  32. print("IMG_CHANNELS: " + str(IMG_CHANNELS))
  33. print("BATCH_SIZE: " + str(BATCH_SIZE))
  34. print("CHECKPOINT_FOLDER: " + str(CHECKPOINT_FOLDER))
  35. print("Source dir: " + str(sourceStr))
  36. print("Destenation dir: " + str(destStr))
  37. #create the folder structure within the destination directory.
  38. print("Setting up output directories...")
  39. foldersToCreate = createOutputFoldernames(CLASS_NAMES_LIST_STR, destStr)
  40. makeDirectories(foldersToCreate)
  41. #Turn the input images into a dataset?
  42. print("Loading the dataset...")
  43. startDataLoad = time.time()
  44. images_ds, originalFullNames = createDatasetFromImages(sourceStr)
  45. endDataLoad = time.time()
  46. print("Dataset loaded in " + str(round(endDataLoad - startDataLoad, 4)) + " seconds.")
  47. #normalize file paths for all operating systems
  48. originalFullNames = normalizeAllNames(originalFullNames)
  49. #strip base path from original names
  50. originalNames = stripBasepathFromFilenames(originalFullNames)
  51. #It might be faster to load the model and weights separately. need testing.
  52. #how to load the checkpoint weights separate from the model
  53. # ~ theModel.load_weights(os.path.abspath(CHECKPOINT_FOLDER))
  54. startModelLoadTime = time.time()
  55. print("Loading model...")
  56. from tensorflow.keras.models import load_model
  57. theModel = load_model(CHECKPOINT_FOLDER)
  58. theModel.summary()
  59. mLoadElapsed = time.time() - startModelLoadTime
  60. print("Loading model complete in: " + str(round(mLoadElapsed, 4)) + " seconds.")
  61. #Get a list of predictions
  62. print("Making predictions...")
  63. startTime = time.time()
  64. predictionsArray = theModel.predict( \
  65. images_ds,
  66. verbose = 1,
  67. )
  68. elapsedTime = time.time() - startTime
  69. print(str(predictionsArray))
  70. print("Prediction took: " + str(round(elapsedTime, 4)) + " seconds.")
  71. print("Copying files...")
  72. copyPredictions(originalFullNames, originalNames, predictionsArray, destStr, CLASS_NAMES_LIST_INT, CLASS_NAMES_LIST_STR)
  73. #This function sets a bunch of settings from a file.
  74. #Maybe the threading return value code could make a popup on error.
  75. def getSettingsFromFile(settingsFileName):
  76. fileContents = []
  77. with open(settingsFileName, "r") as settingsFile:
  78. fileContents = settingsFile.readlines()
  79. #get only non comment lines
  80. goodContents = []
  81. for line in fileContents:
  82. if not line.startswith("#") and line != "\n":
  83. goodContents.append(line.rstrip())
  84. #convert to a dictionary
  85. settingsDict = {}
  86. for thing in goodContents:
  87. name, value = thing.split("=")
  88. settingsDict.update({name : value})
  89. if DEBUG_MODE:
  90. for thing in settingsDict:
  91. print(str(thing) + ": ", end="")
  92. print(settingsDict[thing])
  93. return settingsDict
  94. def updateGlobalsFromSettings(settingsDict):
  95. ##! BIG NOTE !##
  96. # The names of the items in this settingsDict dictionary are not
  97. # automatically the same as in the settings file. Take care to copy
  98. # paste them if you update this. There might be a clever way to
  99. # get the same name automatically.
  100. global IMG_WIDTH
  101. IMG_WIDTH = int(settingsDict["IMG_WIDTH"])
  102. global IMG_HEIGHT
  103. IMG_HEIGHT = int(settingsDict["IMG_HEIGHT"])
  104. global IMG_CHANNELS
  105. IMG_CHANNELS = int(settingsDict["IMG_CHANNELS"])
  106. global CHECKPOINT_FOLDER
  107. CHECKPOINT_FOLDER = settingsDict["CHECKPOINT_FOLDER"] #This one remains a string
  108. global BATCH_SIZE
  109. BATCH_SIZE = int(settingsDict["BATCH_SIZE"])
  110. #parse the class list
  111. rawClassNamesStr = settingsDict["classNames"]
  112. splitClassNamesList = rawClassNamesStr.split(",")
  113. global CLASS_NAMES_LIST_STR
  114. CLASS_NAMES_LIST_STR = splitClassNamesList
  115. #Not sure what I was thinking way back when I made CLASS_NAMES_LIST_INT in the loader haha.
  116. # ~ intsListLol = [x for x in range(len(splitClassNamesList))] #A little roundabout
  117. intsListLol = range(len(splitClassNamesList)) #Apparently equivalent.
  118. global CLASS_NAMES_LIST_INT
  119. CLASS_NAMES_LIST_INT = intsListLol
  120. if DEBUG_MODE:
  121. print("rawclassnamesstr: " + str(rawClassNamesStr))
  122. print("splitClassNamesList: " + str(splitClassNamesList))
  123. print("CLASS_NAMES_LIST_INT: " + str(CLASS_NAMES_LIST_INT))
  124. for thing in CLASS_NAMES_LIST_INT:
  125. print(thing)
  126. print("CLASS_NAMES_LIST_STR: " + str(CLASS_NAMES_LIST_STR))
  127. for thing in CLASS_NAMES_LIST_STR:
  128. print(thing)
  129. testBoi = range(100, 0, -1)
  130. print("testboi: " + str(testBoi))
  131. print("now tryina get just 5...")
  132. print("testBoi[5]: " + str(testBoi[5]))
  133. def normalizeAllNames(originalFullNames):
  134. outList = []
  135. for name in originalFullNames:
  136. outList.append( os.path.normpath(name) )
  137. return outList
  138. #This function copys the original full size image into the correct
  139. #destination folder based on the result of the search.
  140. def copyPredictions(originalFullNames, originalNames, predictionsArray, destStr, classNamesListInt, classNamesListStr):
  141. import shutil
  142. #Get predicted labels in integer form.
  143. labelsListInt = getPredictedLabels(predictionsArray)
  144. #get all predicted labels in string form for use as output folders
  145. labelsListStr = getAllOutFoldersStr(classNamesListStr, labelsListInt)
  146. for i in range(len(predictionsArray)):
  147. thisOutClassFolder = labelsListStr[i] #get this out folder
  148. thisOutName = originalNames[i]
  149. #get full path of output name
  150. thisOutputFolder = os.path.join(destStr, thisOutClassFolder)
  151. #copy original to destination
  152. thisFullOriginalName = originalFullNames[i]
  153. try:
  154. shutil.copy2( thisFullOriginalName, thisOutputFolder)
  155. except:
  156. print("copy skipping: " + str(thisName))
  157. #Takes a list of file paths and returns a tensorflow dataset object.
  158. #Resize to the same size as the model was trained on.
  159. #NOTE: If we instead load each image individually with: tf.keras.preprocessing.image.load_img()
  160. #We can save them in a numpy array
  161. #then we can use a for loop to predict on each image individually
  162. #this way we can update a loading bar to show how much time is left.
  163. #OR EVEN BETTER:
  164. #To save ram on their shitty government workstations, we can load each
  165. #image individually, and the predict that one image right away.
  166. #That way there is only ever one image in ram. The downside is that there
  167. #will probably be no batching or multiprocessing.? We'll have to test and see!
  168. #OR we could combine the two methods:
  169. #We could load BATCH_NUM images from files, and combine them into a numpy array,
  170. #then predict on each image in the array and sort, then continue with
  171. #the rest of the dataset in this way.
  172. def createDatasetFromImages(sourceFolderStr):
  173. from tensorflow.keras.preprocessing import image_dataset_from_directory
  174. out_ds = image_dataset_from_directory( \
  175. sourceFolderStr,
  176. labels = None,
  177. label_mode = None,
  178. color_mode = "rgb",
  179. image_size = (IMG_HEIGHT, IMG_WIDTH), #triple check it is (h, w). Pillow files are (w, h) and need conversion to numpy/tensorflow by swapping height and width dimensions. (transpose?)
  180. batch_size = BATCH_SIZE, #this might need tweaking depending on how much ram their computers have. 32 is default.
  181. shuffle = False,
  182. interpolation = "bilinear", #default is bilinear
  183. )
  184. # Found undocumented filename return lol
  185. # https://stackoverflow.com/questions/62166588/how-to-obtain-filenames-during-prediction-while-using-tf-keras-preprocessing-ima
  186. # ~ fnames = out_ds.file_paths
  187. # ~ for name in fnames:
  188. # ~ print(name)
  189. outNames = out_ds.file_paths
  190. from tensorflow import data
  191. AUTOTUNE = data.AUTOTUNE
  192. try:
  193. from tensorflow.keras.layers import Rescaling
  194. except:
  195. from tensorflow.keras.layers.experimental.preprocessing import Rescaling
  196. normalization_layer = Rescaling(1./255) #for newer versions of tensorflow
  197. out_ds = out_ds.map(lambda x: normalization_layer(x), num_parallel_calls=AUTOTUNE)
  198. return out_ds, outNames
  199. def createOutputFoldernames(namesList, destStr):
  200. outFNames = []
  201. destNormalized = os.path.normpath(destStr)
  202. for name in namesList:
  203. outFNames.append( os.path.join(destNormalized, name) )
  204. return outFNames
  205. # Creates the necessary directories.
  206. def makeDirectories(listOfFoldersToCreate):
  207. for folder in listOfFoldersToCreate:
  208. if not os.path.isdir(folder):
  209. os.makedirs(folder)
  210. #deletes each dir within a list
  211. def deleteDirectories(listDirsToDelete):
  212. import shutil
  213. for folder in listDirsToDelete:
  214. if os.path.isdir(folder):
  215. shutil.rmtree(folder, ignore_errors = True)
  216. def sortPredictions(images_ds, predictionsArray, sourceStr, destStr, classNamesListInt, classNamesListStr):
  217. from skimage.io import imsave
  218. from skimage.util import img_as_ubyte
  219. import numpy as np
  220. #Get list of predictions in int form
  221. labelsListInt = getPredictedLabels(predictionsArray)
  222. #get list of output names given the input names using os.walk (without a base path)
  223. outNamesList = getListOfFilenames(sourceStr)
  224. #Put images in the correct places.
  225. i = 0
  226. for batch in images_ds:
  227. batchArr = np.asarray(batch)
  228. while batchArr is not None: #HUHUHUHU
  229. for j in range(len(batchArr)):
  230. thisImg = img_as_ubyte( batchArr[j] )
  231. thisFolderStr = getOutFolderNameStr(classNamesListStr, labelsListInt[i])
  232. thisFileName = outNamesList[i]
  233. fnameStr = os.path.join(destStr, thisFolderStr, thisFileName)
  234. imsave(fnameStr, thisImg)
  235. i += 1
  236. # ~ for i in range(len(images_ds)):
  237. #could we do for i in range len(_ds) * batch_size .. for j in range len (_ds)??
  238. def getAllOutFoldersStr(classNamesListStr, labelsListInt):
  239. outFoldersList = []
  240. for labelInt in labelsListInt:
  241. outFoldersList.append( getOutFolderNameStr(classNamesListStr, labelInt ) )
  242. return outFoldersList
  243. def getOutFolderNameStr(classNamesListStr, classInt):
  244. return classNamesListStr[ classInt ]
  245. #Returns a list of filenames from the input directory
  246. def getListOfFilenames(baseDirectory, include_base = False):
  247. myNames = []
  248. for (root, dirNames, fileNames) in os.walk(baseDirectory):
  249. for aFile in fileNames:
  250. if include_base:
  251. myNames.append( os.path.join( root, aFile ) )
  252. else:
  253. myNames.append(aFile)
  254. return myNames
  255. def stripBasepathFromFilenames(inList):
  256. outList = []
  257. for name in inList:
  258. outList.append( os.path.basename(name) )
  259. return outList
  260. #Transform scores array into predicted labels.
  261. def getPredictedLabels(predictedScores):
  262. import numpy as np
  263. outList = []
  264. for score in predictedScores:
  265. outList.append(np.argmax(score))
  266. return np.asarray(outList)