123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- #
- # animal-sorter.py
- #
- # Sorts the animal pictures into folders based on a trained tensorflow model.
- #
- # Copyright 2022 Stephen Stengel <stephen.stengel@cwu.edu> and friends
- #
- import os
- # ~ from models import currentBestModel
#Current plan:
#Start with making the sorting function work.
#Add the function to the run button with default folder values for testing.
#Then:
#Open up a splash screen when the user clicks on the exe.
#Load up the required imports.
#Delete the splash screen with gtk.clear or something similar.
#Load up the actual program window.
#In the main window, use the file paths as the inputs to the sorting function.
# Integer class ids. Each id is the index of that class's name in
# CLASS_NAMES_LIST_STR, so the two lists below must stay in the same order.
CLASS_BOBCAT = 0
CLASS_COYOTE = 1
CLASS_DEER = 2
CLASS_ELK = 3
CLASS_HUMAN = 4
CLASS_NOT_INTERESTING = 5
CLASS_RACCOON = 6
CLASS_WEASEL = 7

# Class names in string form; these are also used as output sub-folder names.
CLASS_BOBCAT_STRING = "bobcat"
CLASS_COYOTE_STRING = "coyote"
CLASS_DEER_STRING = "deer"
CLASS_ELK_STRING = "elk"
CLASS_HUMAN_STRING = "human"
CLASS_NOT_INTERESTING_STRING = "not"
CLASS_RACCOON_STRING = "raccoon"
CLASS_WEASEL_STRING = "weasel"

CLASS_NAMES_LIST_INT = [
    CLASS_BOBCAT,
    CLASS_COYOTE,
    CLASS_DEER,
    CLASS_ELK,
    CLASS_HUMAN,
    CLASS_NOT_INTERESTING,
    CLASS_RACCOON,
    CLASS_WEASEL,
]
CLASS_NAMES_LIST_STR = [
    CLASS_BOBCAT_STRING,
    CLASS_COYOTE_STRING,
    CLASS_DEER_STRING,
    CLASS_ELK_STRING,
    CLASS_HUMAN_STRING,
    CLASS_NOT_INTERESTING_STRING,
    CLASS_RACCOON_STRING,
    CLASS_WEASEL_STRING,
]

## We really should make a config file so that we don't need all these globals. ##
# Image geometry used when loading pictures for the model.
IMG_WIDTH = 100
IMG_HEIGHT = 100
IMG_CHANNELS = 3
IMG_SHAPE_TUPPLE = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)

# Saved-model location; a relative path, so it is resolved against the
# current working directory at load time.
CHECKPOINT_FOLDER = os.path.normpath("./checkpoint/")
print(f"Checkpoint folder: {CHECKPOINT_FOLDER}")

# Testing print
PRESENT_DIRECTORY = os.path.normpath(os.path.dirname(__file__))
print(f"Present directory: {PRESENT_DIRECTORY}")
def sortAnimalsIntoFolders(sourceStr, destStr):
    """Classify the images under sourceStr and copy each into a class folder in destStr.

    Steps, in order: create one output folder per entry of
    CLASS_NAMES_LIST_STR, load the source images as a dataset, load the saved
    model from CHECKPOINT_FOLDER, predict a class for every image, then copy
    each original file into the folder of its predicted class.

    Args:
        sourceStr: Directory containing the images to sort.
        destStr: Directory in which the per-class folders are created.
    """
    # Imported lazily inside the function -- presumably so importing this
    # module stays cheap for the GUI/splash screen; confirm before hoisting.
    import time

    print("Source dir: " + str(sourceStr))
    print("Destenation dir: " + str(destStr))

    # Create the folder structure within the destination directory.
    print("Setting up output directories...")
    foldersToCreate = createOutputFoldernames(CLASS_NAMES_LIST_STR, destStr)
    makeDirectories(foldersToCreate)

    # Turn the input images into a dataset and remember where each came from.
    print("Loading the dataset...")
    images_ds, originalFullNames = createDatasetFromImages(sourceStr)

    # Normalize file paths for all operating systems, then strip the base
    # path to get the bare file names.
    originalFullNames = normalizeAllNames(originalFullNames)
    originalNames = stripBasepathFromFilenames(originalFullNames)

    # NOTE: loading the model here blocks the caller, so a GUI cannot update
    # its window until this finishes. Options that were considered: load it
    # during the splash screen and pass it in as a parameter, keep it as a
    # field of a class, or add a separate "load model" button.
    # It might also be faster to load the model and weights separately;
    # that still needs testing.
    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    startModelLoadTime = time.perf_counter()
    print("Loading model...")
    from tensorflow.keras.models import load_model
    theModel = load_model(CHECKPOINT_FOLDER)
    theModel.summary()
    mLoadElapsed = time.perf_counter() - startModelLoadTime
    print("Loading model complete in: " + str(mLoadElapsed) + " seconds.")

    # Get the array of prediction scores, one row per image.
    print("Making predictions...")
    startTime = time.perf_counter()
    predictionsArray = theModel.predict(
        images_ds,
        verbose=1,
        # steps=2,  # only predict two batches to test faster
    )
    elapsedTime = time.perf_counter() - startTime
    print(str(predictionsArray))
    print("Prediction took: " + str(elapsedTime) + " seconds.")

    print("Copying files...")
    copyPredictions(originalFullNames, originalNames, predictionsArray, destStr, CLASS_NAMES_LIST_INT, CLASS_NAMES_LIST_STR)
    print("Done!")
def normalizeAllNames(originalFullNames):
    """Return a new list with os.path.normpath applied to every given path."""
    return [os.path.normpath(fullName) for fullName in originalFullNames]
def copyPredictions(originalFullNames, originalNames, predictionsArray, destStr, classNamesListInt, classNamesListStr):
    """Copy every source image into the destination folder of its predicted class.

    Args:
        originalFullNames: Full source path of each image, index-aligned with
            predictionsArray.
        originalNames: Base names of the same images. Unused; kept so existing
            callers keep working.
        predictionsArray: One row of class scores per image.
        destStr: Root output directory holding one sub-folder per class.
        classNamesListInt: Integer class ids. Unused; kept so existing callers
            keep working.
        classNamesListStr: Class folder names, indexed by predicted class id.

    A file that cannot be copied is reported and skipped, so one bad file does
    not abort the whole run.
    """
    import shutil

    # Predicted labels in integer form, then as output folder names.
    labelsListInt = getPredictedLabels(predictionsArray)
    labelsListStr = getAllOutFoldersStr(classNamesListStr, labelsListInt)

    for i, thisOutClassFolder in enumerate(labelsListStr):
        thisFullOriginalName = originalFullNames[i]
        thisOutputFolder = os.path.join(destStr, thisOutClassFolder)

        try:
            # copy2 keeps the original file name when the target is a folder.
            shutil.copy2(thisFullOriginalName, thisOutputFolder)
        except OSError as err:
            # Copy failures (missing file, permissions, same file, ...) are
            # all OSError subclasses. The handler must only use names that
            # exist here, otherwise it raises NameError instead of skipping.
            print("copy skipping: " + str(thisFullOriginalName) + " (" + str(err) + ")")
#Takes a source folder path and returns a tensorflow dataset object together
#with the list of file paths that the dataset was built from.
#Images are resized to the same size as the model was trained on.
#NOTE: If we instead load each image individually with: tf.keras.preprocessing.image.load_img()
#we can save them in a numpy array,
#then we can use a for loop to predict on each image individually.
#This way we can update a loading bar to show how much time is left.
#OR EVEN BETTER:
#To save RAM on low-spec government workstations, we can load each
#image individually, and then predict that one image right away.
#That way there is only ever one image in RAM. The downside is that there
#will probably be no batching or multiprocessing. We'll have to test and see!
#OR we could combine the two methods:
#We could load BATCH_NUM images from files, and combine them into a numpy array,
#then predict on each image in the array and sort, then continue with
#the rest of the dataset in this way.
def createDatasetFromImages(sourceFolderStr, batchSize=32):
    """Load the images under sourceFolderStr as an unlabeled, unshuffled dataset.

    Images are read as RGB, resized to (IMG_HEIGHT, IMG_WIDTH) with bilinear
    interpolation, and rescaled by 1/255.

    Args:
        sourceFolderStr: Directory to read the images from.
        batchSize: Number of images per batch. Defaults to 32 (the value that
            was previously hard-coded); lower it on machines with little RAM.

    Returns:
        A tuple (dataset, file_paths): the rescaled dataset and the
        ``file_paths`` list reported by the dataset loader.
    """
    from tensorflow import data
    from tensorflow.keras.layers import Rescaling
    from tensorflow.keras.preprocessing import image_dataset_from_directory

    out_ds = image_dataset_from_directory(
        sourceFolderStr,
        labels=None,
        label_mode=None,
        color_mode="rgb",
        # image_size is (height, width). Pillow sizes are (width, height),
        # so do not copy a Pillow size in here without swapping.
        image_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=batchSize,
        # Not shuffled -- presumably so the predictions stay index-aligned
        # with file_paths; the caller indexes both with the same counter.
        shuffle=False,
        interpolation="bilinear",  # default is bilinear
    )

    # Undocumented filename attribute, see:
    # https://stackoverflow.com/questions/62166588/how-to-obtain-filenames-during-prediction-while-using-tf-keras-preprocessing-ima
    # Read it here, before map() replaces out_ds with the mapped dataset.
    outNames = out_ds.file_paths

    normalization_layer = Rescaling(1./255)  # for newer versions of tensorflow
    out_ds = out_ds.map(lambda x: normalization_layer(x), num_parallel_calls=data.AUTOTUNE)

    return out_ds, outNames
def createOutputFoldernames(namesList, destStr):
    """Return the normalized destStr joined with each name in namesList, in order."""
    normalizedDest = os.path.normpath(destStr)
    return [os.path.join(normalizedDest, className) for className in namesList]
-
- # Creates the necessary directories.
def makeDirectories(listOfFoldersToCreate):
    """Create every folder in listOfFoldersToCreate, including missing parents.

    Folders that already exist are left untouched.

    Args:
        listOfFoldersToCreate: Iterable of directory paths.

    Raises:
        FileExistsError: If a path exists but is not a directory.
    """
    for folder in listOfFoldersToCreate:
        # exist_ok=True avoids the check-then-create race of a separate
        # isdir() test followed by makedirs().
        os.makedirs(folder, exist_ok=True)
- #deletes each dir within a list
def deleteDirectories(listDirsToDelete):
    """Recursively remove each existing directory named in listDirsToDelete.

    Entries that are not directories are skipped, and errors raised while
    removing a tree are ignored.
    """
    from shutil import rmtree

    # filter() is lazy, so each path is tested right before it is removed.
    for target in filter(os.path.isdir, listDirsToDelete):
        rmtree(target, ignore_errors=True)
#Need to preserve filenames with os.walk (that is supposedly how tensorflow loads them when not shuffled).
#Investigate! Make sure that the images have the right names when they come out! A small class like weasel should be good for checking that.
def sortPredictions(images_ds, predictionsArray, sourceStr, destStr, classNamesListInt, classNamesListStr):
    """Save every image of images_ds into the folder of its predicted class.

    Args:
        images_ds: Iterable of image batches.
        predictionsArray: One row of class scores per image, in the same
            order in which images_ds yields the images.
        sourceStr: Directory the images were loaded from; used only to
            recover the output file names.
        destStr: Root output directory holding one sub-folder per class.
        classNamesListInt: Integer class ids. Unused; kept so existing callers
            keep working.
        classNamesListStr: Class folder names, indexed by predicted class id.
    """
    from skimage.io import imsave
    from skimage.util import img_as_ubyte
    import numpy as np

    # Predictions in integer form.
    labelsListInt = getPredictedLabels(predictionsArray)
    # Output names (without a base path) taken from os.walk.
    # NOTE(review): it is unverified that os.walk order matches the order in
    # which the dataset yields images; check before trusting the file names.
    outNamesList = getListOfFilenames(sourceStr)

    # i is the running image index across all batches.
    i = 0
    for batch in images_ds:
        # Each batch is handled exactly once; looping on the batch itself
        # would never terminate because the batch is never reassigned.
        for thisImg in np.asarray(batch):
            thisFolderStr = getOutFolderNameStr(classNamesListStr, labelsListInt[i])
            fnameStr = os.path.join(destStr, thisFolderStr, outNamesList[i])
            imsave(fnameStr, img_as_ubyte(thisImg))
            i += 1
-
- # ~ for i in range(len(images_ds)):
-
-
- #could we do for i in range len(_ds) * batch_size .. for j in range len (_ds)??
def getAllOutFoldersStr(classNamesListStr, labelsListInt):
    """Translate each integer label into its class folder name, keeping order."""
    return [getOutFolderNameStr(classNamesListStr, label) for label in labelsListInt]
def getOutFolderNameStr(classNamesListStr, classInt):
    """Return the entry of classNamesListStr found at index classInt."""
    folderName = classNamesListStr[classInt]
    return folderName
#Returns a list of filenames from the input directory.
#Uses os.walk, which was expected to give the same order that tensorflow loads the files in (unconfirmed).
#Currently this is NOT returning the same filenames as tensorflow.
def getListOfFilenames(baseDirectory, include_base = False):
    """Walk baseDirectory recursively and list every file found, in os.walk order.

    Args:
        baseDirectory: Directory to walk.
        include_base: When true, each entry is the walked directory joined
            with the file name; otherwise only the bare file name is kept.

    Returns:
        A list of file names or paths.
    """
    collected = []
    for currentDir, _subDirs, namesHere in os.walk(baseDirectory):
        if include_base:
            collected.extend(os.path.join(currentDir, fileName) for fileName in namesHere)
        else:
            collected.extend(namesHere)
    return collected
def stripBasepathFromFilenames(inList):
    """Return the final path component (os.path.basename) of every entry in inList."""
    return [os.path.basename(fullPath) for fullPath in inList]
- #Transform scores array into predicted labels.
def getPredictedLabels(predictedScores):
    """Return an array holding, for each row of scores, the index of its largest value."""
    import numpy as np

    return np.asarray([np.argmax(scoreRow) for scoreRow in predictedScores])
|