# upload_to_s3.py
#
# Copyright (c) Contributors to the Open 3D Engine Project.
# For complete copyright and license terms please see the LICENSE at the root of this distribution.
#
# SPDX-License-Identifier: Apache-2.0 OR MIT
#
#
'''
Usage:
Use EC2 role to upload all .zip and .MD5 files in %WORKSPACE% folder to bucket ly-packages-mainline:
python upload_to_s3.py --base_dir %WORKSPACE% --file_regex "(.*zip$|.*MD5$)" --bucket ly-packages-mainline
Use profile to upload all .zip and .MD5 files in %WORKSPACE% folder to bucket ly-packages-mainline:
python upload_to_s3.py --base_dir %WORKSPACE% --profile profile --file_regex "(.*zip$|.*MD5$)" --bucket ly-packages-mainline
Another example usage for uploading all .png and .ppm files inside base_dir and only subdirectories within base_dir
(note: --search_subdirectories is a flag and takes no value):
python upload_to_s3.py --base_dir %WORKSPACE%/path/to/files --file_regex "(.*png$|.*ppm$)" --bucket screenshot-test-bucket --search_subdirectories --key_prefix Test
'''
  17. import os
  18. import re
  19. import json
  20. import time
  21. import boto3
  22. import pathlib
  23. from optparse import OptionParser
  24. def parse_args():
  25. parser = OptionParser()
  26. parser.add_option("--base_dir", dest="base_dir", default=os.getcwd(), help="Base directory to upload files, If not given, then current directory is used.")
  27. parser.add_option("--file_regex", dest="file_regex", default=None, help="Regular expression that used to match file names to upload.")
  28. parser.add_option("--profile", dest="profile", default=None, help="The name of a profile to use. If not given, then the default profile is used.")
  29. parser.add_option("--bucket", dest="bucket", default=None, help="S3 bucket the files are uploaded to.")
  30. parser.add_option("--key_prefix", dest="key_prefix", default='', help="Object key prefix.")
  31. parser.add_option("--search_subdirectories", dest="search_subdirectories", action='store_true',
  32. help="Toggle for searching for files in subdirectories beneath base_dir, defaults to False")
  33. '''
  34. ExtraArgs used to call s3.upload_file(), should be in json format. extra_args key must be one of: ACL, CacheControl, ContentDisposition, ContentEncoding, ContentLanguage, ContentType, Expires,
  35. GrantFullControl, GrantRead, GrantReadACP, GrantWriteACP, Metadata, RequestPayer, ServerSideEncryption, StorageClass,
  36. SSECustomerAlgorithm, SSECustomerKey, SSECustomerKeyMD5, SSEKMSKeyId, WebsiteRedirectLocation
  37. '''
  38. parser.add_option("--extra_args", dest="extra_args", default=None, help="Additional parameters used to upload file.")
  39. parser.add_option("--max_retry", dest="max_retry", default=1, help="Maximum retry times to upload file.")
  40. (options, args) = parser.parse_args()
  41. if not os.path.isdir(options.base_dir):
  42. error('{} is not a valid directory'.format(options.base_dir))
  43. if not options.file_regex:
  44. error('Use --file_regex to specify regular expression that used to match file names to upload.')
  45. if not options.bucket:
  46. error('Use --bucket to specify bucket that the files are uploaded to.')
  47. return options
  48. def error(message):
  49. print(f'Error: {message}')
  50. exit(1)
  51. def get_client(service_name, profile_name):
  52. session = boto3.session.Session(profile_name=profile_name)
  53. client = session.client(service_name)
  54. return client
  55. def get_files_to_upload(base_dir, regex, search_subdirectories):
  56. """
  57. Uses a regex expression pattern to return a list of file paths for files to upload to the s3 bucket.
  58. :param base_dir: path for the base directory, if using search_subdirectories=True ensure this is the parent.
  59. :param regex: pattern to use for regex searching, ex. "(.*zip$|.*MD5$)"
  60. :param search_subdirectories: boolean False for only getting files in base_dir, True to get all files in base_dir
  61. and any subdirectory inside base_dir, defaults to False from the parse_args() function.
  62. :return: a list of string file paths for files to upload to the s3 bucket matching the regex expression.
  63. """
  64. # Get all file names in base directory
  65. files = [os.path.join(base_dir, x) for x in os.listdir(base_dir) if os.path.isfile(os.path.join(base_dir, x))]
  66. if search_subdirectories: # Get all file names in base directory and any subdirectories.
  67. for subdirectory in os.walk(base_dir):
  68. # Example output for subdirectory:
  69. # ('C:\path\to\base_dir\', ['Subfolder1', 'Subfolder2'], ['file1', 'file2'])
  70. subdirectory_file_path = subdirectory[0]
  71. subdirectory_files = subdirectory[2]
  72. if subdirectory_files:
  73. subdirectory_file_paths = _build_file_paths(subdirectory_file_path, subdirectory_files)
  74. files.extend(subdirectory_file_paths)
  75. try:
  76. regex = json.loads(regex) # strip the surround quotes, if they exist
  77. except:
  78. print(f'WARNING: failed to call json.loads() for regex: "{regex}"')
  79. pass
  80. # Get all file names matching the regular expression, those file will be uploaded to S3
  81. regex_files_to_upload = [x for x in files if re.match(regex, x)]
  82. return regex_files_to_upload
  83. def s3_upload_file(client, base_dir, file, bucket, key_prefix=None, extra_args=None, max_retry=1):
  84. try:
  85. # replicate the local folder structure relative to search root in the bucket path
  86. s3_file_path = pathlib.Path(file).relative_to(base_dir).as_posix()
  87. except ValueError as err:
  88. print(f'Unexpected file error: {err}')
  89. return False
  90. key = s3_file_path if key_prefix is None else f'{key_prefix}/{s3_file_path}'
  91. error_message = None
  92. for x in range(max_retry):
  93. try:
  94. client.upload_file(file, bucket, key, ExtraArgs=extra_args)
  95. return True
  96. except Exception as err:
  97. time.sleep(0.1) # Sleep for 100 milliseconds between retries.
  98. error_message = err
  99. print(f'Upload failed - Exception while uploading: {error_message}')
  100. return False
  101. def _build_file_paths(path_to_files, files_in_path):
  102. """
  103. Given a path containing files, returns a list of strings representing complete paths to each file.
  104. :param path_to_files: path to the location storing the files to create string paths for
  105. :param files_in_path: list of files that are inside the path_to_files path string
  106. :return: list of fully parsed file path strings from path_to_files path.
  107. """
  108. parsed_file_paths = []
  109. for file_in_path in files_in_path:
  110. complete_file_path = os.path.join(path_to_files, file_in_path)
  111. if os.path.isfile(complete_file_path):
  112. parsed_file_paths.append(complete_file_path)
  113. return parsed_file_paths
  114. if __name__ == "__main__":
  115. options = parse_args()
  116. client = get_client('s3', options.profile)
  117. files_to_upload = get_files_to_upload(options.base_dir, options.file_regex, options.search_subdirectories)
  118. extra_args = json.loads(options.extra_args) if options.extra_args else None
  119. print(('Uploading {} files to bucket {}.'.format(len(files_to_upload), options.bucket)))
  120. failure = []
  121. success = []
  122. for file in files_to_upload:
  123. if not s3_upload_file(client, options.base_dir, file, options.bucket, options.key_prefix, extra_args, 2):
  124. failure.append(file)
  125. else:
  126. success.append(file)
  127. print('Upload finished.')
  128. print(('{} files are uploaded successfully:'.format(len(success))))
  129. print(('\n'.join(success)))
  130. if len(failure) > 0:
  131. print(('{} files failed to upload:'.format(len(failure))))
  132. print(('\n'.join(failure)))
  133. # Exit with error code 1 if any file is failed to upload
  134. exit(1)