ArchiveWriter.h 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352
  1. /*
  2. * Copyright (c) Contributors to the Open 3D Engine Project.
  3. * For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. *
  5. * SPDX-License-Identifier: Apache-2.0 OR MIT
  6. *
  7. */
  8. #pragma once
  9. #include <Archive/Clients/ArchiveBaseAPI.h>
  10. #include <Archive/Tools/ArchiveWriterAPI.h>
  11. #include <Clients/ArchiveTOC.h>
  12. #include <AzCore/Memory/Memory_fwd.h>
  13. #include <AzCore/RTTI/RTTIMacros.h>
  14. #include <AzCore/std/containers/unordered_map.h>
  15. #include <AzCore/std/parallel/mutex.h>
  16. #include <AzCore/std/smart_ptr/unique_ptr.h>
  17. #include <AzCore/std/utility/to_underlying.h>
  18. #include <AzCore/Task/TaskExecutor.h>
  19. namespace AZ
  20. {
  21. class TaskGraphEvent; // Forward declaration; nothing visible in this header refers to the type — presumably it is used by the implementation file (TODO confirm)
  22. } // namespace AZ
  23. namespace AZ::IO
  24. {
  25. class GenericStream; // Forward declaration; this header only names GenericStream by reference, so the complete type is not needed here
  26. } // namespace AZ::IO
  27. namespace Archive
  28. {
  29. //! Class which is used to write the O3DE Archive format into a stream
  30. //! The caller is required to supply an ArchiveWriterSettings structure
  31. //! which contains the ArchiveHeader and ArchiveTableOfContents data
  32. //! to use when writing to the Archive file
  33. //! The class can be initialized with a user supplied AZ::IO::GenericStream instance,
  34. //! in which case the stream needs to be open with OpenMode::ModeUpdate
  35. //! The reason is that locating information about any content files in order to update an existing archive
  36. //! requires read access
  37. //! The recommended OpenMode value for opening a new archive or updating an existing archive
  38. //! is as follows (NOTE(review): the enumerators are spelled OpenMode::ModeUpdate above but OpenMode::Update below — confirm against AZ::IO::OpenMode)
  39. //! constexpr OpenMode mode = OpenMode::Update | OpenMode::Append | OpenMode::Binary
  40. //! The Append option makes sure that the Archive is not truncated on open
  41. class ArchiveWriter
  42. : public IArchiveWriter
  43. {
  44. public:
  45. AZ_TYPE_INFO_WITH_NAME_DECL(ArchiveWriter);
  46. AZ_RTTI_NO_TYPE_INFO_DECL();
  47. AZ_CLASS_ALLOCATOR_DECL;
  48. ArchiveWriter(); //!< Constructs a writer without supplying settings, an archive path or an archive stream
  49. //! Creates an archive writer using the specified writer settings
  50. explicit ArchiveWriter(const ArchiveWriterSettings& writerSettings);
  51. //! Opens a file at the specified file path and takes sole ownership of it
  52. //! The ArchiveWriter will close the file on Unmount
  53. explicit ArchiveWriter(AZ::IO::PathView archivePath, const ArchiveWriterSettings& writerSettings = {});
  54. //! Takes ownership of the open stream and will optionally delete it based on the ArchiveFileDeleter
  55. explicit ArchiveWriter(ArchiveStreamPtr archiveStream, const ArchiveWriterSettings& writerSettings = {});
  56. ~ArchiveWriter(); //!< NOTE(review): presumably unmounts any mounted archive — confirm in the definition
  57. //! Opens the archive path and returns true if successful
  58. //! Will unmount any previously mounted archive
  59. bool MountArchive(AZ::IO::PathView archivePath) override;
  60. bool MountArchive(ArchiveStreamPtr archiveStream) override; //!< Overload which takes the archive as an ArchiveStreamPtr instead of a path
  61. //! Closes the handle to the mounted archive stream
  62. //! This will invoke the Commit() function to write the archive TOC
  63. //! to the stream before closing the stream
  64. void UnmountArchive() override;
  65. //! Returns whether an open archive is mounted
  66. bool IsMounted() const override;
  67. //! Writes the updated ArchiveHeader to the beginning of the stream and
  68. //! the Table of Contents to the end of the stream
  69. //!
  70. //! If this call is successful, the archive TOC has been successfully written
  71. //! This function has been marked [[nodiscard]], to ensure the caller
  72. //! checks the return value
  73. //! @return A successful expectation if the TOC has been written
  74. using CommitResult = AZStd::expected<void, ResultString>;
  75. [[nodiscard]] CommitResult Commit() override;
  76. //! Adds the content from the stream to the relative path
  77. //! @param inputStream stream class where data for the file is sourced from
  78. //! The entire stream is read into memory and written into the archive
  79. //! @param fileSettings settings used to configure the relative path to
  80. //! write to the archive for the given file data.
  81. //! It also allows users to configure the compression algorithm to use,
  82. //! and whether the AddFileToArchive logic fails if an existing file is being added
  83. //! @return ArchiveAddFileResult containing the actual file path
  84. //! as saved to the Archive TOC, the compression algorithm used
  85. //! and an Archive File Token which can be used to remove the file if need be
  86. //! On failure, the result outcome contains any errors that have occurred
  87. ArchiveAddFileResult AddFileToArchive(AZ::IO::GenericStream& inputStream,
  88. const ArchiveWriterFileSettings& fileSettings) override;
  89. //! Uses the span contents to add the file to the archive
  90. //! @param inputSpan view of data which will be written to the archive
  91. //! at the relative path supplied in the @fileSettings parameter
  92. //! @param fileSettings settings used to configure the relative path to
  93. //! write to the archive for the given file data.
  94. //! It also allows users to configure the compression algorithm to use,
  95. //! and whether the AddFileToArchive logic fails if an existing file is being added
  96. //! @return ArchiveAddFileResult containing the actual file path
  97. //! as saved to the Archive TOC, the compression algorithm used
  98. //! and an Archive File Token which can be used to remove the file if need be
  99. //! On failure, the result outcome contains any errors that have occurred
  100. ArchiveAddFileResult AddFileToArchive(AZStd::span<const AZStd::byte> inputSpan,
  101. const ArchiveWriterFileSettings& fileSettings) override;
  102. //! Searches for a relative path within the archive
  103. //! @param relativePath Relative path within archive to search for
  104. //! @return A token that identifies the Archive file if it exists
  105. //! If a file with the specified path doesn't exist, InvalidArchiveFileToken is returned
  106. ArchiveFileToken FindFile(AZ::IO::PathView relativePath) const override;
  107. //! Returns whether the archive contains a relative path
  108. //! @param relativePath Relative path within archive to search for
  109. //! @returns true if the relative path is contained within the Archive
  110. //! equivalent to `return FindFile(relativePath) != InvalidArchiveFileToken;`
  111. bool ContainsFile(AZ::IO::PathView relativePath) const override;
  112. //! Removes the file from the archive using the ArchiveFileToken
  113. //! @param filePathToken Identifier queried using FindFile or AddFileToArchive
  114. //! NOTE: The entry in the table of contents is not actually removed
  115. //! The index where the file is located using the filePathToken
  116. //! is just added to the removed file indices set
  117. //! @return ArchiveRemoveResult with metadata about how the deleted file was
  118. //! stored in the Archive
  119. ArchiveRemoveFileResult RemoveFileFromArchive(ArchiveFileToken filePathToken) override;
  120. //! Removes the file from the archive using a relative path name
  121. //! @param relativePath relative path within archive to search for
  122. //! @return ArchiveRemoveResult with metadata about how the deleted file was
  123. //! stored in the Archive
  124. ArchiveRemoveFileResult RemoveFileFromArchive(AZ::IO::PathView relativePath) override;
  125. //! Dumps metadata for the archive to the supplied generic stream
  126. //! @param metadataStream archive file metadata will be written to the stream
  127. //! @param metadataSettings settings which control the file metadata written to the stream
  128. //! @return true if metadata was successfully written
  129. bool DumpArchiveMetadata(AZ::IO::GenericStream& metadataStream,
  130. const ArchiveMetadataSettings& metadataSettings = {}) const override;
  131. private:
  132. bool ReadArchiveHeaderAndToc(); //!< NOTE(review): presumably reads the header and TOC of the mounted stream via ReadArchiveHeader/ReadArchiveTOC — confirm in the definition
  133. //! Wraps an offset of the block to write plus the block size within the final buffer
  134. //! that will be written to the archive block section
  135. //! When the file is stored uncompressed, the offset is 0 and the size is the entire
  136. //! input span supplied to @AddFileToArchive
  137. struct BlockOffsetSizePair
  138. {
  139. size_t m_offset{ AZStd::numeric_limits<size_t>::max() }; //!< Initialized to the maximum size_t value
  140. size_t m_size{}; //!< Value-initialized to 0
  141. };
  142. //! Encapsulates the compression algorithm plus an output span from compressing the data
  143. struct ContentFileBlocks
  144. {
  145. //! Stores the index into the TOC of the compression algorithm to use
  146. AZ::u8 m_compressionAlgorithmIndex{ UncompressedAlgorithmIndex };
  147. //! Stores a vector of offset, size pairs containing each block of the file to store in the
  148. //! archive raw block section
  149. AZStd::vector<BlockOffsetSizePair> m_blockOffsetSizePairs;
  150. //! Span which references the data to write
  151. //! The block offset size pairs are offsets into this span
  152. //! Each block is padded to be aligned to 512 byte boundaries
  153. //! Therefore this span will generally have a larger size()
  154. //! than the m_totalUnalignedSize member
  155. AZStd::span<const AZStd::byte> m_writeSpan;
  156. //! Stores the total compressed size of all blocks of the file
  157. //! if they were stored without alignment
  158. AZ::u64 m_totalUnalignedSize{};
  159. };
  160. using CompressContentOutcome = AZStd::expected<ContentFileBlocks, ResultString>;
  161. //! Uses the AZ Task system to compress 2 MiB blocks of a content file in parallel
  162. //! @param compressionBuffer output buffer to write compressed content
  163. //! @param fileSettings settings controlling how to write the content data into the archive stream
  164. //! @param inputDataSpan input buffer of file content to write as a file using the file path
  165. //! specified in the fileSettings
  166. //! @return an outcome that contains the ContentFileBlocks if the content file data was successfully compressed
  167. //! otherwise a failure string containing the error that occurred when attempting to compress
  168. //! the content asynchronously
  169. CompressContentOutcome CompressContentFileAsync(AZStd::vector<AZStd::byte>& compressionBuffer,
  170. const ArchiveWriterFileSettings& fileSettings, AZStd::span<const AZStd::byte> inputDataSpan);
  171. //! In-memory structure which stores metadata about the file contents after being
  172. //! sent through any compression algorithm and path normalization
  173. struct ContentFileData
  174. {
  175. //! The file path to use for the content being written to the archive
  176. //! This path has been post-processed to account for any changes
  177. //! to file case due to the `ArchiveWriterFileSettings::m_fileCase` member
  178. AZ::IO::PathView m_relativeFilePath;
  179. //! Stores block data about the file contents to write to the block section of the archive
  180. //! The block data contains offsets into the buffer to write
  181. ContentFileBlocks m_contentFileBlocks;
  182. //! Reference to the file contents span that was supplied to @AddFileToArchive
  183. //! This is used to retrieve the uncompressed size of the file contents
  184. //! and to perform a CRC32 over the uncompressed data
  185. AZStd::span<const AZStd::byte> m_uncompressedSpan;
  186. };
  187. //! Updates the archive header with the new file count and the location
  188. //! of the file in the archive
  189. ArchiveFileToken WriteContentFileToArchive(const ArchiveWriterFileSettings& fileSettings,
  190. const ContentFileData& contentFileData);
  191. //! Helper function to update the TOC block offset table entries for the file
  192. //! being written
  193. //! @return Index into the block offset table where the compressed sizes
  194. //! of the 2 MiB blocks are stored
  195. AZ::u64 UpdateBlockOffsetEntryForFile(const ContentFileData& contentFileData);
  196. //! Reads the archive header from the generic stream
  197. bool ReadArchiveHeader(ArchiveHeader& archiveHeader, AZ::IO::GenericStream& archiveStream);
  198. //! Reads the archive table of contents from the generic stream by using the archive header
  199. //! to determine the offset and size of the table of contents
  200. bool ReadArchiveTOC(ArchiveTableOfContents& archiveToc, AZ::IO::GenericStream& archiveStream,
  201. const ArchiveHeader& archiveHeader);
  202. //! Builds in-memory acceleration structures for quick look up of deleted Archive file blocks
  203. //! This is done by starting from the first deleted block offset in the Archive Header
  204. //! and iterating through the blocks within the block section of the file
  205. //! The deleted block map provides a mapping of free block size to offset within the archive
  206. bool BuildDeletedFileBlocksMap(const ArchiveHeader& archiveHeader,
  207. AZ::IO::GenericStream& archiveStream);
  208. //! Iterates over the deleted Archive file block map and merges
  209. //! any deleted blocks that are next to each other into a single entry
  210. //! of the combined sizes
  211. //! NOTE: This iterates over the entire deleted block map
  212. //! so the operation takes longer the more deleted blocks there are
  213. void MergeContiguousDeletedBlocks();
  214. //! Creates a mapping of views to the file paths within the archive to the ArchiveFileToken
  215. //! The ArchiveFileToken currently corresponds to the index within the table of contents
  216. //! ArchiveTocFilePathIndex, ArchiveTocFileMetadata and ArchiveFilePath vector structures
  217. bool BuildFilePathMap(const ArchiveTableOfContents& archiveToc);
  218. //! Returns an offset to seek to in the archive stream, where the content file data should
  219. //! be written
  220. //! If the aligned file size can fit within a deleted file block, its offset is extracted from the deleted block map
  221. //! and returned
  222. //! Otherwise the table of contents start offset is returned and the archive header updates that table of contents
  223. //! value by the amount to be written
  224. AZ::u64 ExtractWriteBlockOffset(AZ::u64 alignedFileSizeToWrite);
  225. //! Encapsulates the result of converting the ArchiveTableOfContents structure
  226. //! into a raw byte buffer
  227. struct WriteTocRawResult
  228. {
  229. // Boolean conversion of the result. NOTE(review): the earlier wording said this is true on error while compressing, which is unusual for a result type (and this struct is about writing, not compressing) — confirm against the definition
  230. explicit operator bool() const;
  231. //! Stores a span to the raw toc data on success
  232. AZStd::span<const AZStd::byte> m_tocSpan;
  233. //! Stores any error messages if writing the TOC data to a raw buffer has failed
  234. ResultString m_errorString;
  235. };
  236. //! Writes the Table of Contents into a raw buffer
  237. //! @param tocOutputBuffer output buffer to write uncompressed raw TOC data
  238. //! (the table of contents itself is not passed as a parameter — presumably the m_archiveToc member is the source; TODO confirm)
  239. //! @return a result structure containing a span in the output buffer containing
  240. //! the raw TOC data and its actual size
  241. WriteTocRawResult WriteTocRaw(AZStd::vector<AZStd::byte>& tocOutputBuffer);
  242. //! Encapsulates the result of compressing a raw buffer of table of contents
  243. //! data
  244. struct CompressTocRawResult
  245. {
  246. // Boolean conversion of the result. NOTE(review): the earlier wording said this is true on error, which is unusual for a result type — confirm against the definition
  247. explicit operator bool() const;
  248. //! Stores a span to the compressed TOC if successful
  249. //! or a reference to the uncompressed TOC input span if not
  250. AZStd::span<const AZStd::byte> m_compressedTocSpan;
  251. //! Stores any error messages if compression fails
  252. ResultString m_errorString;
  253. };
  254. //! Compresses the raw Table of Contents using the table of contents compression
  255. //! algorithm specified in the Archive header
  256. //! @param tocCompressionBuffer output buffer to write compressed table of contents
  257. //! @param uncompressedTocInputSpan input buffer of the raw table of contents data to write
  258. //! @param compressionAlgorithmId the compression algorithm to use for compressing the Table of Contents
  259. //! @return a result structure containing a span within the compression buffer of the compressed TOC data
  260. //! if successful, otherwise a span to the original input span is returned
  261. CompressTocRawResult CompressTocRaw(AZStd::vector<AZStd::byte>& tocCompressionBuffer,
  262. AZStd::span<const AZStd::byte> uncompressedTocInputSpan,
  263. Compression::CompressionAlgorithmId compressionAlgorithmId);
  264. //! Archive Writer specific settings
  265. //! Controls the compression algorithm used to write the table of contents
  266. //! Also contains an error callback that is invoked with an ArchiveWriterError
  267. //! instance containing the error that occurs when using this class
  268. ArchiveWriterSettings m_settings;
  269. //! Archive header which is updated in place and written to the archive stream
  270. //! when the archive data is committed
  271. //! When a stream with an existing archive is supplied,
  272. //! this value is initialized using that archive
  273. ArchiveHeader m_archiveHeader;
  274. //! Archive TOC which manages in-memory file metadata about content within the archive
  275. //! The TOC is read from the archive stream, if an existing archive is supplied
  276. //! and the archive header was able to be successfully read
  277. //!
  278. //! NOTE: The File Metadata vector, File Path Index vector and File Path vector
  279. //! are never resized downwards.
  280. //! When a file is deleted, it is marked deleted by adding its index to the removedFileIndices set below
  281. //! When a file is added, then the following logic occurs
  282. //! If there is an entry in the removed file set, then the existing entry in the File Path Index vector and File Path vector
  283. //! at that index stored in the removed file set is presumably re-used (NOTE(review): the original sentence had no verb — confirm)
  284. //! Otherwise a new entry is appended to the end of those vectors
  285. ArchiveTableOfContents m_archiveToc;
  286. //! Stores a mapping of FilePath to index within the file path table in the Archive TOC
  287. using FilePathTable = AZStd::unordered_map<AZ::IO::Path, size_t>;
  288. FilePathTable m_pathMap;
  289. //! Set containing the index of removed file entries in the table of contents
  290. //! for this specific ArchiveWriter instance
  291. //! The ArchiveWriter itself never writes out removed file entries and this set
  292. //! is only for in-memory use when updating an archive.
  293. //! NOTE: This is not an ArchiveTocFilePathIndex variable inside the File Path Index vector
  294. //! The value here is an integer index into a vector of ArchiveTocFilePathIndex instances
  295. //! NOTE(review): the original comment ended mid-sentence here with "The size of the" — complete or remove
  296. using RemovedFileIndexSet = AZStd::set<AZ::u64>;
  297. RemovedFileIndexSet m_removedFileIndices;
  298. //! Stores a table that maps the unused size represented by the
  299. //! deleted raw block data to a sorted set of offsets into the mounted archive stream
  300. //! where the deleted block data starts
  301. //! This map is used to quickly look up deleted blocks within an existing archive file
  302. //! which can be re-used to write the file data for a file that is being added or updated
  303. using DeletedBlockMap = AZStd::map<AZ::u64, AZStd::set<AZ::u64>>;
  304. DeletedBlockMap m_deletedBlockSizeToOffsetMap;
  305. //! GenericStream pointer which stores the open archive
  306. ArchiveStreamPtr m_archiveStream;
  307. //! Protects reads and writes to the archive stream
  308. AZStd::mutex m_archiveStreamMutex;
  309. //! Task Executor used to compress blocks of a file in parallel
  310. AZ::TaskExecutor m_taskWriteExecutor;
  311. };
  312. } // namespace Archive