ArchiveReader.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. /*
  2. * Copyright (c) Contributors to the Open 3D Engine Project.
  3. * For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. *
  5. * SPDX-License-Identifier: Apache-2.0 OR MIT
  6. *
  7. */
  8. #pragma once
  9. #include <Archive/Clients/ArchiveBaseAPI.h>
  10. #include <Archive/Clients/ArchiveReaderAPI.h>
  11. #include <Clients/ArchiveTOCView.h>
  12. #include <AzCore/Memory/Memory_fwd.h>
  13. #include <AzCore/RTTI/RTTIMacros.h>
  14. #include <AzCore/std/parallel/mutex.h>
  15. #include <AzCore/std/smart_ptr/unique_ptr.h>
  16. #include <AzCore/std/utility/to_underlying.h>
  17. #include <AzCore/Task/TaskExecutor.h>
  18. namespace AZ
  19. {
  20. class TaskGraphEvent; // forward declaration only; no definition is pulled into this header
  21. } // namespace AZ
  22. namespace AZ::IO
  23. {
  24. class GenericStream; // forward declaration; the declarations in this header only take it by reference
  25. } // namespace AZ::IO
  26. namespace Archive
  27. {
  28. //! Implements the IArchiveReader interface
  29. //! This can be used to read and extract files from an archive
  30. class ArchiveReader
  31. : public IArchiveReader
  32. {
  33. public:
  34. AZ_TYPE_INFO_WITH_NAME_DECL(ArchiveReader);
  35. AZ_RTTI_NO_TYPE_INFO_DECL();
  36. AZ_CLASS_ALLOCATOR_DECL;
  37. ArchiveReader();
  38. //! Creates an archive reader using the specified reader settings
  39. explicit ArchiveReader(const ArchiveReaderSettings& readerSettings);
  40. //! Opens the file at the specified file path and takes sole ownership of it
  41. //! The ArchiveReader will close the file on Unmount
  42. explicit ArchiveReader(AZ::IO::PathView archivePath, const ArchiveReaderSettings& readerSettings = {});
  43. //! Takes ownership of the open stream and will optionally delete it based on the ArchiveFileDeleter
  44. explicit ArchiveReader(ArchiveStreamPtr archiveStream, const ArchiveReaderSettings& readerSettings = {});
  45. ~ArchiveReader();
  46. //! Opens the archive path and returns true if successful
  47. //! Will unmount any previously mounted archive
  48. bool MountArchive(AZ::IO::PathView archivePath) override;
  49. bool MountArchive(ArchiveStreamPtr archiveStream) override;
  50. //! Closes the handle to the mounted archive stream
  51. void UnmountArchive() override;
  52. //! Returns true if an open archive is currently mounted
  53. bool IsMounted() const override;
  54. //! Reads the content of the file specified in the ArchiveReaderFileSettings
  55. //! The file path identifier in the settings is used to locate the file to extract from the archive
  56. //! The outputSpan should be a pre-allocated buffer that is large enough
  57. //! to fit either the uncompressed size of the file if the `m_decompressFile` setting is true
  58. //! or the compressed size of the file if the `m_decompressFile` setting is false
  59. //!
  60. //! @param outputSpan pre-allocated buffer that should be large enough to store the extracted
  61. //! file
  62. //! @param fileSettings settings which can configure whether the file should be decompressed,
  63. //! the start offset where to start reading content within the file, how many bytes
  64. //! to read from the file, etc...
  65. //! @return ArchiveExtractFileResult structure which on success contains
  66. //! a span of the actual data extracted from the Archive.
  67. //! NOTE: The extracted data can be smaller than the outputSpan.size()
  68. //! On failure, the result outcome member contains the error that occurred
  69. ArchiveExtractFileResult ExtractFileFromArchive(AZStd::span<AZStd::byte> outputSpan,
  70. const ArchiveReaderFileSettings& fileSettings) override;
  71. //! List the file metadata from the archive using the ArchiveFileToken
  72. //! @param filePathToken identifier token that can be used to quickly lookup
  73. //! metadata about the file
  74. //! @return ArchiveListFileResult with metadata for the file if found
  75. ArchiveListFileResult ListFileInArchive(ArchiveFileToken filePathToken) const override;
  76. //! List the file metadata from the archive using the relative FilePath
  77. //! @param relativePath File path to lookup within the archive
  78. //! @return ArchiveListFileResult with metadata for the file if found
  79. ArchiveListFileResult ListFileInArchive(AZ::IO::PathView relativePath) const override;
  80. //! Returns whether the archive contains the relative path
  81. //! @param relativePath Relative path within archive to search for
  82. //! @returns true if the relative path is contained within the Archive
  83. //! equivalent to `return FindFile(relativePath) != InvalidArchiveFileToken;`
  84. bool ContainsFile(AZ::IO::PathView relativePath) const override;
  85. //! Enumerates all files within the archive table of contents and invokes a callback
  86. //! function with the listing information about the file
  87. //! This function can be used to filter files in the Archive based on any value
  88. //! supplied in the ArchiveListFileResult structure
  89. //! For example filtering can be done based on file path (such as globbing for all *.txt files)
  90. //! or filtering based on uncompressed size (such as locating all files > 2MiB, etc...)
  91. //! Alternatively this function can be used to list all of the files within the archive by
  92. //! binding a lambda that populates a vector
  93. //!
  94. //! @param listFileCallback Callback which is invoked for each file in the archive
  95. //! @return result structure that is convertible to a boolean value indicating if enumeration was successful
  96. EnumerateArchiveResult EnumerateFilesInArchive(ListFileCallback listFileCallback) const override;
  97. //! Dump metadata for the archive to the supplied generic stream
  98. //! @param metadataStream archive file metadata will be written to the stream
  99. //! @param metadataSettings settings which control the file metadata written to the stream
  100. //! @return true if metadata was successfully written
  101. bool DumpArchiveMetadata(AZ::IO::GenericStream& metadataStream,
  102. const ArchiveMetadataSettings& metadataSettings = {}) const override;
  103. private:
  104. //! Reads the Archive Header into memory.
  105. //! Afterwards the Archive Header is used to read the TOC into memory
  106. //! and build any structures for acceleration of lookups
  107. bool ReadArchiveHeaderAndToc();
  108. //! Reads the archive header from the generic stream
  109. bool ReadArchiveHeader(ArchiveHeader& archiveHeader, AZ::IO::GenericStream& archiveStream);
  110. //! Reads the archive table of contents from the generic stream by using the archive header
  111. //! to determine the offset and size of the table of contents
  112. struct ArchiveTableOfContentsReader; // forward declaration; the struct is defined further down in this class
  113. bool ReadArchiveTOC(ArchiveTableOfContentsReader& archiveToc, AZ::IO::GenericStream& archiveStream,
  114. const ArchiveHeader& archiveHeader);
  115. //! Creates a mapping of views to the file paths within the archive to the ArchiveFileToken
  116. //! The ArchiveFileToken currently corresponds to the index within the table of contents
  117. //! ArchiveTocFilePathIndex, ArchiveTocFileMetadata and ArchiveFilePath vector structures
  118. bool BuildFilePathMap(const ArchiveTableOfContentsView& archiveToc);
  119. //! Read data from offset within archive directly to span
  120. //! @param fileBuffer pre-allocated span to populate buffer with data
  121. //! @param offset absolute offset within the mounted archive to start reading data from
  122. //! @param fileSize the amount of data to read at the specified offset
  123. //! @param fileSettings setting structure which is used to configure where the
  124. //! start offset for reading within the extracted file and how many bytes to read from that
  125. //! start offset
  126. //! @return result outcome which contains an error message related to reading the file
  127. //! if it fails
  128. using ReadRawFileOutcome = AZStd::expected<AZStd::span<AZStd::byte>, ResultString>;
  129. ReadRawFileOutcome ReadRawFileIntoBuffer(AZStd::span<AZStd::byte> fileBuffer,
  130. AZ::u64 offset, AZ::u64 fileSize,
  131. const ArchiveReaderFileSettings& fileSettings);
  132. //! Decompresses the content from the input buffer
  133. //! @param decompressionResultSpan span to be populated with the decompressed results
  134. //! @param fileSettings settings which indicate the max number of decompression tasks
  135. //! to use for decompressing the file content.
  136. //! This parameter also contains settings for selecting an offset within the decompressed
  137. //! file to start reading, as well as a cap on the number of bytes to read from that start offset
  138. //! @param extractFileResult Contains the compressed size, raw offset within the archive and uncompressed
  139. //! size of the file needed for extracting
  140. //! @return result outcome with a span containing a view of the decompressed file data
  141. //! within the offset range specified by
  142. //! [ArchiveReaderFileSettings::m_startOffset, ArchiveReaderFileSettings::m_startOffset + ArchiveReaderFileSettings::m_bytesToRead)
  143. //! Otherwise an error message string providing reasons why decompression failed
  144. using ReadCompressedFileOutcome = AZStd::expected<AZStd::span<AZStd::byte>, ResultString>;
  145. ReadCompressedFileOutcome ReadCompressedFileIntoBuffer(AZStd::span<AZStd::byte> decompressionResultSpan,
  146. const ArchiveReaderFileSettings& fileSettings,
  147. const ArchiveExtractFileResult& extractFileResult);
  148. // Private Member variables section
  149. //! Archive Reader specific settings
  150. //! Controls the number of tasks to use for reading and decompression of content
  151. //! from the archive
  152. //! Also contains an error callback that is invoked when an error occurs in the constructor
  153. ArchiveReaderSettings m_settings;
  154. //! Archive header as read from the first sizeof(ArchiveHeader) blocks of the archive stream
  155. //! The header is not modified by the reader
  156. ArchiveHeader m_archiveHeader;
  157. //! View of the Archive TOC within the supplied archive stream
  158. //! Since the ArchiveReader doesn't mutate the archive, a Table of Contents View is used
  159. //! and paired with a raw buffer of the Table of Contents
  160. struct ArchiveTableOfContentsReader
  161. {
  162. // Default constructs a table of contents reader that has an empty vector
  163. // and default constructed view
  164. ArchiveTableOfContentsReader();
  165. // Stores the buffer containing the Table of Contents raw data
  166. // and an ArchiveTableOfContentsView instance which is a read-only view into that raw data
  167. ArchiveTableOfContentsReader(AZStd::vector<AZStd::byte> tocBuffer, ArchiveTableOfContentsView tocView);
  168. ArchiveTableOfContentsView m_tocView;
  169. private:
  170. AZStd::vector<AZStd::byte> m_tocBuffer;
  171. };
  172. ArchiveTableOfContentsReader m_archiveToc;
  173. //! Stores mapping of FilePath to index within the file path table in the Archive TOC
  174. //! The index is used as the ArchiveFileToken
  175. //! IMPORTANT: The PathView is a view into the m_archiveToc TOC buffer
  176. //! and therefore this map should be cleared before reading another archive TOC
  177. using FilePathTable = AZStd::unordered_map<AZ::IO::PathView, size_t>;
  178. FilePathTable m_pathMap;
  179. //! GenericStream pointer which stores the open archive
  180. ArchiveStreamPtr m_archiveStream;
  181. //! Protects reads within the archive stream
  182. //! NOTE: This does restrict read jobs to be done on one thread at a time
  183. //! if done using the AZ::IO::GenericStream API as it maintains a single seek position
  184. AZStd::mutex m_archiveStreamMutex;
  185. //! Task Executor used to decompress blocks of a file in parallel
  186. AZ::TaskExecutor m_taskExecutor;
  187. };
  188. } // namespace Archive