CompressedBlob.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. // Copyright 2008 Dolphin Emulator Project
  2. // SPDX-License-Identifier: GPL-2.0-or-later
  3. #include "DiscIO/CompressedBlob.h"
  4. #include <algorithm>
  5. #include <cstdio>
  6. #include <cstring>
  7. #include <memory>
  8. #include <string>
  9. #include <utility>
  10. #include <vector>
  11. #include <zlib.h>
  12. #ifdef _WIN32
  13. #include <windows.h>
  14. #include <io.h>
  15. #endif
  16. #include "Common/Assert.h"
  17. #include "Common/CommonTypes.h"
  18. #include "Common/FileUtil.h"
  19. #include "Common/Hash.h"
  20. #include "Common/IOFile.h"
  21. #include "Common/Logging/Log.h"
  22. #include "Common/MsgHandler.h"
  23. #include "DiscIO/Blob.h"
  24. #include "DiscIO/DiscScrubber.h"
  25. #include "DiscIO/MultithreadedCompressor.h"
  26. #include "DiscIO/Volume.h"
  27. namespace DiscIO
  28. {
  29. bool IsGCZBlob(File::IOFile& file);
  30. CompressedBlobReader::CompressedBlobReader(File::IOFile file, const std::string& filename)
  31. : m_file(std::move(file)), m_file_name(filename)
  32. {
  33. m_file_size = m_file.GetSize();
  34. m_file.Seek(0, File::SeekOrigin::Begin);
  35. m_file.ReadArray(&m_header, 1);
  36. SetSectorSize(m_header.block_size);
  37. // cache block pointers and hashes
  38. m_block_pointers.resize(m_header.num_blocks);
  39. m_file.ReadArray(m_block_pointers.data(), m_header.num_blocks);
  40. m_hashes.resize(m_header.num_blocks);
  41. m_file.ReadArray(m_hashes.data(), m_header.num_blocks);
  42. m_data_offset = (sizeof(CompressedBlobHeader)) +
  43. (sizeof(u64)) * m_header.num_blocks // skip block pointers
  44. + (sizeof(u32)) * m_header.num_blocks; // skip hashes
  45. // A compressed block is never ever longer than a decompressed block, so just header.block_size
  46. // should be fine.
  47. // I still add some safety margin.
  48. const u32 zlib_buffer_size = m_header.block_size + 64;
  49. m_zlib_buffer.resize(zlib_buffer_size);
  50. }
  51. std::unique_ptr<CompressedBlobReader> CompressedBlobReader::Create(File::IOFile file,
  52. const std::string& filename)
  53. {
  54. if (IsGCZBlob(file))
  55. return std::unique_ptr<CompressedBlobReader>(
  56. new CompressedBlobReader(std::move(file), filename));
  57. return nullptr;
  58. }
  59. CompressedBlobReader::~CompressedBlobReader()
  60. {
  61. }
  62. std::unique_ptr<BlobReader> CompressedBlobReader::CopyReader() const
  63. {
  64. return Create(m_file.Duplicate("rb"), m_file_name);
  65. }
  66. // IMPORTANT: Calling this function invalidates all earlier pointers gotten from this function.
  67. u64 CompressedBlobReader::GetBlockCompressedSize(u64 block_num) const
  68. {
  69. u64 start = m_block_pointers[block_num];
  70. if (block_num < m_header.num_blocks - 1)
  71. return m_block_pointers[block_num + 1] - start;
  72. else if (block_num == m_header.num_blocks - 1)
  73. return m_header.compressed_data_size - start;
  74. else
  75. ERROR_LOG_FMT(DISCIO, "{} - illegal block number {}", __func__, block_num);
  76. return 0;
  77. }
  78. bool CompressedBlobReader::GetBlock(u64 block_num, u8* out_ptr)
  79. {
  80. bool uncompressed = false;
  81. u32 comp_block_size = (u32)GetBlockCompressedSize(block_num);
  82. u64 offset = m_block_pointers[block_num] + m_data_offset;
  83. if (offset & (1ULL << 63))
  84. {
  85. if (comp_block_size != m_header.block_size)
  86. ERROR_LOG_FMT(DISCIO, "Uncompressed block with wrong size");
  87. uncompressed = true;
  88. offset &= ~(1ULL << 63);
  89. }
  90. // clear unused part of zlib buffer. maybe this can be deleted when it works fully.
  91. memset(&m_zlib_buffer[comp_block_size], 0, m_zlib_buffer.size() - comp_block_size);
  92. m_file.Seek(offset, File::SeekOrigin::Begin);
  93. if (!m_file.ReadBytes(m_zlib_buffer.data(), comp_block_size))
  94. {
  95. ERROR_LOG_FMT(DISCIO, "The disc image \"{}\" is truncated, some of the data is missing.",
  96. m_file_name);
  97. m_file.ClearError();
  98. return false;
  99. }
  100. // First, check hash.
  101. const u32 block_hash = Common::HashAdler32(m_zlib_buffer.data(), comp_block_size);
  102. if (block_hash != m_hashes[block_num])
  103. {
  104. ERROR_LOG_FMT(DISCIO,
  105. "The disc image \"{}\" is corrupt.\n"
  106. "Hash of block {} is {:08x} instead of {:08x}.",
  107. m_file_name, block_num, block_hash, m_hashes[block_num]);
  108. }
  109. if (uncompressed)
  110. {
  111. std::copy_n(m_zlib_buffer.begin(), comp_block_size, out_ptr);
  112. }
  113. else
  114. {
  115. z_stream z = {};
  116. z.next_in = m_zlib_buffer.data();
  117. z.avail_in = comp_block_size;
  118. if (z.avail_in > m_header.block_size)
  119. {
  120. ERROR_LOG_FMT(DISCIO, "Compressed block size is larger than uncompressed block size");
  121. }
  122. z.next_out = out_ptr;
  123. z.avail_out = m_header.block_size;
  124. inflateInit(&z);
  125. int status = inflate(&z, Z_FULL_FLUSH);
  126. u32 uncomp_size = m_header.block_size - z.avail_out;
  127. if (status != Z_STREAM_END)
  128. {
  129. // this seem to fire wrongly from time to time
  130. // to be sure, don't use compressed isos :P
  131. ERROR_LOG_FMT(DISCIO, "Failure reading block {} - out of data and not at end.", block_num);
  132. }
  133. inflateEnd(&z);
  134. if (uncomp_size != m_header.block_size)
  135. {
  136. ERROR_LOG_FMT(DISCIO, "Wrong block size");
  137. return false;
  138. }
  139. }
  140. return true;
  141. }
  142. struct CompressThreadState
  143. {
  144. CompressThreadState() : z{} {}
  145. ~CompressThreadState() { deflateEnd(&z); }
  146. // z_stream will stop working if it changes address, so this object must not be moved
  147. CompressThreadState(const CompressThreadState&) = delete;
  148. CompressThreadState(CompressThreadState&&) = delete;
  149. CompressThreadState& operator=(const CompressThreadState&) = delete;
  150. CompressThreadState& operator=(CompressThreadState&&) = delete;
  151. std::vector<u8> compressed_buffer;
  152. z_stream z;
  153. };
  154. struct CompressParameters
  155. {
  156. std::vector<u8> data{};
  157. u32 block_number = 0;
  158. u64 inpos = 0;
  159. };
  160. struct OutputParameters
  161. {
  162. std::vector<u8> data{};
  163. u32 block_number = 0;
  164. bool compressed = false;
  165. u64 inpos = 0;
  166. };
  167. static ConversionResultCode SetUpCompressThreadState(CompressThreadState* state)
  168. {
  169. return deflateInit(&state->z, 9) == Z_OK ? ConversionResultCode::Success :
  170. ConversionResultCode::InternalError;
  171. }
  172. static ConversionResult<OutputParameters> Compress(CompressThreadState* state,
  173. CompressParameters parameters, int block_size,
  174. std::vector<u32>* hashes, int* num_stored,
  175. int* num_compressed)
  176. {
  177. state->compressed_buffer.resize(block_size);
  178. int retval = deflateReset(&state->z);
  179. state->z.next_in = parameters.data.data();
  180. state->z.avail_in = block_size;
  181. state->z.next_out = state->compressed_buffer.data();
  182. state->z.avail_out = block_size;
  183. if (retval != Z_OK)
  184. {
  185. ERROR_LOG_FMT(DISCIO, "Deflate failed");
  186. return ConversionResultCode::InternalError;
  187. }
  188. const int status = deflate(&state->z, Z_FINISH);
  189. state->compressed_buffer.resize(block_size - state->z.avail_out);
  190. OutputParameters output_parameters;
  191. if ((status != Z_STREAM_END) || (state->z.avail_out < 10))
  192. {
  193. // let's store uncompressed
  194. ++*num_stored;
  195. output_parameters = OutputParameters{std::move(parameters.data), parameters.block_number, false,
  196. parameters.inpos};
  197. }
  198. else
  199. {
  200. // let's store compressed
  201. ++*num_compressed;
  202. output_parameters = OutputParameters{std::move(state->compressed_buffer),
  203. parameters.block_number, true, parameters.inpos};
  204. }
  205. (*hashes)[parameters.block_number] =
  206. Common::HashAdler32(output_parameters.data.data(), output_parameters.data.size());
  207. return std::move(output_parameters);
  208. }
  209. static ConversionResultCode Output(OutputParameters parameters, File::IOFile* outfile,
  210. u64* position, std::vector<u64>* offsets, int progress_monitor,
  211. u32 num_blocks, CompressCB callback)
  212. {
  213. u64 offset = *position;
  214. if (!parameters.compressed)
  215. offset |= 0x8000000000000000ULL;
  216. (*offsets)[parameters.block_number] = offset;
  217. *position += parameters.data.size();
  218. if (!outfile->WriteBytes(parameters.data.data(), parameters.data.size()))
  219. return ConversionResultCode::WriteFailed;
  220. if (parameters.block_number % progress_monitor == 0)
  221. {
  222. const int ratio =
  223. parameters.inpos == 0 ? 0 : static_cast<int>(100 * *position / parameters.inpos);
  224. const std::string text = Common::FmtFormatT("{0} of {1} blocks. Compression ratio {2}%",
  225. parameters.block_number, num_blocks, ratio);
  226. const float completion = static_cast<float>(parameters.block_number) / num_blocks;
  227. if (!callback(text, completion))
  228. return ConversionResultCode::Canceled;
  229. }
  230. return ConversionResultCode::Success;
  231. }
  232. bool ConvertToGCZ(BlobReader* infile, const std::string& infile_path,
  233. const std::string& outfile_path, u32 sub_type, int block_size,
  234. CompressCB callback)
  235. {
  236. ASSERT(infile->GetDataSizeType() == DataSizeType::Accurate);
  237. File::IOFile outfile(outfile_path, "wb");
  238. if (!outfile)
  239. {
  240. PanicAlertFmtT(
  241. "Failed to open the output file \"{0}\".\n"
  242. "Check that you have permissions to write the target folder and that the media can "
  243. "be written.",
  244. outfile_path);
  245. return false;
  246. }
  247. callback(Common::GetStringT("Files opened, ready to compress."), 0);
  248. CompressedBlobHeader header;
  249. header.magic_cookie = GCZ_MAGIC;
  250. header.sub_type = sub_type;
  251. header.block_size = block_size;
  252. header.data_size = infile->GetDataSize();
  253. // round upwards!
  254. header.num_blocks = (u32)((header.data_size + (block_size - 1)) / block_size);
  255. std::vector<u64> offsets(header.num_blocks);
  256. std::vector<u32> hashes(header.num_blocks);
  257. // seek past the header (we will write it at the end)
  258. outfile.Seek(sizeof(CompressedBlobHeader), File::SeekOrigin::Current);
  259. // seek past the offset and hash tables (we will write them at the end)
  260. outfile.Seek((sizeof(u64) + sizeof(u32)) * header.num_blocks, File::SeekOrigin::Current);
  261. // Now we are ready to write compressed data!
  262. u64 inpos = 0;
  263. u64 position = 0;
  264. int num_compressed = 0;
  265. int num_stored = 0;
  266. int progress_monitor = std::max<int>(1, header.num_blocks / 1000);
  267. const auto compress = [&](CompressThreadState* state, CompressParameters parameters) {
  268. return Compress(state, std::move(parameters), block_size, &hashes, &num_stored,
  269. &num_compressed);
  270. };
  271. const auto output = [&](OutputParameters parameters) {
  272. return Output(std::move(parameters), &outfile, &position, &offsets, progress_monitor,
  273. header.num_blocks, callback);
  274. };
  275. MultithreadedCompressor<CompressThreadState, CompressParameters, OutputParameters> compressor(
  276. SetUpCompressThreadState, compress, output);
  277. std::vector<u8> in_buf(block_size);
  278. for (u32 i = 0; i < header.num_blocks; i++)
  279. {
  280. if (compressor.GetStatus() != ConversionResultCode::Success)
  281. break;
  282. const u64 bytes_to_read = std::min<u64>(block_size, header.data_size - inpos);
  283. if (!infile->Read(inpos, bytes_to_read, in_buf.data()))
  284. {
  285. compressor.SetError(ConversionResultCode::ReadFailed);
  286. break;
  287. }
  288. std::fill(in_buf.begin() + bytes_to_read, in_buf.begin() + header.block_size, 0);
  289. inpos += block_size;
  290. compressor.CompressAndWrite(CompressParameters{in_buf, i, inpos});
  291. }
  292. compressor.Shutdown();
  293. header.compressed_data_size = position;
  294. const ConversionResultCode result = compressor.GetStatus();
  295. if (result != ConversionResultCode::Success)
  296. {
  297. // Remove the incomplete output file.
  298. outfile.Close();
  299. File::Delete(outfile_path);
  300. }
  301. else
  302. {
  303. // Okay, go back and fill in headers
  304. outfile.Seek(0, File::SeekOrigin::Begin);
  305. outfile.WriteArray(&header, 1);
  306. outfile.WriteArray(offsets.data(), header.num_blocks);
  307. outfile.WriteArray(hashes.data(), header.num_blocks);
  308. callback(Common::GetStringT("Done compressing disc image."), 1.0f);
  309. }
  310. if (result == ConversionResultCode::ReadFailed)
  311. PanicAlertFmtT("Failed to read from the input file \"{0}\".", infile_path);
  312. if (result == ConversionResultCode::WriteFailed)
  313. {
  314. PanicAlertFmtT("Failed to write the output file \"{0}\".\n"
  315. "Check that you have enough space available on the target drive.",
  316. outfile_path);
  317. }
  318. return result == ConversionResultCode::Success;
  319. }
  320. bool IsGCZBlob(File::IOFile& file)
  321. {
  322. const u64 position = file.Tell();
  323. if (!file.Seek(0, File::SeekOrigin::Begin))
  324. return false;
  325. CompressedBlobHeader header;
  326. bool is_gcz = file.ReadArray(&header, 1) && header.magic_cookie == GCZ_MAGIC;
  327. file.Seek(position, File::SeekOrigin::Begin);
  328. return is_gcz;
  329. }
  330. } // namespace DiscIO