szip.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594
  1. /* This Source Code Form is subject to the terms of the Mozilla Public
  2. * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  3. * You can obtain one at http://mozilla.org/MPL/2.0/. */
  4. #include <algorithm>
  5. #include <map>
  6. #include <sys/stat.h>
  7. #include <string>
  8. #include <sstream>
  9. #include <cstring>
  10. #include <cstdlib>
  11. #include <zlib.h>
  12. #include <fcntl.h>
  13. #include <errno.h>
  14. #include "mozilla/Assertions.h"
  15. #include "mozilla/Scoped.h"
  16. #include "mozilla/UniquePtr.h"
  17. #include "SeekableZStream.h"
  18. #include "Utils.h"
  19. #include "Logging.h"
  20. Logging Logging::Singleton;
  21. const char *filterName[] = {
  22. "none",
  23. "thumb",
  24. "arm",
  25. "x86",
  26. "auto"
  27. };
  28. /* Maximum supported size for chunkSize */
  29. static const size_t maxChunkSize =
  30. 1 << (8 * std::min(sizeof(((SeekableZStreamHeader *)nullptr)->chunkSize),
  31. sizeof(((SeekableZStreamHeader *)nullptr)->lastChunkSize)) - 1);
  32. class Buffer: public MappedPtr
  33. {
  34. public:
  35. virtual ~Buffer() { }
  36. virtual bool Resize(size_t size)
  37. {
  38. MemoryRange buf = mmap(nullptr, size, PROT_READ | PROT_WRITE,
  39. MAP_PRIVATE | MAP_ANON, -1, 0);
  40. if (buf == MAP_FAILED)
  41. return false;
  42. if (*this != MAP_FAILED)
  43. memcpy(buf, *this, std::min(size, GetLength()));
  44. Assign(buf);
  45. return true;
  46. }
  47. bool Fill(Buffer &other)
  48. {
  49. size_t size = other.GetLength();
  50. if (!size || !Resize(size))
  51. return false;
  52. memcpy(static_cast<void *>(*this), static_cast<void *>(other), size);
  53. return true;
  54. }
  55. };
  56. class FileBuffer: public Buffer
  57. {
  58. public:
  59. bool Init(const char *name, bool writable_ = false)
  60. {
  61. fd = open(name, writable_ ? O_RDWR | O_CREAT | O_TRUNC : O_RDONLY, 0666);
  62. if (fd == -1)
  63. return false;
  64. writable = writable_;
  65. return true;
  66. }
  67. virtual bool Resize(size_t size)
  68. {
  69. if (writable) {
  70. if (ftruncate(fd, size) == -1)
  71. return false;
  72. }
  73. Assign(MemoryRange::mmap(nullptr, size,
  74. PROT_READ | (writable ? PROT_WRITE : 0),
  75. writable ? MAP_SHARED : MAP_PRIVATE, fd, 0));
  76. return this != MAP_FAILED;
  77. }
  78. int getFd()
  79. {
  80. return fd;
  81. }
  82. private:
  83. AutoCloseFD fd;
  84. bool writable;
  85. };
  86. class FilteredBuffer: public Buffer
  87. {
  88. public:
  89. void Filter(Buffer &other, SeekableZStream::FilterId filter, size_t chunkSize)
  90. {
  91. SeekableZStream::ZStreamFilter filterCB =
  92. SeekableZStream::GetFilter(filter);
  93. MOZ_ASSERT(filterCB);
  94. Fill(other);
  95. size_t size = other.GetLength();
  96. Bytef *data = reinterpret_cast<Bytef *>(static_cast<void *>(*this));
  97. size_t avail = 0;
  98. /* Filter needs to be applied in chunks. */
  99. while (size) {
  100. avail = std::min(size, chunkSize);
  101. filterCB(data - static_cast<unsigned char *>(static_cast<void *>(*this)),
  102. SeekableZStream::FILTER, data, avail);
  103. size -= avail;
  104. data += avail;
  105. }
  106. }
  107. };
  108. template <typename T>
  109. class Dictionary: public Buffer
  110. {
  111. typedef T piece;
  112. typedef std::pair<piece, int> stat_pair;
  113. static bool stat_cmp(stat_pair a, stat_pair b)
  114. {
  115. return a.second < b.second;
  116. }
  117. public:
  118. Dictionary(Buffer &inBuf, size_t size)
  119. {
  120. if (!size || !Resize(size))
  121. return;
  122. DEBUG_LOG("Creating dictionary");
  123. piece *origBufPieces = reinterpret_cast<piece *>(
  124. static_cast<void *>(inBuf));
  125. std::map<piece, int> stats;
  126. for (unsigned int i = 0; i < inBuf.GetLength() / sizeof(piece); i++) {
  127. stats[origBufPieces[i]]++;
  128. }
  129. std::vector<stat_pair> statsVec(stats.begin(), stats.end());
  130. std::sort(statsVec.begin(), statsVec.end(), stat_cmp);
  131. piece *dictPieces = reinterpret_cast<piece *>(
  132. static_cast<void *>(*this));
  133. typename std::vector<stat_pair>::reverse_iterator it = statsVec.rbegin();
  134. for (int i = size / sizeof(piece); i > 0 && it < statsVec.rend();
  135. i--, ++it) {
  136. dictPieces[i - 1] = it->first;
  137. }
  138. }
  139. };
  140. class SzipAction
  141. {
  142. public:
  143. virtual int run(const char *name, Buffer &origBuf,
  144. const char *outName, Buffer &outBuf) = 0;
  145. virtual ~SzipAction() {}
  146. };
  147. class SzipDecompress: public SzipAction
  148. {
  149. public:
  150. int run(const char *name, Buffer &origBuf,
  151. const char *outName, Buffer &outBuf);
  152. };
  153. class SzipCompress: public SzipAction
  154. {
  155. public:
  156. int run(const char *name, Buffer &origBuf,
  157. const char *outName, Buffer &outBuf);
  158. SzipCompress(size_t aChunkSize, SeekableZStream::FilterId aFilter,
  159. size_t aDictSize)
  160. : chunkSize(aChunkSize ? aChunkSize : 16384)
  161. , filter(aFilter)
  162. , dictSize(aDictSize)
  163. {}
  164. const static signed char winSizeLog = 15;
  165. const static size_t winSize = 1 << winSizeLog;
  166. const static SeekableZStream::FilterId DEFAULT_FILTER =
  167. #if defined(TARGET_THUMB)
  168. SeekableZStream::BCJ_THUMB;
  169. #elif defined(TARGET_ARM)
  170. SeekableZStream::BCJ_ARM;
  171. #elif defined(TARGET_X86)
  172. SeekableZStream::BCJ_X86;
  173. #else
  174. SeekableZStream::NONE;
  175. #endif
  176. private:
  177. int do_compress(Buffer &origBuf, Buffer &outBuf, const unsigned char *aDict,
  178. size_t aDictSize, SeekableZStream::FilterId aFilter);
  179. size_t chunkSize;
  180. SeekableZStream::FilterId filter;
  181. size_t dictSize;
  182. };
  183. /* Decompress a seekable compressed stream */
  184. int SzipDecompress::run(const char *name, Buffer &origBuf,
  185. const char *outName, Buffer &outBuf)
  186. {
  187. size_t origSize = origBuf.GetLength();
  188. if (origSize < sizeof(SeekableZStreamHeader)) {
  189. ERROR("%s is not compressed", name);
  190. return 0;
  191. }
  192. SeekableZStream zstream;
  193. if (!zstream.Init(origBuf, origSize))
  194. return 0;
  195. size_t size = zstream.GetUncompressedSize();
  196. /* Give enough room for the uncompressed data */
  197. if (!outBuf.Resize(size)) {
  198. ERROR("Error resizing %s: %s", outName, strerror(errno));
  199. return 1;
  200. }
  201. if (!zstream.Decompress(outBuf, 0, size))
  202. return 1;
  203. return 0;
  204. }
  205. /* Generate a seekable compressed stream. */
  206. int SzipCompress::run(const char *name, Buffer &origBuf,
  207. const char *outName, Buffer &outBuf)
  208. {
  209. size_t origSize = origBuf.GetLength();
  210. if (origSize == 0) {
  211. ERROR("Won't compress %s: it's empty", name);
  212. return 1;
  213. }
  214. if (SeekableZStreamHeader::validate(origBuf)) {
  215. WARN("Skipping %s: it's already a szip", name);
  216. return 0;
  217. }
  218. bool compressed = false;
  219. LOG("Size = %" PRIuSize, origSize);
  220. /* Allocate a buffer the size of the uncompressed data: we don't want
  221. * a compressed file larger than that anyways. */
  222. if (!outBuf.Resize(origSize)) {
  223. ERROR("Couldn't allocate output buffer: %s", strerror(errno));
  224. return 1;
  225. }
  226. /* Find the most appropriate filter */
  227. SeekableZStream::FilterId firstFilter, lastFilter;
  228. bool scanFilters;
  229. if (filter == SeekableZStream::FILTER_MAX) {
  230. firstFilter = SeekableZStream::NONE;
  231. lastFilter = SeekableZStream::FILTER_MAX;
  232. scanFilters = true;
  233. } else {
  234. firstFilter = lastFilter = filter;
  235. ++lastFilter;
  236. scanFilters = false;
  237. }
  238. mozilla::UniquePtr<Buffer> filteredBuf;
  239. Buffer *origData;
  240. for (SeekableZStream::FilterId f = firstFilter; f < lastFilter; ++f) {
  241. mozilla::UniquePtr<FilteredBuffer> filteredTmp;
  242. Buffer tmpBuf;
  243. if (f != SeekableZStream::NONE) {
  244. DEBUG_LOG("Applying filter \"%s\"", filterName[f]);
  245. filteredTmp = mozilla::MakeUnique<FilteredBuffer>();
  246. filteredTmp->Filter(origBuf, f, chunkSize);
  247. origData = filteredTmp.get();
  248. } else {
  249. origData = &origBuf;
  250. }
  251. if (dictSize && !scanFilters) {
  252. filteredBuf = mozilla::Move(filteredTmp);
  253. break;
  254. }
  255. DEBUG_LOG("Compressing with no dictionary");
  256. if (do_compress(*origData, tmpBuf, nullptr, 0, f) == 0) {
  257. if (tmpBuf.GetLength() < outBuf.GetLength()) {
  258. outBuf.Fill(tmpBuf);
  259. compressed = true;
  260. filter = f;
  261. filteredBuf = mozilla::Move(filteredTmp);
  262. continue;
  263. }
  264. }
  265. }
  266. origData = filteredBuf ? filteredBuf.get() : &origBuf;
  267. if (dictSize) {
  268. Dictionary<uint64_t> dict(*origData, dictSize ? SzipCompress::winSize : 0);
  269. /* Find the most appropriate dictionary size */
  270. size_t firstDictSize, lastDictSize;
  271. if (dictSize == (size_t) -1) {
  272. /* If we scanned for filters, we effectively already tried dictSize=0 */
  273. firstDictSize = scanFilters ? 4096 : 0;
  274. lastDictSize = SzipCompress::winSize;
  275. } else {
  276. firstDictSize = lastDictSize = dictSize;
  277. }
  278. Buffer tmpBuf;
  279. for (size_t d = firstDictSize; d <= lastDictSize; d += 4096) {
  280. DEBUG_LOG("Compressing with dictionary of size %" PRIuSize, d);
  281. if (do_compress(*origData, tmpBuf, static_cast<unsigned char *>(dict)
  282. + SzipCompress::winSize - d, d, filter))
  283. continue;
  284. if (!compressed || tmpBuf.GetLength() < outBuf.GetLength()) {
  285. outBuf.Fill(tmpBuf);
  286. compressed = true;
  287. dictSize = d;
  288. }
  289. }
  290. }
  291. if (!compressed) {
  292. outBuf.Fill(origBuf);
  293. LOG("Not compressed");
  294. return 0;
  295. }
  296. if (dictSize == (size_t) -1)
  297. dictSize = 0;
  298. DEBUG_LOG("Used filter \"%s\" and dictionary size of %" PRIuSize,
  299. filterName[filter], dictSize);
  300. LOG("Compressed size is %" PRIuSize, outBuf.GetLength());
  301. /* Sanity check */
  302. Buffer tmpBuf;
  303. SzipDecompress decompress;
  304. if (decompress.run("buffer", outBuf, "buffer", tmpBuf))
  305. return 1;
  306. size_t size = tmpBuf.GetLength();
  307. if (size != origSize) {
  308. ERROR("Compression error: %" PRIuSize " != %" PRIuSize, size, origSize);
  309. return 1;
  310. }
  311. if (memcmp(static_cast<void *>(origBuf), static_cast<void *>(tmpBuf), size)) {
  312. ERROR("Compression error: content mismatch");
  313. return 1;
  314. }
  315. return 0;
  316. }
  317. int SzipCompress::do_compress(Buffer &origBuf, Buffer &outBuf,
  318. const unsigned char *aDict, size_t aDictSize,
  319. SeekableZStream::FilterId aFilter)
  320. {
  321. size_t origSize = origBuf.GetLength();
  322. MOZ_ASSERT(origSize != 0);
  323. /* Expected total number of chunks */
  324. size_t nChunks = ((origSize + chunkSize - 1) / chunkSize);
  325. /* The first chunk is going to be stored after the header, the dictionary
  326. * and the offset table */
  327. size_t offset = sizeof(SeekableZStreamHeader) + aDictSize
  328. + nChunks * sizeof(uint32_t);
  329. if (offset >= origSize)
  330. return 1;
  331. /* Allocate a buffer the size of the uncompressed data: we don't want
  332. * a compressed file larger than that anyways. */
  333. if (!outBuf.Resize(origSize)) {
  334. ERROR("Couldn't allocate output buffer: %s", strerror(errno));
  335. return 1;
  336. }
  337. SeekableZStreamHeader *header = new (outBuf) SeekableZStreamHeader;
  338. unsigned char *dictionary = static_cast<unsigned char *>(
  339. outBuf + sizeof(SeekableZStreamHeader));
  340. le_uint32 *entry =
  341. reinterpret_cast<le_uint32 *>(dictionary + aDictSize);
  342. /* Initialize header */
  343. header->chunkSize = chunkSize;
  344. header->dictSize = aDictSize;
  345. header->totalSize = offset;
  346. header->windowBits = -SzipCompress::winSizeLog; // Raw stream,
  347. // window size of 32k.
  348. header->filter = aFilter;
  349. if (aDictSize)
  350. memcpy(dictionary, aDict, aDictSize);
  351. /* Initialize zlib structure */
  352. z_stream zStream;
  353. memset(&zStream, 0, sizeof(zStream));
  354. zStream.avail_out = origSize - offset;
  355. zStream.next_out = static_cast<Bytef*>(outBuf) + offset;
  356. size_t avail = 0;
  357. size_t size = origSize;
  358. unsigned char *data = reinterpret_cast<unsigned char *>(
  359. static_cast<void *>(origBuf));
  360. while (size) {
  361. avail = std::min(size, chunkSize);
  362. /* Compress chunk */
  363. int ret = deflateInit2(&zStream, 9, Z_DEFLATED, header->windowBits,
  364. MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
  365. if (aDictSize)
  366. deflateSetDictionary(&zStream, dictionary, aDictSize);
  367. MOZ_ASSERT(ret == Z_OK);
  368. zStream.avail_in = avail;
  369. zStream.next_in = data;
  370. ret = deflate(&zStream, Z_FINISH);
  371. /* Under normal conditions, deflate returns Z_STREAM_END. If there is not
  372. * enough room to compress, deflate returns Z_OK and avail_out is 0. We
  373. * still want to deflateEnd in that case, so fall through. It will bail
  374. * on the avail_out test that follows. */
  375. MOZ_ASSERT(ret == Z_STREAM_END || ret == Z_OK);
  376. ret = deflateEnd(&zStream);
  377. MOZ_ASSERT(ret == Z_OK);
  378. if (zStream.avail_out <= 0)
  379. return 1;
  380. size_t len = origSize - offset - zStream.avail_out;
  381. /* Adjust headers */
  382. header->totalSize += len;
  383. *entry++ = offset;
  384. header->nChunks++;
  385. /* Prepare for next iteration */
  386. size -= avail;
  387. data += avail;
  388. offset += len;
  389. }
  390. header->lastChunkSize = avail;
  391. MOZ_ASSERT(header->totalSize == offset);
  392. MOZ_ASSERT(header->nChunks == nChunks);
  393. if (!outBuf.Resize(offset)) {
  394. ERROR("Error truncating output: %s", strerror(errno));
  395. return 1;
  396. }
  397. return 0;
  398. }
  399. bool GetSize(const char *str, size_t *out)
  400. {
  401. char *end;
  402. MOZ_ASSERT(out);
  403. errno = 0;
  404. *out = strtol(str, &end, 10);
  405. return (!errno && !*end);
  406. }
  407. int main(int argc, char* argv[])
  408. {
  409. mozilla::UniquePtr<SzipAction> action;
  410. char **firstArg;
  411. bool compress = true;
  412. size_t chunkSize = 0;
  413. SeekableZStream::FilterId filter = SzipCompress::DEFAULT_FILTER;
  414. size_t dictSize = (size_t) 0;
  415. Logging::Init();
  416. for (firstArg = &argv[1]; argc > 2; argc--, firstArg++) {
  417. if (!firstArg[0] || firstArg[0][0] != '-')
  418. break;
  419. if (strcmp(firstArg[0], "-d") == 0) {
  420. compress = false;
  421. } else if (strcmp(firstArg[0], "-c") == 0) {
  422. firstArg++;
  423. argc--;
  424. if (!firstArg[0])
  425. break;
  426. if (!GetSize(firstArg[0], &chunkSize) || !chunkSize ||
  427. (chunkSize % 4096) || (chunkSize > maxChunkSize)) {
  428. ERROR("Invalid chunk size");
  429. return 1;
  430. }
  431. } else if (strcmp(firstArg[0], "-f") == 0) {
  432. firstArg++;
  433. argc--;
  434. if (!firstArg[0])
  435. break;
  436. bool matched = false;
  437. for (unsigned int i = 0; i < sizeof(filterName) / sizeof(char *); ++i) {
  438. if (strcmp(firstArg[0], filterName[i]) == 0) {
  439. filter = static_cast<SeekableZStream::FilterId>(i);
  440. matched = true;
  441. break;
  442. }
  443. }
  444. if (!matched) {
  445. ERROR("Invalid filter");
  446. return 1;
  447. }
  448. } else if (strcmp(firstArg[0], "-D") == 0) {
  449. firstArg++;
  450. argc--;
  451. if (!firstArg[0])
  452. break;
  453. if (strcmp(firstArg[0], "auto") == 0) {
  454. dictSize = -1;
  455. } else if (!GetSize(firstArg[0], &dictSize) || (dictSize >= 1 << 16)) {
  456. ERROR("Invalid dictionary size");
  457. return 1;
  458. }
  459. }
  460. }
  461. if (argc != 2 || !firstArg[0]) {
  462. LOG("usage: %s [-d] [-c CHUNKSIZE] [-f FILTER] [-D DICTSIZE] file",
  463. argv[0]);
  464. return 1;
  465. }
  466. if (compress) {
  467. action.reset(new SzipCompress(chunkSize, filter, dictSize));
  468. } else {
  469. if (chunkSize) {
  470. ERROR("-c is incompatible with -d");
  471. return 1;
  472. }
  473. if (dictSize) {
  474. ERROR("-D is incompatible with -d");
  475. return 1;
  476. }
  477. action.reset(new SzipDecompress());
  478. }
  479. std::stringstream tmpOutStream;
  480. tmpOutStream << firstArg[0] << ".sz." << getpid();
  481. std::string tmpOut(tmpOutStream.str());
  482. int ret;
  483. struct stat st;
  484. {
  485. FileBuffer origBuf;
  486. if (!origBuf.Init(firstArg[0])) {
  487. ERROR("Couldn't open %s: %s", firstArg[0], strerror(errno));
  488. return 1;
  489. }
  490. ret = fstat(origBuf.getFd(), &st);
  491. if (ret == -1) {
  492. ERROR("Couldn't stat %s: %s", firstArg[0], strerror(errno));
  493. return 1;
  494. }
  495. size_t origSize = st.st_size;
  496. /* Mmap the original file */
  497. if (!origBuf.Resize(origSize)) {
  498. ERROR("Couldn't mmap %s: %s", firstArg[0], strerror(errno));
  499. return 1;
  500. }
  501. /* Create the compressed file */
  502. FileBuffer outBuf;
  503. if (!outBuf.Init(tmpOut.c_str(), true)) {
  504. ERROR("Couldn't open %s: %s", tmpOut.c_str(), strerror(errno));
  505. return 1;
  506. }
  507. ret = action->run(firstArg[0], origBuf, tmpOut.c_str(), outBuf);
  508. if ((ret == 0) && (fstat(outBuf.getFd(), &st) == -1)) {
  509. st.st_size = 0;
  510. }
  511. }
  512. if ((ret == 0) && st.st_size) {
  513. rename(tmpOut.c_str(), firstArg[0]);
  514. } else {
  515. unlink(tmpOut.c_str());
  516. }
  517. return ret;
  518. }