123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594 |
- /* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this file,
- * You can obtain one at http://mozilla.org/MPL/2.0/. */
- #include <algorithm>
- #include <map>
- #include <sys/stat.h>
- #include <string>
- #include <sstream>
- #include <cstring>
- #include <cstdlib>
- #include <zlib.h>
- #include <fcntl.h>
- #include <errno.h>
- #include "mozilla/Assertions.h"
- #include "mozilla/Scoped.h"
- #include "mozilla/UniquePtr.h"
- #include "SeekableZStream.h"
- #include "Utils.h"
- #include "Logging.h"
- Logging Logging::Singleton;
- const char *filterName[] = {
- "none",
- "thumb",
- "arm",
- "x86",
- "auto"
- };
- /* Maximum supported size for chunkSize */
- static const size_t maxChunkSize =
- 1 << (8 * std::min(sizeof(((SeekableZStreamHeader *)nullptr)->chunkSize),
- sizeof(((SeekableZStreamHeader *)nullptr)->lastChunkSize)) - 1);
- class Buffer: public MappedPtr
- {
- public:
- virtual ~Buffer() { }
- virtual bool Resize(size_t size)
- {
- MemoryRange buf = mmap(nullptr, size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANON, -1, 0);
- if (buf == MAP_FAILED)
- return false;
- if (*this != MAP_FAILED)
- memcpy(buf, *this, std::min(size, GetLength()));
- Assign(buf);
- return true;
- }
- bool Fill(Buffer &other)
- {
- size_t size = other.GetLength();
- if (!size || !Resize(size))
- return false;
- memcpy(static_cast<void *>(*this), static_cast<void *>(other), size);
- return true;
- }
- };
- class FileBuffer: public Buffer
- {
- public:
- bool Init(const char *name, bool writable_ = false)
- {
- fd = open(name, writable_ ? O_RDWR | O_CREAT | O_TRUNC : O_RDONLY, 0666);
- if (fd == -1)
- return false;
- writable = writable_;
- return true;
- }
- virtual bool Resize(size_t size)
- {
- if (writable) {
- if (ftruncate(fd, size) == -1)
- return false;
- }
- Assign(MemoryRange::mmap(nullptr, size,
- PROT_READ | (writable ? PROT_WRITE : 0),
- writable ? MAP_SHARED : MAP_PRIVATE, fd, 0));
- return this != MAP_FAILED;
- }
- int getFd()
- {
- return fd;
- }
- private:
- AutoCloseFD fd;
- bool writable;
- };
- class FilteredBuffer: public Buffer
- {
- public:
- void Filter(Buffer &other, SeekableZStream::FilterId filter, size_t chunkSize)
- {
- SeekableZStream::ZStreamFilter filterCB =
- SeekableZStream::GetFilter(filter);
- MOZ_ASSERT(filterCB);
- Fill(other);
- size_t size = other.GetLength();
- Bytef *data = reinterpret_cast<Bytef *>(static_cast<void *>(*this));
- size_t avail = 0;
- /* Filter needs to be applied in chunks. */
- while (size) {
- avail = std::min(size, chunkSize);
- filterCB(data - static_cast<unsigned char *>(static_cast<void *>(*this)),
- SeekableZStream::FILTER, data, avail);
- size -= avail;
- data += avail;
- }
- }
- };
- template <typename T>
- class Dictionary: public Buffer
- {
- typedef T piece;
- typedef std::pair<piece, int> stat_pair;
- static bool stat_cmp(stat_pair a, stat_pair b)
- {
- return a.second < b.second;
- }
- public:
- Dictionary(Buffer &inBuf, size_t size)
- {
- if (!size || !Resize(size))
- return;
- DEBUG_LOG("Creating dictionary");
- piece *origBufPieces = reinterpret_cast<piece *>(
- static_cast<void *>(inBuf));
- std::map<piece, int> stats;
- for (unsigned int i = 0; i < inBuf.GetLength() / sizeof(piece); i++) {
- stats[origBufPieces[i]]++;
- }
- std::vector<stat_pair> statsVec(stats.begin(), stats.end());
- std::sort(statsVec.begin(), statsVec.end(), stat_cmp);
- piece *dictPieces = reinterpret_cast<piece *>(
- static_cast<void *>(*this));
- typename std::vector<stat_pair>::reverse_iterator it = statsVec.rbegin();
- for (int i = size / sizeof(piece); i > 0 && it < statsVec.rend();
- i--, ++it) {
- dictPieces[i - 1] = it->first;
- }
- }
- };
- class SzipAction
- {
- public:
- virtual int run(const char *name, Buffer &origBuf,
- const char *outName, Buffer &outBuf) = 0;
- virtual ~SzipAction() {}
- };
- class SzipDecompress: public SzipAction
- {
- public:
- int run(const char *name, Buffer &origBuf,
- const char *outName, Buffer &outBuf);
- };
- class SzipCompress: public SzipAction
- {
- public:
- int run(const char *name, Buffer &origBuf,
- const char *outName, Buffer &outBuf);
- SzipCompress(size_t aChunkSize, SeekableZStream::FilterId aFilter,
- size_t aDictSize)
- : chunkSize(aChunkSize ? aChunkSize : 16384)
- , filter(aFilter)
- , dictSize(aDictSize)
- {}
- const static signed char winSizeLog = 15;
- const static size_t winSize = 1 << winSizeLog;
- const static SeekableZStream::FilterId DEFAULT_FILTER =
- #if defined(TARGET_THUMB)
- SeekableZStream::BCJ_THUMB;
- #elif defined(TARGET_ARM)
- SeekableZStream::BCJ_ARM;
- #elif defined(TARGET_X86)
- SeekableZStream::BCJ_X86;
- #else
- SeekableZStream::NONE;
- #endif
- private:
- int do_compress(Buffer &origBuf, Buffer &outBuf, const unsigned char *aDict,
- size_t aDictSize, SeekableZStream::FilterId aFilter);
- size_t chunkSize;
- SeekableZStream::FilterId filter;
- size_t dictSize;
- };
- /* Decompress a seekable compressed stream */
- int SzipDecompress::run(const char *name, Buffer &origBuf,
- const char *outName, Buffer &outBuf)
- {
- size_t origSize = origBuf.GetLength();
- if (origSize < sizeof(SeekableZStreamHeader)) {
- ERROR("%s is not compressed", name);
- return 0;
- }
- SeekableZStream zstream;
- if (!zstream.Init(origBuf, origSize))
- return 0;
- size_t size = zstream.GetUncompressedSize();
- /* Give enough room for the uncompressed data */
- if (!outBuf.Resize(size)) {
- ERROR("Error resizing %s: %s", outName, strerror(errno));
- return 1;
- }
- if (!zstream.Decompress(outBuf, 0, size))
- return 1;
- return 0;
- }
- /* Generate a seekable compressed stream. */
- int SzipCompress::run(const char *name, Buffer &origBuf,
- const char *outName, Buffer &outBuf)
- {
- size_t origSize = origBuf.GetLength();
- if (origSize == 0) {
- ERROR("Won't compress %s: it's empty", name);
- return 1;
- }
- if (SeekableZStreamHeader::validate(origBuf)) {
- WARN("Skipping %s: it's already a szip", name);
- return 0;
- }
- bool compressed = false;
- LOG("Size = %" PRIuSize, origSize);
- /* Allocate a buffer the size of the uncompressed data: we don't want
- * a compressed file larger than that anyways. */
- if (!outBuf.Resize(origSize)) {
- ERROR("Couldn't allocate output buffer: %s", strerror(errno));
- return 1;
- }
- /* Find the most appropriate filter */
- SeekableZStream::FilterId firstFilter, lastFilter;
- bool scanFilters;
- if (filter == SeekableZStream::FILTER_MAX) {
- firstFilter = SeekableZStream::NONE;
- lastFilter = SeekableZStream::FILTER_MAX;
- scanFilters = true;
- } else {
- firstFilter = lastFilter = filter;
- ++lastFilter;
- scanFilters = false;
- }
- mozilla::UniquePtr<Buffer> filteredBuf;
- Buffer *origData;
- for (SeekableZStream::FilterId f = firstFilter; f < lastFilter; ++f) {
- mozilla::UniquePtr<FilteredBuffer> filteredTmp;
- Buffer tmpBuf;
- if (f != SeekableZStream::NONE) {
- DEBUG_LOG("Applying filter \"%s\"", filterName[f]);
- filteredTmp = mozilla::MakeUnique<FilteredBuffer>();
- filteredTmp->Filter(origBuf, f, chunkSize);
- origData = filteredTmp.get();
- } else {
- origData = &origBuf;
- }
- if (dictSize && !scanFilters) {
- filteredBuf = mozilla::Move(filteredTmp);
- break;
- }
- DEBUG_LOG("Compressing with no dictionary");
- if (do_compress(*origData, tmpBuf, nullptr, 0, f) == 0) {
- if (tmpBuf.GetLength() < outBuf.GetLength()) {
- outBuf.Fill(tmpBuf);
- compressed = true;
- filter = f;
- filteredBuf = mozilla::Move(filteredTmp);
- continue;
- }
- }
- }
- origData = filteredBuf ? filteredBuf.get() : &origBuf;
- if (dictSize) {
- Dictionary<uint64_t> dict(*origData, dictSize ? SzipCompress::winSize : 0);
- /* Find the most appropriate dictionary size */
- size_t firstDictSize, lastDictSize;
- if (dictSize == (size_t) -1) {
- /* If we scanned for filters, we effectively already tried dictSize=0 */
- firstDictSize = scanFilters ? 4096 : 0;
- lastDictSize = SzipCompress::winSize;
- } else {
- firstDictSize = lastDictSize = dictSize;
- }
- Buffer tmpBuf;
- for (size_t d = firstDictSize; d <= lastDictSize; d += 4096) {
- DEBUG_LOG("Compressing with dictionary of size %" PRIuSize, d);
- if (do_compress(*origData, tmpBuf, static_cast<unsigned char *>(dict)
- + SzipCompress::winSize - d, d, filter))
- continue;
- if (!compressed || tmpBuf.GetLength() < outBuf.GetLength()) {
- outBuf.Fill(tmpBuf);
- compressed = true;
- dictSize = d;
- }
- }
- }
- if (!compressed) {
- outBuf.Fill(origBuf);
- LOG("Not compressed");
- return 0;
- }
- if (dictSize == (size_t) -1)
- dictSize = 0;
- DEBUG_LOG("Used filter \"%s\" and dictionary size of %" PRIuSize,
- filterName[filter], dictSize);
- LOG("Compressed size is %" PRIuSize, outBuf.GetLength());
- /* Sanity check */
- Buffer tmpBuf;
- SzipDecompress decompress;
- if (decompress.run("buffer", outBuf, "buffer", tmpBuf))
- return 1;
- size_t size = tmpBuf.GetLength();
- if (size != origSize) {
- ERROR("Compression error: %" PRIuSize " != %" PRIuSize, size, origSize);
- return 1;
- }
- if (memcmp(static_cast<void *>(origBuf), static_cast<void *>(tmpBuf), size)) {
- ERROR("Compression error: content mismatch");
- return 1;
- }
- return 0;
- }
- int SzipCompress::do_compress(Buffer &origBuf, Buffer &outBuf,
- const unsigned char *aDict, size_t aDictSize,
- SeekableZStream::FilterId aFilter)
- {
- size_t origSize = origBuf.GetLength();
- MOZ_ASSERT(origSize != 0);
- /* Expected total number of chunks */
- size_t nChunks = ((origSize + chunkSize - 1) / chunkSize);
- /* The first chunk is going to be stored after the header, the dictionary
- * and the offset table */
- size_t offset = sizeof(SeekableZStreamHeader) + aDictSize
- + nChunks * sizeof(uint32_t);
- if (offset >= origSize)
- return 1;
- /* Allocate a buffer the size of the uncompressed data: we don't want
- * a compressed file larger than that anyways. */
- if (!outBuf.Resize(origSize)) {
- ERROR("Couldn't allocate output buffer: %s", strerror(errno));
- return 1;
- }
- SeekableZStreamHeader *header = new (outBuf) SeekableZStreamHeader;
- unsigned char *dictionary = static_cast<unsigned char *>(
- outBuf + sizeof(SeekableZStreamHeader));
- le_uint32 *entry =
- reinterpret_cast<le_uint32 *>(dictionary + aDictSize);
- /* Initialize header */
- header->chunkSize = chunkSize;
- header->dictSize = aDictSize;
- header->totalSize = offset;
- header->windowBits = -SzipCompress::winSizeLog; // Raw stream,
- // window size of 32k.
- header->filter = aFilter;
- if (aDictSize)
- memcpy(dictionary, aDict, aDictSize);
- /* Initialize zlib structure */
- z_stream zStream;
- memset(&zStream, 0, sizeof(zStream));
- zStream.avail_out = origSize - offset;
- zStream.next_out = static_cast<Bytef*>(outBuf) + offset;
- size_t avail = 0;
- size_t size = origSize;
- unsigned char *data = reinterpret_cast<unsigned char *>(
- static_cast<void *>(origBuf));
- while (size) {
- avail = std::min(size, chunkSize);
- /* Compress chunk */
- int ret = deflateInit2(&zStream, 9, Z_DEFLATED, header->windowBits,
- MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
- if (aDictSize)
- deflateSetDictionary(&zStream, dictionary, aDictSize);
- MOZ_ASSERT(ret == Z_OK);
- zStream.avail_in = avail;
- zStream.next_in = data;
- ret = deflate(&zStream, Z_FINISH);
- /* Under normal conditions, deflate returns Z_STREAM_END. If there is not
- * enough room to compress, deflate returns Z_OK and avail_out is 0. We
- * still want to deflateEnd in that case, so fall through. It will bail
- * on the avail_out test that follows. */
- MOZ_ASSERT(ret == Z_STREAM_END || ret == Z_OK);
- ret = deflateEnd(&zStream);
- MOZ_ASSERT(ret == Z_OK);
- if (zStream.avail_out <= 0)
- return 1;
- size_t len = origSize - offset - zStream.avail_out;
- /* Adjust headers */
- header->totalSize += len;
- *entry++ = offset;
- header->nChunks++;
- /* Prepare for next iteration */
- size -= avail;
- data += avail;
- offset += len;
- }
- header->lastChunkSize = avail;
- MOZ_ASSERT(header->totalSize == offset);
- MOZ_ASSERT(header->nChunks == nChunks);
- if (!outBuf.Resize(offset)) {
- ERROR("Error truncating output: %s", strerror(errno));
- return 1;
- }
- return 0;
- }
- bool GetSize(const char *str, size_t *out)
- {
- char *end;
- MOZ_ASSERT(out);
- errno = 0;
- *out = strtol(str, &end, 10);
- return (!errno && !*end);
- }
- int main(int argc, char* argv[])
- {
- mozilla::UniquePtr<SzipAction> action;
- char **firstArg;
- bool compress = true;
- size_t chunkSize = 0;
- SeekableZStream::FilterId filter = SzipCompress::DEFAULT_FILTER;
- size_t dictSize = (size_t) 0;
- Logging::Init();
- for (firstArg = &argv[1]; argc > 2; argc--, firstArg++) {
- if (!firstArg[0] || firstArg[0][0] != '-')
- break;
- if (strcmp(firstArg[0], "-d") == 0) {
- compress = false;
- } else if (strcmp(firstArg[0], "-c") == 0) {
- firstArg++;
- argc--;
- if (!firstArg[0])
- break;
- if (!GetSize(firstArg[0], &chunkSize) || !chunkSize ||
- (chunkSize % 4096) || (chunkSize > maxChunkSize)) {
- ERROR("Invalid chunk size");
- return 1;
- }
- } else if (strcmp(firstArg[0], "-f") == 0) {
- firstArg++;
- argc--;
- if (!firstArg[0])
- break;
- bool matched = false;
- for (unsigned int i = 0; i < sizeof(filterName) / sizeof(char *); ++i) {
- if (strcmp(firstArg[0], filterName[i]) == 0) {
- filter = static_cast<SeekableZStream::FilterId>(i);
- matched = true;
- break;
- }
- }
- if (!matched) {
- ERROR("Invalid filter");
- return 1;
- }
- } else if (strcmp(firstArg[0], "-D") == 0) {
- firstArg++;
- argc--;
- if (!firstArg[0])
- break;
- if (strcmp(firstArg[0], "auto") == 0) {
- dictSize = -1;
- } else if (!GetSize(firstArg[0], &dictSize) || (dictSize >= 1 << 16)) {
- ERROR("Invalid dictionary size");
- return 1;
- }
- }
- }
- if (argc != 2 || !firstArg[0]) {
- LOG("usage: %s [-d] [-c CHUNKSIZE] [-f FILTER] [-D DICTSIZE] file",
- argv[0]);
- return 1;
- }
- if (compress) {
- action.reset(new SzipCompress(chunkSize, filter, dictSize));
- } else {
- if (chunkSize) {
- ERROR("-c is incompatible with -d");
- return 1;
- }
- if (dictSize) {
- ERROR("-D is incompatible with -d");
- return 1;
- }
- action.reset(new SzipDecompress());
- }
- std::stringstream tmpOutStream;
- tmpOutStream << firstArg[0] << ".sz." << getpid();
- std::string tmpOut(tmpOutStream.str());
- int ret;
- struct stat st;
- {
- FileBuffer origBuf;
- if (!origBuf.Init(firstArg[0])) {
- ERROR("Couldn't open %s: %s", firstArg[0], strerror(errno));
- return 1;
- }
- ret = fstat(origBuf.getFd(), &st);
- if (ret == -1) {
- ERROR("Couldn't stat %s: %s", firstArg[0], strerror(errno));
- return 1;
- }
- size_t origSize = st.st_size;
- /* Mmap the original file */
- if (!origBuf.Resize(origSize)) {
- ERROR("Couldn't mmap %s: %s", firstArg[0], strerror(errno));
- return 1;
- }
- /* Create the compressed file */
- FileBuffer outBuf;
- if (!outBuf.Init(tmpOut.c_str(), true)) {
- ERROR("Couldn't open %s: %s", tmpOut.c_str(), strerror(errno));
- return 1;
- }
- ret = action->run(firstArg[0], origBuf, tmpOut.c_str(), outBuf);
- if ((ret == 0) && (fstat(outBuf.getFd(), &st) == -1)) {
- st.st_size = 0;
- }
- }
- if ((ret == 0) && st.st_size) {
- rename(tmpOut.c_str(), firstArg[0]);
- } else {
- unlink(tmpOut.c_str());
- }
- return ret;
- }
|