X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=trace_snappyfile.cpp;h=4dbe42dc364264a8db0e717a1af17681b9af271e;hb=2b1bd4f71daba2570525eb63aa7b3b66e27259a7;hp=f8016856b31c0550e300e2a180a1b1fe32f56ff2;hpb=94194a24eaa9f559b9140a89433f0d3ab3b77ef3;p=apitrace diff --git a/trace_snappyfile.cpp b/trace_snappyfile.cpp index f801685..4dbe42d 100644 --- a/trace_snappyfile.cpp +++ b/trace_snappyfile.cpp @@ -28,9 +28,10 @@ #include +#include + #include #include -#include using namespace Trace; @@ -45,7 +46,7 @@ using namespace Trace; * The file is composed of a number of chunks, they are: * chunk { * uint32 - specifying the length of the compressed data - * compressed data + * compressed data, in little endian * } * File can contain any number of such chunks. * The default size of an uncompressed chunk is specified in @@ -75,6 +76,7 @@ SnappyFile::SnappyFile(const std::string &filename, SnappyFile::~SnappyFile() { delete [] m_compressedCache; + delete [] m_cache; } bool SnappyFile::rawOpen(const std::string &filename, File::Mode mode) @@ -91,13 +93,17 @@ bool SnappyFile::rawOpen(const std::string &filename, File::Mode mode) //read in the initial buffer if we're reading if (m_stream.is_open() && mode == File::Read) { + m_stream.seekg(0, std::ios::end); + m_endPos = m_stream.tellg(); + m_stream.seekg(0, std::ios::beg); + // read the snappy file identifier unsigned char byte1, byte2; m_stream >> byte1; m_stream >> byte2; assert(byte1 == SNAPPY_BYTE1 && byte2 == SNAPPY_BYTE2); - flushCache(); + flushReadCache(); } else if (m_stream.is_open() && mode == File::Write) { // write the snappy file identifier m_stream << SNAPPY_BYTE1; @@ -114,7 +120,7 @@ bool SnappyFile::rawWrite(const void *buffer, size_t length) } else if (freeCacheSize() == length) { memcpy(m_cachePtr, buffer, length); m_cachePtr += length; - flushCache(); + flushWriteCache(); } else { int sizeToWrite = length; @@ -124,7 +130,7 @@ bool SnappyFile::rawWrite(const void *buffer, size_t length) memcpy(m_cachePtr, (const char*)buffer + offset, endSize); sizeToWrite -= endSize; m_cachePtr += endSize; - flushCache(); + flushWriteCache(); } if (sizeToWrite) { int offset = length - sizeToWrite; @@ -154,10 +160,12 @@ bool SnappyFile::rawRead(void *buffer, size_t length) memcpy((char*)buffer + offset, m_cachePtr, chunkSize); m_cachePtr += chunkSize; sizeToRead -= chunkSize; - if (sizeToRead > 0) - flushCache(); - if (!m_cacheSize) + if (sizeToRead > 0) { + flushReadCache(); + } + if (!m_cacheSize) { break; + } } } @@ -174,7 +182,9 @@ int SnappyFile::rawGetc() void SnappyFile::rawClose() { - flushCache(); + if (m_mode == File::Write) { + flushWriteCache(); + } m_stream.close(); delete [] m_cache; m_cache = NULL; @@ -183,63 +193,146 @@ void SnappyFile::rawClose() void SnappyFile::rawFlush() { - flushCache(); + assert(m_mode == File::Write); + flushWriteCache(); m_stream.flush(); } -void SnappyFile::flushCache() +void SnappyFile::flushWriteCache() { - if (m_mode == File::Write) { + size_t inputLength = usedCacheSize(); + + if (inputLength) { size_t compressedLength; - ::snappy::RawCompress(m_cache, SNAPPY_CHUNK_SIZE - freeCacheSize(), + ::snappy::RawCompress(m_cache, inputLength, m_compressedCache, &compressedLength); writeCompressedLength(compressedLength); m_stream.write(m_compressedCache, compressedLength); m_cachePtr = m_cache; - } else if (m_mode == File::Read) { - if (m_stream.eof()) - return; - //assert(m_cachePtr == m_cache + m_cacheSize); - size_t compressedLength; - compressedLength = readCompressedLength(); + } + assert(m_cachePtr == m_cache); +} + +void SnappyFile::flushReadCache(size_t skipLength) +{ + //assert(m_cachePtr == m_cache + m_cacheSize); + m_currentOffset.chunk = m_stream.tellg(); + size_t compressedLength; + compressedLength = readCompressedLength(); + + if (compressedLength) { m_stream.read((char*)m_compressedCache, compressedLength); - /* - * The reason we peek here is because the last read will - * read all the way until the last character, but that will not - * trigger m_stream.eof() to be set, so by calling peek - * we assure that if we in fact have read the entire stream - * then the m_stream.eof() is always set. - */ - m_stream.peek(); ::snappy::GetUncompressedLength(m_compressedCache, compressedLength, &m_cacheSize); - if (m_cache) - delete [] m_cache; createCache(m_cacheSize); - ::snappy::RawUncompress(m_compressedCache, compressedLength, - m_cache); + if (skipLength < m_cacheSize) { + ::snappy::RawUncompress(m_compressedCache, compressedLength, + m_cache); + } + } else { + createCache(0); } } void SnappyFile::createCache(size_t size) { - m_cache = new char[size]; + // TODO: only re-allocate if the current buffer is not big enough + + if (m_cache) { + delete [] m_cache; + } + + if (size) { + m_cache = new char[size]; + } else { + m_cache = NULL; + } + m_cachePtr = m_cache; m_cacheSize = size; } void SnappyFile::writeCompressedLength(size_t length) { - uint32_t value = length; - assert(value == length); - m_stream.write((const char*)&value, sizeof value); + unsigned char buf[4]; + buf[0] = length & 0xff; length >>= 8; + buf[1] = length & 0xff; length >>= 8; + buf[2] = length & 0xff; length >>= 8; + buf[3] = length & 0xff; length >>= 8; + assert(length == 0); + m_stream.write((const char *)buf, sizeof buf); } size_t SnappyFile::readCompressedLength() { - uint32_t length = 0; - m_stream.read((char*)&length, sizeof length); + unsigned char buf[4]; + size_t length; + m_stream.read((char *)buf, sizeof buf); + if (m_stream.fail()) { + length = 0; + } else { + length = (size_t)buf[0]; + length |= ((size_t)buf[1] << 8); + length |= ((size_t)buf[2] << 16); + length |= ((size_t)buf[3] << 24); + } return length; } + +bool SnappyFile::supportsOffsets() const +{ + return true; +} + +File::Offset SnappyFile::currentOffset() +{ + m_currentOffset.offsetInChunk = m_cachePtr - m_cache; + return m_currentOffset; +} + +void SnappyFile::setCurrentOffset(const File::Offset &offset) +{ + // to remove eof bit + m_stream.clear(); + // seek to the start of a chunk + m_stream.seekg(offset.chunk, std::ios::beg); + // load the chunk + flushReadCache(); + assert(m_cacheSize >= offset.offsetInChunk); + // seek within our cache to the correct location within the chunk + m_cachePtr = m_cache + offset.offsetInChunk; + +} + +bool SnappyFile::rawSkip(size_t length) +{ + if (endOfData()) { + return false; + } + + if (freeCacheSize() >= length) { + m_cachePtr += length; + } else { + size_t sizeToRead = length; + while (sizeToRead) { + size_t chunkSize = std::min(freeCacheSize(), sizeToRead); + m_cachePtr += chunkSize; + sizeToRead -= chunkSize; + if (sizeToRead > 0) { + flushReadCache(sizeToRead); + } + if (!m_cacheSize) { + break; + } + } + } + + return true; +} + +int SnappyFile::rawPercentRead() +{ + return 100 * (double(m_stream.tellg()) / double(m_endPos)); +}