#include <string>
#include <fstream>
+#include <stdint.h>
namespace Trace {
Read,
Write
};
+ /**
+  * Position inside a trace file, expressed as a chunk plus an offset
+  * within that chunk.  SnappyFile stores the stream position of the
+  * chunk in `chunk` and the number of bytes already consumed from the
+  * uncompressed chunk in `offsetInChunk`.
+  */
+ struct Offset {
+     uint64_t chunk;
+     uint32_t offsetInChunk;
+
+     /** A default-constructed offset refers to the very start: chunk 0, byte 0. */
+     Offset() : chunk(0), offsetInChunk(0) {}
+ };
+
public:
static bool isZLibCompressed(const std::string &filename);
static bool isSnappyCompressed(const std::string &filename);
std::string filename() const;
bool open(const std::string &filename, File::Mode mode);
- bool write(const void *buffer, int length);
- bool read(void *buffer, int length);
+ bool write(const void *buffer, size_t length);
+ bool read(void *buffer, size_t length);
void close();
void flush(void);
int getc();
+ bool skip(unsigned length);
+ virtual bool supportsOffsets() const = 0;
+ virtual File::Offset currentOffset();
+ virtual void setCurrentOffset(const File::Offset &offset);
protected:
virtual bool rawOpen(const std::string &filename, File::Mode mode) = 0;
- virtual bool rawWrite(const void *buffer, int length) = 0;
- virtual bool rawRead(void *buffer, int length) = 0;
+ virtual bool rawWrite(const void *buffer, size_t length) = 0;
+ virtual bool rawRead(void *buffer, size_t length) = 0;
virtual int rawGetc() = 0;
virtual void rawClose() = 0;
virtual void rawFlush() = 0;
+ virtual bool rawSkip(unsigned length) = 0;
protected:
std::string m_filename;
return m_isOpened;
}
- inline bool File::write(const void *buffer, int length)
+ inline bool File::write(const void *buffer, size_t length)
{
if (!m_isOpened || m_mode != File::Write) {
return false;
return rawWrite(buffer, length);
}
- inline bool File::read(void *buffer, int length)
+ inline bool File::read(void *buffer, size_t length)
{
if (!m_isOpened || m_mode != File::Read) {
return false;
return rawGetc();
}
+/**
+ * Skip `length` bytes of input.
+ *
+ * Only valid on a file that is open for reading; in any other state the
+ * call returns false without touching the backend.  Otherwise the result
+ * of the backend's rawSkip() is returned.
+ */
+inline bool File::skip(unsigned length)
+{
+    const bool readable = m_isOpened && m_mode == File::Read;
+    return readable && rawSkip(length);
+}
+
class ZLibFile : public File {
public:
ZLibFile(const std::string &filename = std::string(),
File::Mode mode = File::Read);
virtual ~ZLibFile();
+
+ virtual bool supportsOffsets() const; // implements the pure virtual File::supportsOffsets()
protected:
virtual bool rawOpen(const std::string &filename, File::Mode mode);
- virtual bool rawWrite(const void *buffer, int length);
- virtual bool rawRead(void *buffer, int length);
+ virtual bool rawWrite(const void *buffer, size_t length); // length is now size_t, matching File::rawWrite
+ virtual bool rawRead(void *buffer, size_t length); // length is now size_t, matching File::rawRead
virtual int rawGetc();
virtual void rawClose();
virtual void rawFlush();
+ virtual bool rawSkip(unsigned length); // backend for File::skip(), which forwards here when open for reading
private:
void *m_gzFile;
};
+/**
+ * Strict ordering of file offsets: chunks are compared first, and the
+ * offset inside the chunk only breaks ties between equal chunks.
+ */
+inline bool
+operator<(const File::Offset &one, const File::Offset &two)
+{
+    if (one.chunk != two.chunk) {
+        return one.chunk < two.chunk;
+    }
+    return one.offsetInChunk < two.offsetInChunk;
+}
+
+/**
+ * Two offsets are equal when both the chunk and the offset inside the
+ * chunk match.
+ */
+inline bool
+operator==(const File::Offset &one, const File::Offset &two)
+{
+    const bool differs = one.chunk != two.chunk ||
+                         one.offsetInChunk != two.offsetInChunk;
+    return !differs;
+}
+
+/**
+ * Greater-or-equal for file offsets: a later chunk always wins, and
+ * within the same chunk the offsets inside the chunk are compared.
+ */
+inline bool
+operator>=(const File::Offset &one, const File::Offset &two)
+{
+    if (one.chunk != two.chunk) {
+        return one.chunk > two.chunk;
+    }
+    return one.offsetInChunk >= two.offsetInChunk;
+}
+
+/**
+ * Strictly-greater for file offsets; the mirror image of operator<
+ * (chunk first, then offset inside the chunk).
+ */
+inline bool
+operator>(const File::Offset &one, const File::Offset &two)
+{
+    if (one.chunk != two.chunk) {
+        return one.chunk > two.chunk;
+    }
+    return one.offsetInChunk > two.offsetInChunk;
+}
+
+/**
+ * Less-or-equal for file offsets; the mirror image of operator>=
+ * (chunk first, then offset inside the chunk).
+ */
+inline bool
+operator<=(const File::Offset &one, const File::Offset &two)
+{
+    if (one.chunk != two.chunk) {
+        return one.chunk < two.chunk;
+    }
+    return one.offsetInChunk <= two.offsetInChunk;
+}
+
+
}
#endif
#include <snappy.h>
+#include <iostream>
+
#include <assert.h>
#include <string.h>
* The file is composed of a number of chunks, they are:
* chunk {
* uint32 - specifying the length of the compressed data
- * compressed data
+ * compressed data (the uint32 length above is stored in little endian)
* }
* File can contain any number of such chunks.
* The default size of an uncompressed chunk is specified in
SnappyFile::~SnappyFile()
{
delete [] m_compressedCache;
+ delete [] m_cache; // also release the uncompressed chunk buffer allocated by createCache()
}
bool SnappyFile::rawOpen(const std::string &filename, File::Mode mode)
return m_stream.is_open();
}
- bool SnappyFile::rawWrite(const void *buffer, int length)
+ bool SnappyFile::rawWrite(const void *buffer, size_t length)
{
if (freeCacheSize() > length) {
memcpy(m_cachePtr, buffer, length);
while (sizeToWrite >= freeCacheSize()) {
int endSize = freeCacheSize();
int offset = length - sizeToWrite;
- memcpy(m_cachePtr, (char*)buffer + offset, endSize);
+ memcpy(m_cachePtr, (const char*)buffer + offset, endSize);
sizeToWrite -= endSize;
m_cachePtr += endSize;
flushCache();
}
if (sizeToWrite) {
int offset = length - sizeToWrite;
- memcpy(m_cachePtr, (char*)buffer + offset, sizeToWrite);
+ memcpy(m_cachePtr, (const char*)buffer + offset, sizeToWrite);
m_cachePtr += sizeToWrite;
}
}
return true;
}
- bool SnappyFile::rawRead(void *buffer, int length)
+ bool SnappyFile::rawRead(void *buffer, size_t length)
{
if (endOfData()) {
return false;
memcpy(buffer, m_cachePtr, length);
m_cachePtr += length;
} else {
- int sizeToRead = length;
- int offset = 0;
+ size_t sizeToRead = length;
+ size_t offset = 0;
while (sizeToRead) {
- int chunkSize = std::min(freeCacheSize(), sizeToRead);
+ size_t chunkSize = std::min(freeCacheSize(), sizeToRead);
offset = length - sizeToRead;
memcpy((char*)buffer + offset, m_cachePtr, chunkSize);
m_cachePtr += chunkSize;
void SnappyFile::flushCache()
{
if (m_mode == File::Write) {
- size_t compressedLength;
+ size_t inputLength = usedCacheSize();
- ::snappy::RawCompress(m_cache, SNAPPY_CHUNK_SIZE - freeCacheSize(),
- m_compressedCache, &compressedLength);
+ if (inputLength) {
+ size_t compressedLength;
- writeCompressedLength(compressedLength);
- m_stream.write(m_compressedCache, compressedLength);
- m_cachePtr = m_cache;
+ ::snappy::RawCompress(m_cache, inputLength,
+ m_compressedCache, &compressedLength);
+
+ writeCompressedLength(compressedLength);
+ m_stream.write(m_compressedCache, compressedLength);
+ m_cachePtr = m_cache;
+ }
+ assert(m_cachePtr == m_cache);
} else if (m_mode == File::Read) {
- if (m_stream.eof())
- return;
//assert(m_cachePtr == m_cache + m_cacheSize);
+ m_currentOffset.chunk = m_stream.tellg();
size_t compressedLength;
compressedLength = readCompressedLength();
- m_stream.read((char*)m_compressedCache, compressedLength);
- /*
- * The reason we peek here is because the last read will
- * read all the way until the last character, but that will not
- * trigger m_stream.eof() to be set, so by calling peek
- * we assure that if we in fact have read the entire stream
- * then the m_stream.eof() is always set.
- */
- m_stream.peek();
- ::snappy::GetUncompressedLength(m_compressedCache, compressedLength,
- &m_cacheSize);
- if (m_cache)
- delete [] m_cache;
- createCache(m_cacheSize);
- ::snappy::RawUncompress(m_compressedCache, compressedLength,
- m_cache);
+
+ if (compressedLength) {
+ m_stream.read((char*)m_compressedCache, compressedLength);
+ ::snappy::GetUncompressedLength(m_compressedCache, compressedLength,
+ &m_cacheSize);
+ createCache(m_cacheSize);
+ ::snappy::RawUncompress(m_compressedCache, compressedLength,
+ m_cache);
+ } else {
+ createCache(0);
+ }
}
}
void SnappyFile::createCache(size_t size)
{
- m_cache = new char[size];
+ // TODO: only re-allocate if the current buffer is not big enough
+
+ if (m_cache) {
+ delete [] m_cache;
+ }
+
+ if (size) {
+ m_cache = new char[size];
+ } else {
+ m_cache = NULL;
+ }
+
m_cachePtr = m_cache;
m_cacheSize = size;
}
- void SnappyFile::writeCompressedLength(uint32_t value)
+ void SnappyFile::writeCompressedLength(size_t length)
{
- m_stream.write((const char*)&value, sizeof value);
+ unsigned char buf[4];
+ buf[0] = length & 0xff; length >>= 8;
+ buf[1] = length & 0xff; length >>= 8;
+ buf[2] = length & 0xff; length >>= 8;
+ buf[3] = length & 0xff; length >>= 8;
+ assert(length == 0);
+ m_stream.write((const char *)buf, sizeof buf);
}
- uint32_t SnappyFile::readCompressedLength()
+ size_t SnappyFile::readCompressedLength()
{
- uint32_t len;
- m_stream.read((char*)&len, sizeof len);
- return len;
+ unsigned char buf[4];
+ size_t length;
+ m_stream.read((char *)buf, sizeof buf);
+ if (m_stream.fail()) {
+ length = 0;
+ } else {
+ length = (size_t)buf[0];
+ length |= ((size_t)buf[1] << 8);
+ length |= ((size_t)buf[2] << 16);
+ length |= ((size_t)buf[3] << 24);
+ }
+ return length;
}
+
+/**
+ * Snappy files implement currentOffset() and setCurrentOffset(), so
+ * this always reports true.
+ */
+bool SnappyFile::supportsOffsets() const
+{
+    const bool offsetsAvailable = true;
+    return offsetsAvailable;
+}
+
+/**
+ * Report the current read position.
+ *
+ * The chunk part is whatever flushCache() recorded when it loaded the
+ * chunk; the offset inside the chunk is refreshed here from the cache
+ * cursor before a copy of the offset is returned.
+ */
+File::Offset SnappyFile::currentOffset()
+{
+    File::Offset &position = m_currentOffset;
+    position.offsetInChunk = m_cachePtr - m_cache;
+    return position;
+}
+
+/**
+ * Reposition the file at `offset`: seek the stream to the start of the
+ * chunk, load that chunk through flushCache(), then move the cache
+ * cursor to the requested byte inside it.  Asserts that the byte lies
+ * within the loaded chunk.
+ */
+void SnappyFile::setCurrentOffset(const File::Offset &offset)
+{
+    // Reset the stream state first (an earlier read may have left the
+    // eof bit set), then jump to the first byte of the chunk.
+    m_stream.clear();
+    m_stream.seekg(offset.chunk, std::ios::beg);
+
+    // Pull the chunk at the new stream position into the cache.
+    flushCache();
+    assert(m_cacheSize >= offset.offsetInChunk);
+
+    // Finally advance the cursor inside the freshly loaded chunk.
+    char *chunkStart = m_cache;
+    m_cachePtr = chunkStart + offset.offsetInChunk;
+}
+
+/**
+ * Advance the read position by `length` uncompressed bytes without
+ * copying them anywhere.
+ *
+ * When the bytes are all in the current chunk only the cache cursor
+ * moves.  Otherwise the remainder of the current chunk is consumed and
+ * further chunks are loaded through flushCache() until the request is
+ * satisfied or an empty chunk is encountered.
+ *
+ * @return false when the file is already at end of data, true
+ *         otherwise (including when the data ran out part-way).
+ */
+bool SnappyFile::rawSkip(unsigned length)
+{
+    if (endOfData()) {
+        return false;
+    }
+
+    if (freeCacheSize() >= length) {
+        m_cachePtr += length;
+    } else {
+        // Counters are size_t so std::min() sees two arguments of the
+        // same type as freeCacheSize()'s return value; chunkSize never
+        // exceeds sizeToSkip, so the subtraction cannot wrap.
+        size_t sizeToSkip = length;
+        while (sizeToSkip) {
+            const size_t chunkSize = std::min(freeCacheSize(), sizeToSkip);
+            m_cachePtr += chunkSize;
+            sizeToSkip -= chunkSize;
+            if (sizeToSkip > 0)
+                flushCache();
+            // An empty cache after a refill means there is no more data.
+            if (!m_cacheSize)
+                break;
+        }
+    }
+
+    return true;
+}
#ifndef TRACE_SNAPPYFILE_HPP
#define TRACE_SNAPPYFILE_HPP
+ #include <assert.h>
+
#include "trace_file.hpp"
#include <string>
#include <fstream>
- #include <stdint.h>
-
namespace snappy {
class File;
}
File::Mode mode = File::Read);
virtual ~SnappyFile();
+ virtual bool supportsOffsets() const;
+ virtual File::Offset currentOffset();
+ virtual void setCurrentOffset(const File::Offset &offset);
protected:
virtual bool rawOpen(const std::string &filename, File::Mode mode);
- virtual bool rawWrite(const void *buffer, int length);
- virtual bool rawRead(void *buffer, int length);
+ virtual bool rawWrite(const void *buffer, size_t length);
+ virtual bool rawRead(void *buffer, size_t length);
virtual int rawGetc();
virtual void rawClose();
virtual void rawFlush();
+ virtual bool rawSkip(unsigned length);
private:
- inline int freeCacheSize() const
+ inline size_t usedCacheSize() const // number of bytes between the start of the cache and the cache cursor
+ {
+ assert(m_cachePtr >= m_cache);
+ return m_cachePtr - m_cache;
+ }
+ inline size_t freeCacheSize() const // number of bytes between the cache cursor and the end of the cache; 0 for an empty cache
{
- if (m_cacheSize > 0)
- return m_cacheSize - (m_cachePtr - m_cache);
- else
+ assert(m_cacheSize >= usedCacheSize()); // the cursor must not have run past the end, or the unsigned subtraction below would wrap
+ if (m_cacheSize > 0) {
+ return m_cacheSize - usedCacheSize();
+ } else {
return 0;
+ }
}
inline bool endOfData() const
{
}
void flushCache();
void createCache(size_t size);
- void writeCompressedLength(uint32_t num);
- uint32_t readCompressedLength();
+ void writeCompressedLength(size_t length);
+ size_t readCompressedLength();
private:
std::fstream m_stream;
char *m_cache;
size_t m_cacheSize;
char *m_compressedCache;
+
+ File::Offset m_currentOffset;
};
}