From 46c4a32730c5b52c119376042a84a6773d8d789f Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 2 Sep 2011 01:08:49 -0400 Subject: [PATCH] Implement scanning/skipping of fragments of the trace for the initial scan. unfortunately it's not the improvement we were looking for. (from 11.9 to about 9.2 secs on a 240mb trace). --- trace_file.cpp | 5 + trace_file.hpp | 11 ++ trace_loader.cpp | 3 +- trace_parser.cpp | 307 +++++++++++++++++++++++++++++++++++++++++-- trace_parser.hpp | 41 ++++++ trace_snappyfile.cpp | 24 ++++ trace_snappyfile.hpp | 1 + 7 files changed, 381 insertions(+), 11 deletions(-) diff --git a/trace_file.cpp b/trace_file.cpp index d4e74b9..652cd0f 100644 --- a/trace_file.cpp +++ b/trace_file.cpp @@ -149,3 +149,8 @@ bool ZLibFile::supportsOffsets() const { return false; } + +bool ZLibFile::rawSkip(unsigned) +{ + return false; +} diff --git a/trace_file.hpp b/trace_file.hpp index 2129d1e..13b392c 100644 --- a/trace_file.hpp +++ b/trace_file.hpp @@ -67,6 +67,7 @@ public: void close(); void flush(void); int getc(); + bool skip(unsigned length); virtual bool supportsOffsets() const = 0; virtual File::Offset currentOffset(); @@ -78,6 +79,7 @@ protected: virtual int rawGetc() = 0; virtual void rawClose() = 0; virtual void rawFlush() = 0; + virtual bool rawSkip(unsigned length) = 0; protected: std::string m_filename; @@ -148,6 +150,14 @@ inline int File::getc() return rawGetc(); } +inline bool File::skip(unsigned length) +{ + if (!m_isOpened || m_mode != File::Read) { + return false; + } + return rawSkip(length); +} + class ZLibFile : public File { public: ZLibFile(const std::string &filename = std::string(), @@ -163,6 +173,7 @@ protected: virtual int rawGetc(); virtual void rawClose(); virtual void rawFlush(); + virtual bool rawSkip(unsigned length); private: void *m_gzFile; }; diff --git a/trace_loader.cpp b/trace_loader.cpp index 228553b..08066a9 100644 --- a/trace_loader.cpp +++ b/trace_loader.cpp @@ -61,8 +61,7 @@ bool Loader::open(const char 
*filename) startOffset = m_parser.currentOffset(); callNum = m_parser.currentCallNumber(); - while ((call = m_parser.parse_call())) { - + while ((call = m_parser.scan_call())) { ++numOfCalls; if (isCallAFrameMarker(call)) { diff --git a/trace_parser.cpp b/trace_parser.cpp index 66d56a6..95dbec5 100644 --- a/trace_parser.cpp +++ b/trace_parser.cpp @@ -163,10 +163,10 @@ void Parser::parse_enter(void) { m_callSigOffsets.insert(offset); } else { /* skip over the signature */ - read_string(); /* name */ + skip_string(); /* name */ int num_args = read_uint(); for (unsigned i = 0; i < num_args; ++i) { - read_string(); /*arg_name*/ + skip_string(); /*arg_name*/ } } } @@ -349,9 +349,8 @@ Value *Parser::parse_enum() { enums[id] = sig; m_enumSigOffsets.insert(offset); } else { - read_string(); /*name*/ - Value *value = parse_value(); - delete value; + skip_string(); /*name*/ + scan_value(); } } assert(sig); @@ -383,8 +382,8 @@ Value *Parser::parse_bitmask() { } else { int num_flags = read_uint(); for (int i = 0; i < num_flags; ++i) { - read_string(); /*name */ - read_uint(); /* value */ + skip_string(); /*name */ + skip_uint(); /* value */ } } } @@ -436,10 +435,10 @@ Value *Parser::parse_struct() { structs[id] = sig; m_structSigOffsets.insert(offset); } else { - read_string(); /* name */ + skip_string(); /* name */ unsigned num_members = read_uint(); for (unsigned i = 0; i < num_members; ++i) { - read_string(); /* member_name */ + skip_string(); /* member_name */ } } } @@ -527,4 +526,294 @@ inline bool Parser::bitmaskWithSignature(const File::Offset &offset) const return m_bitmaskSigOffsets.find(offset) != m_bitmaskSigOffsets.end(); } +Call * Parser::scan_call() +{ + do { + int c = read_byte(); + switch(c) { + case Trace::EVENT_ENTER: + scan_enter(); + break; + case Trace::EVENT_LEAVE: + return scan_leave(); + default: + std::cerr << "error: unknown event " << c << "\n"; + exit(1); + case -1: + for (CallList::iterator it = calls.begin(); it != calls.end(); ++it) { + std::cerr 
<< "warning: incomplete call " << (*it)->name() << "\n"; + std::cerr << **it << "\n"; + } + return NULL; + } + } while (true); +} + +void Parser::scan_enter(void) { + size_t id = read_uint(); + + FunctionSig *sig = lookup(functions, id); + const File::Offset offset = file->currentOffset(); + if (!sig) { + sig = new FunctionSig; + sig->id = id; + sig->name = read_string(); + sig->num_args = read_uint(); + const char **arg_names = new const char *[sig->num_args]; + for (unsigned i = 0; i < sig->num_args; ++i) { + arg_names[i] = read_string(); + } + sig->arg_names = arg_names; + functions[id] = sig; + m_callSigOffsets.insert(offset); + } + assert(sig); + + Call *call = new Call(sig); + call->no = next_call_no++; + + if (scan_call_details(call)) { + calls.push_back(call); + } else { + delete call; + } +} + +Call *Parser::scan_leave(void) { + unsigned call_no = read_uint(); + Call *call = NULL; + for (CallList::iterator it = calls.begin(); it != calls.end(); ++it) { + if ((*it)->no == call_no) { + call = *it; + calls.erase(it); + break; + } + } + if (!call) { + return NULL; + } + + if (scan_call_details(call)) { + return call; + } else { + delete call; + return NULL; + } +} + +bool Parser::scan_call_details(Call *call) { + do { + int c = read_byte(); + switch(c) { + case Trace::CALL_END: + return true; + case Trace::CALL_ARG: + scan_arg(call); + break; + case Trace::CALL_RET: + scan_value(); + break; + default: + std::cerr << "error: ("<<call->name()<< ") unknown call detail " + << c << "\n"; + exit(1); + case -1: + return false; + } + } while(true); +} + +void Parser::scan_arg(Call *call) { + skip_uint(); /* index */ + scan_value(); /* value */ +} + + +void Parser::scan_value(void) { + int c = read_byte(); + switch(c) { + case Trace::TYPE_NULL: + case Trace::TYPE_FALSE: + case Trace::TYPE_TRUE: + break; + case Trace::TYPE_SINT: + scan_sint(); + break; + case Trace::TYPE_UINT: + scan_uint(); + break; + case Trace::TYPE_FLOAT: + scan_float(); + break; + case Trace::TYPE_DOUBLE: 
+ scan_double(); + break; + case Trace::TYPE_STRING: + scan_string(); + break; + case Trace::TYPE_ENUM: + scan_enum(); + break; + case Trace::TYPE_BITMASK: + scan_bitmask(); + break; + case Trace::TYPE_ARRAY: + scan_array(); + break; + case Trace::TYPE_STRUCT: + scan_struct(); + break; + case Trace::TYPE_BLOB: + scan_blob(); + break; + case Trace::TYPE_OPAQUE: + scan_opaque(); + break; + default: + std::cerr << "error: unknown type " << c << "\n"; + exit(1); + case -1: + break; + } +} + + +void Parser::scan_sint() { + skip_uint(); +} + + +void Parser::scan_uint() { + skip_uint(); +} + + +void Parser::scan_float() { + file->skip(sizeof(float)); +} + + +void Parser::scan_double() { + file->skip(sizeof(double)); +} + + +void Parser::scan_string() { + skip_string(); +} + + +void Parser::scan_enum() { + size_t id = read_uint(); + EnumSig *sig = lookup(enums, id); + const File::Offset offset = file->currentOffset(); + if (!sig) { + sig = new EnumSig; + sig->id = id; + sig->name = read_string(); + Value *value = parse_value(); + sig->value = value->toSInt(); + delete value; + enums[id] = sig; + m_enumSigOffsets.insert(offset); + } + assert(sig); +} + + +void Parser::scan_bitmask() { + size_t id = read_uint(); + BitmaskSig *sig = lookup(bitmasks, id); + const File::Offset offset = file->currentOffset(); + if (!sig) { + sig = new BitmaskSig; + sig->id = id; + sig->num_flags = read_uint(); + BitmaskFlag *flags = new BitmaskFlag[sig->num_flags]; + for (BitmaskFlag *it = flags; it != flags + sig->num_flags; ++it) { + it->name = read_string(); + it->value = read_uint(); + if (it->value == 0 && it != flags) { + std::cerr << "warning: bitmask " << it->name << " is zero but is not first flag\n"; + } + } + sig->flags = flags; + bitmasks[id] = sig; + m_bitmaskSigOffsets.insert(offset); + } + assert(sig); + + skip_uint(); /* value */ +} + + +void Parser::scan_array(void) { + size_t len = read_uint(); + for (size_t i = 0; i < len; ++i) { + scan_value(); + } +} + + +void 
Parser::scan_blob(void) { + size_t size = read_uint(); + if (size) { + file->skip((unsigned)size); + } +} + + +void Parser::scan_struct() { + size_t id = read_uint(); + + StructSig *sig = lookup(structs, id); + const File::Offset offset = file->currentOffset(); + if (!sig) { + sig = new StructSig; + sig->id = id; + sig->name = read_string(); + sig->num_members = read_uint(); + const char **member_names = new const char *[sig->num_members]; + for (unsigned i = 0; i < sig->num_members; ++i) { + member_names[i] = read_string(); + } + sig->member_names = member_names; + structs[id] = sig; + m_structSigOffsets.insert(offset); + } + assert(sig); + + for (size_t i = 0; i < sig->num_members; ++i) { + scan_value(); + } +} + + +void Parser::scan_opaque() { + skip_uint(); +} + + +void Parser::skip_string(void) { + size_t len = read_uint(); + file->skip((unsigned)len); +} + + +void Parser::skip_uint(void) { + int c; + do { + c = file->getc(); + if (c == -1) { + break; + } + } while(c & 0x80); +} + + +inline void Parser::skip_byte(void) { + file->skip(1); +} + + } /* namespace Trace */ diff --git a/trace_parser.hpp b/trace_parser.hpp index f340da0..1f87b6f 100644 --- a/trace_parser.hpp +++ b/trace_parser.hpp @@ -112,6 +112,8 @@ public: next_call_no = num; } + Call *scan_call(); + protected: void parse_enter(void); @@ -150,6 +152,45 @@ protected: unsigned long long read_uint(void); inline int read_byte(void); + +protected: + void scan_enter(void); + + Call *scan_leave(void); + + bool scan_call_details(Call *call); + + void scan_arg(Call *call); + + void scan_value(void); + + void scan_sint(); + + void scan_uint(); + + void scan_float(); + + void scan_double(); + + void scan_string(); + + void scan_enum(); + + void scan_bitmask(); + + void scan_array(void); + + void scan_blob(void); + + void scan_struct(); + + void scan_opaque(); + + void skip_string(void); + + void skip_uint(void); + + inline void skip_byte(void); }; diff --git a/trace_snappyfile.cpp b/trace_snappyfile.cpp index 
07a62f6..443ebe7 100644 --- a/trace_snappyfile.cpp +++ b/trace_snappyfile.cpp @@ -268,3 +268,27 @@ void SnappyFile::setCurrentOffset(const File::Offset &offset) m_cachePtr = m_cache + offset.offsetInChunk; } + +bool SnappyFile::rawSkip(unsigned length) +{ + if (endOfData()) { + return false; + } + + if (freeCacheSize() >= length) { + m_cachePtr += length; + } else { + int sizeToRead = length; + while (sizeToRead) { + int chunkSize = std::min(freeCacheSize(), sizeToRead); + m_cachePtr += chunkSize; + sizeToRead -= chunkSize; + if (sizeToRead > 0) + flushCache(); + if (!m_cacheSize) + break; + } + } + + return true; +} diff --git a/trace_snappyfile.hpp b/trace_snappyfile.hpp index 2ee95be..ecf39de 100644 --- a/trace_snappyfile.hpp +++ b/trace_snappyfile.hpp @@ -62,6 +62,7 @@ protected: virtual int rawGetc(); virtual void rawClose(); virtual void rawFlush(); + virtual bool rawSkip(unsigned length); private: inline int freeCacheSize() const -- 2.43.0