From 166e4c0065829d26b2d0272335f983162d9ab4df Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 22 Aug 2012 11:59:12 -0700 Subject: [PATCH] trim: Use custom skiplist for required list (instead of std::set) The std::set::insert method has been dominating profile output for trimming large traces. Meanwhile, the access patterns of TraceAnalyzer on the required list is extremely structured; it often adds the next sequential integer. So we can optimize this data structure dramatically with two additions: 1. Store ranges rather than single elements, so that the addition of a new number can expand an existing range rather than inserting a new element. 2. Remember the most recent element looked-up so that we can check it first and not do any new lookup for in-order accesses. With these two changes we get O(1) behavior for the access pattern of adding sequential integers. For other cases we can still get logarithmic behavior, but we can keep N dramatically smaller. Unfortunately, we can't implement either change (that I can see) with STL. I don't see any way to support lookup with range-based elements in any STL data structure. So here, I implement a skip-list based callset data structure. It provides optimization (1) above and affords an easy implementation of (2) as well, (though that is postponed to a later commit). The skip-list implementation is ported from a C version which Keith Packard and I implemented for cairo, and with the identical license as the license of apitrace. The reference I used is the commit named c2509f8a721ec489e1b44fa8a68be165363787a7 in the cairo repository. In addition to changing from C to C++, I implemented range-based elements. --- cli/CMakeLists.txt | 1 + cli/cli_trim.cpp | 4 +- cli/trace_analyzer.cpp | 6 +- cli/trace_analyzer.hpp | 5 +- cli/trim_callset.cpp | 187 +++++++++++++++++++++++++++++++++++++++++ cli/trim_callset.hpp | 98 +++++++++++++++++++++ 6 files changed, 294 insertions(+), 7 deletions(-) create mode 100644 cli/trim_callset.cpp create mode 100644 cli/trim_callset.hpp diff --git a/cli/CMakeLists.txt b/cli/CMakeLists.txt index 689374d..3165f7d 100644 --- a/cli/CMakeLists.txt +++ b/cli/CMakeLists.txt @@ -24,6 +24,7 @@ add_executable (apitrace cli_trim.cpp cli_resources.cpp trace_analyzer.cpp + trim_callset.cpp ) target_link_libraries (apitrace diff --git a/cli/cli_trim.cpp b/cli/cli_trim.cpp index 4a116ee..6694737 100644 --- a/cli/cli_trim.cpp +++ b/cli/cli_trim.cpp @@ -203,7 +203,7 @@ trim_trace(const char *filename, struct trim_options *options) trace::ParseBookmark beginning; trace::Parser p; TraceAnalyzer analyzer(options->trim_flags); - std::set *required; + trim::CallSet *required; unsigned frame; int call_range_first, call_range_last; @@ -296,7 +296,7 @@ trim_trace(const char *filename, struct trim_options *options) break; } - if (required->find(call->no) != required->end()) { + if (required->contains(call->no)) { writer.writeCall(call); if (options->print_callset) { diff --git a/cli/trace_analyzer.cpp b/cli/trace_analyzer.cpp index 803c81c..80ec15e 100644 --- a/cli/trace_analyzer.cpp +++ b/cli/trace_analyzer.cpp @@ -162,7 +162,7 @@ TraceAnalyzer::consume(std::string resource) resources.erase(resource); for (call = calls.begin(); call != calls.end(); call++) { - required.insert(*call); + required.add(*call); } } @@ -754,13 +754,13 @@ TraceAnalyzer::require(trace::Call *call) requireDependencies(call); /* Then insert this call itself. */ - required.insert(call->no); + required.add(call->no); } /* Return a set of all the required calls, (both those calls added * explicitly with require() and those implicitly depended * upon. */ -std::set * +trim::CallSet * TraceAnalyzer::get_required(void) { return &required; diff --git a/cli/trace_analyzer.hpp b/cli/trace_analyzer.hpp index ef89ad7..6e9140a 100644 --- a/cli/trace_analyzer.hpp +++ b/cli/trace_analyzer.hpp @@ -29,6 +29,7 @@ #include #include "trace_callset.hpp" +#include "trim_callset.hpp" #include "trace_parser.hpp" typedef unsigned TrimFlags; @@ -58,7 +59,7 @@ private: std::map texture_map; - std::set required; + trim::CallSet required; bool transformFeedbackActive; bool framebufferObjectActive; @@ -109,5 +110,5 @@ public: /* Return a set of all the required calls, (both those calls added * explicitly with require() and those implicitly depended * upon. */ - std::set *get_required(void); + trim::CallSet *get_required(void); }; diff --git a/cli/trim_callset.cpp b/cli/trim_callset.cpp new file mode 100644 index 0000000..236880a --- /dev/null +++ b/cli/trim_callset.cpp @@ -0,0 +1,187 @@ +/********************************************************************* + * + * Copyright 2006 Keith Packard and Carl Worth + * Copyright 2012 Intel Corporation + * Copyright 2012 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *********************************************************************/ + +#include +#include +#include + +#include "trim_callset.hpp" + +using namespace trim; + +#define MAX_LEVEL 16 + +CallRange::CallRange(CallNo call_no, int level) +{ + first = call_no; + last = call_no; + this->level = level; + + next = new CallRange*[level]; +} + +bool +CallRange::contains(CallNo call_no) +{ + return (first <= call_no && last >= call_no); +} + +CallSet::CallSet(): head(0, MAX_LEVEL) +{ + head.first = std::numeric_limits::max(); + head.last = std::numeric_limits::min(); + + for (int i = 0; i < MAX_LEVEL; i++) + head.next[i] = NULL; + + max_level = 0; +} + +/* + * Generate a random level number, distributed + * so that each level is 1/4 as likely as the one before + * + * Note that level numbers run 1 <= level < MAX_LEVEL + */ +static int +random_level (void) +{ + /* tricky bit -- each bit is '1' 75% of the time */ + long int bits = random() | random(); + int level = 1; + + while (level < MAX_LEVEL) + { + if (bits & 1) + break; + level++; + bits >>= 1; + } + + return level; +} + +void +CallSet::add(CallNo call_no) +{ + CallRange **update[MAX_LEVEL]; + CallRange *node; + int i, level; + + /* Find node immediately before insertion point. */ + node = &head; + for (i = max_level - 1; i >= 0; i--) { + while (node->next[i] && call_no > node->next[i]->last) { + node = node->next[i]; + } + update[i] = &node->next[i]; + } + + /* Can we contain call_no by expanding tail of current range by 1? */ + if (node != &head && node->last == call_no - 1) { + + CallRange *next = node->next[0]; + + node->last = call_no; + + /* Also merge with next node as needed. */ + if (next && next->first == call_no + 1) { + node->last = next->last; + + /* Delete node 'next' */ + for (i = 0; i < node->level && i < next->level; i++) { + node->next[i] = next->next[i]; + } + + for (; i < next->level; i++) { + *update[i] = next->next[i]; + } + + delete next; + } + + return; + } + + /* Current range could not contain call_no, look at next. */ + node = node->next[0]; + + if (node) { + /* Do nothing if call_no is already contained. */ + if (node->first <= call_no) { + return; + } + + /* Can we exand head of range by 1? */ + if (node->first == call_no + 1) { + node->first = call_no; + return; + } + } + + /* Not possible to expand any existing node, so create new one. */ + + level = random_level(); + + /* Handle first node of this level. */ + if (level > max_level) { + level = max_level + 1; + update[max_level] = &head.next[max_level]; + max_level = level; + } + + node = new CallRange(call_no, level); + + /* Perform insertion into all lists. */ + for (i = 0; i < level; i++) { + node->next[i] = *update[i]; + *update[i] = node; + } +} + +bool +CallSet::contains(CallNo call_no) +{ + CallRange *node; + int i; + + node = &head; + for (i = max_level - 1; i >= 0; i--) { + while (node->next[i] && call_no > node->next[i]->last) { + node = node->next[i]; + } + } + + node = node->next[0]; + + if (node == NULL) + return false; + + return node->contains(call_no); +} diff --git a/cli/trim_callset.hpp b/cli/trim_callset.hpp new file mode 100644 index 0000000..667ec5e --- /dev/null +++ b/cli/trim_callset.hpp @@ -0,0 +1,98 @@ +/********************************************************************* + * + * Copyright 2012 Intel Corporation + * Copyright 2012 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *********************************************************************/ + +#ifndef _TRIM_CALLSET_HPP_ +#define _TRIM_CALLSET_HPP_ + +namespace trim { + +/* A set of call numbers. + * + * This is designed as a more efficient replacement for + * std::set which is used heavily within the trim code's + * TraceAnalyzer. This is quite similar to the trace::CallSet with the + * following differences: + * + * Simplifications: + * + * * This supports only the addition of a single call number, not + * the addition of a range. + * + * * There is no support here for the 'step' and 'freq' features + * supported by trace::CallSet. + * + * Sophistications: + * + * * This callset is implemented with a skip list for + * (probabilistically) logarithmic lookup times for + * out-of-order lookups. + * + * * This callset optimizes the addition of new calls which are + * within or adjacent to existing ranges, (by either doing + * nothing, expanding the limits of an existing range, or also + * merging two ranges). This keeps the length of the list down + * when succesively adding individual calls that could be + * efficiently expressed with a range. + * + * It would not be impossible to extend this code to support the + * missing features of trace::CallSet, (though the 'step' and 'freq' + * features would prevent some range-extending and merging + * optimizations in some cases). Currently, trace::CallSet is not used + * in any performance-critical areas, so it may not be important to + * provide the performance imrpovements to it. + */ + +typedef unsigned CallNo; + +class CallRange { +public: + CallNo first; + CallNo last; + int level; + CallRange **next; + + CallRange(CallNo call_no, int level); + + bool contains(CallNo call_no); +}; + +class CallSet { +public: + CallRange head; + int max_level; + + CallSet(); + + void add(CallNo call_no); + + bool contains(CallNo call_no); +}; + +} /* namespace trim */ + +#endif /* _TRIM_CALLSET_HPP_ */ -- 2.43.0