X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=scripts%2Funpickle.py;h=50d8480aac948143f78c091bb5198d2a04ec4773;hb=3801952b80cd7a7160f6410518f6e3740d461b60;hp=fd7989b8ff3be7cab3b711fc2f9c48212c9be05b;hpb=baee57937dcb8acefd86f1b0edf3a6dfa96c047a;p=apitrace diff --git a/scripts/unpickle.py b/scripts/unpickle.py index fd7989b..50d8480 100755 --- a/scripts/unpickle.py +++ b/scripts/unpickle.py @@ -33,25 +33,148 @@ Run as: ''' +import itertools import optparse -import cPickle as pickle import sys import time +import re +import cPickle as pickle + + +class Visitor: + + def __init__(self): + self.dispatch = {} + self.dispatch[type(None)] = self.visitNone + self.dispatch[bool] = self.visitBool + self.dispatch[int] = self.visitInt + self.dispatch[long] = self.visitInt + self.dispatch[float] = self.visitFloat + self.dispatch[str] = self.visitStr + self.dispatch[tuple] = self.visitTuple + self.dispatch[list] = self.visitList + self.dispatch[dict] = self.visitDict + self.dispatch[bytearray] = self.visitByteArray + + def visit(self, obj): + method = self.dispatch.get(type(obj), self.visitObj) + return method(obj) + + def visitObj(self, obj): + raise NotImplementedError + + def visitAtom(self, obj): + return self.visitObj(obj) + + def visitNone(self, obj): + return self.visitAtom(obj) + + def visitBool(self, obj): + return self.visitAtom(obj) + + def visitInt(self, obj): + return self.visitAtom(obj) + + def visitFloat(self, obj): + return self.visitAtom(obj) + + def visitStr(self, obj): + return self.visitAtom(obj) + + def visitIterable(self, obj): + return self.visitObj(obj) + + def visitTuple(self, obj): + return self.visitIterable(obj) + + def visitList(self, obj): + return self.visitIterable(obj) + + def visitDict(self, obj): + raise NotImplementedError + + def visitByteArray(self, obj): + raise NotImplementedError + + +class Dumper(Visitor): + + id_re = re.compile('^[_A-Za-z][_A-Za-z0-9]*$') + + def visitObj(self, obj): + return repr(obj) + + def visitStr(self, obj): + if self.id_re.match(obj): + return obj + else: + return repr(obj) + + def visitTuple(self, obj): + return '[' + ', '.join(itertools.imap(self.visit, obj)) + ']' + + def visitList(self, obj): + return '(' + ', '.join(itertools.imap(self.visit, obj)) + ')' + + def visitByteArray(self, obj): + return 'blob(%u)' % len(obj) + + +class Hasher(Visitor): + '''Returns a hashable version of the objtree.''' + + def visitObj(self, obj): + return obj + + def visitAtom(self, obj): + return obj + + def visitIterable(self, obj): + return tuple(itertools.imap(self.visit, obj)) + + def visitByteArray(self, obj): + return str(obj) + + +class Rebuilder(Visitor): + '''Returns a hashable version of the objtree.''' + + def visitAtom(self, obj): + return obj + + def visitIterable(self, obj): + changed = False + newItems = [] + for oldItem in obj: + newItem = self.visit(oldItem) + if newItem is not oldItem: + changed = True + newItems.append(newItem) + if changed: + klass = type(obj) + return klass(newItems) + else: + return obj + + def visitByteArray(self, obj): + return obj class Call: def __init__(self, callTuple): self.no, self.functionName, self.args, self.ret = callTuple + self._hash = None def __str__(self): s = self.functionName if self.no is not None: s = str(self.no) + ' ' + s - s += '(' + ', '.join(map(repr, self.args)) + ')' + dumper = Dumper() + s += '(' + ', '.join(itertools.imap(dumper.visit, self.args)) + ')' if self.ret is not None: s += ' = ' - s += repr(self.ret) + s += dumper.visit(self.ret) return s def __eq__(self, other): @@ -61,10 +184,11 @@ class Call: self.ret == other.ret def __hash__(self): - # XXX: hack due to unhashable types - #return hash(self.functionName) ^ hash(tuple(self.args)) ^ hash(self.ret) - return hash(self.functionName) ^ hash(repr(self.args)) ^ hash(repr(self.ret)) - + if self._hash is None: + hasher = Hasher() + hashable = hasher.visit(self.functionName), hasher.visit(self.args), hasher.visit(self.ret) + self._hash = hash(hashable) + return self._hash class Unpickler: @@ -94,25 +218,42 @@ class Unpickler: class Counter(Unpickler): - def __init__(self, stream, quiet): + def __init__(self, stream, verbose = False): Unpickler.__init__(self, stream) - self.quiet = quiet - self.calls = 0 + self.verbose = verbose + self.numCalls = 0 + self.functionFrequencies = {} + + def parse(self): + Unpickler.parse(self) + + functionFrequencies = self.functionFrequencies.items() + functionFrequencies.sort(lambda (name1, freq1), (name2, freq2): cmp(freq1, freq2)) + for name, frequency in functionFrequencies: + sys.stdout.write('%8u %s\n' % (frequency, name)) def handleCall(self, call): - if not self.quiet: + if self.verbose: sys.stdout.write(str(call)) sys.stdout.write('\n') - self.calls += 1 + self.numCalls += 1 + try: + self.functionFrequencies[call.functionName] += 1 + except KeyError: + self.functionFrequencies[call.functionName] = 1 def main(): optparser = optparse.OptionParser( - usage="\n\tapitrace pickle trace. %prog [options]") + usage="\n\tapitrace pickle | %prog [options]") optparser.add_option( - '--quiet', - action="store_true", dest="quiet", default=False, - help="don't dump calls to stdout") + '-p', '--profile', + action="store_true", dest="profile", default=False, + help="profile call parsing") + optparser.add_option( + '-v', '--verbose', + action="store_true", dest="verbose", default=False, + help="dump calls to stdout") (options, args) = optparser.parse_args(sys.argv[1:]) @@ -129,11 +270,13 @@ def main(): msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY) startTime = time.time() - parser = Counter(sys.stdin, options.quiet) + parser = Counter(sys.stdin, options.verbose) parser.parse() stopTime = time.time() duration = stopTime - startTime - sys.stderr.write('%u calls, %.03f secs, %u calls/sec\n' % (parser.calls, duration, parser.calls/duration)) + + if options.profile: + sys.stderr.write('Processed %u calls in %.03f secs, at %u calls/sec\n' % (parser.numCalls, duration, parser.numCalls/duration)) if __name__ == '__main__':