From 03263ab4bdd31537bc4f48a825225a47b4b74a86 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Mon, 30 May 2011 11:40:06 +0100 Subject: [PATCH] Parse traces directly from python. Mostly for prototyping, as it 4x slower, and eats more memory. --- scripts/format.py | 171 +++++++++++++ scripts/trace.py | 580 +++++++++++++++++++++++++++++++++++++++++++ scripts/tracediff.py | 115 ++++++--- 3 files changed, 827 insertions(+), 39 deletions(-) create mode 100755 scripts/format.py create mode 100755 scripts/trace.py diff --git a/scripts/format.py b/scripts/format.py new file mode 100755 index 0000000..9842f23 --- /dev/null +++ b/scripts/format.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python +########################################################################## +# +# Copyright 2008-2009 VMware, Inc. +# All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +##########################################################################/ + + +import sys + + +class Formatter: + '''Plain formatter''' + + def __init__(self, stream): + self.stream = stream + + def text(self, text): + self.stream.write(text) + + def newline(self): + self.text('\n') + + def function(self, name): + self.text(name) + + def variable(self, name): + self.text(name) + + def literal(self, value): + self.text(str(value)) + + def address(self, addr): + self.text(str(addr)) + + +class AnsiFormatter(Formatter): + '''Formatter for plain-text files which outputs ANSI escape codes. See + http://en.wikipedia.org/wiki/ANSI_escape_code for more information + concerning ANSI escape codes. + ''' + + _csi = '\33[' + + _normal = '0m' + _bold = '1m' + _italic = '3m' + _red = '31m' + _green = '32m' + _blue = '34m' + + def _escape(self, code): + self.text(self._csi + code) + + def function(self, name): + self._escape(self._bold) + Formatter.function(self, name) + self._escape(self._normal) + + def variable(self, name): + self._escape(self._italic) + Formatter.variable(self, name) + self._escape(self._normal) + + def literal(self, value): + self._escape(self._blue) + Formatter.literal(self, value) + self._escape(self._normal) + + def address(self, value): + self._escape(self._green) + Formatter.address(self, value) + self._escape(self._normal) + + +class WindowsConsoleFormatter(Formatter): + '''Formatter for the Windows Console. See + http://code.activestate.com/recipes/496901/ for more information. + ''' + + STD_INPUT_HANDLE = -10 + STD_OUTPUT_HANDLE = -11 + STD_ERROR_HANDLE = -12 + + FOREGROUND_BLUE = 0x01 + FOREGROUND_GREEN = 0x02 + FOREGROUND_RED = 0x04 + FOREGROUND_INTENSITY = 0x08 + BACKGROUND_BLUE = 0x10 + BACKGROUND_GREEN = 0x20 + BACKGROUND_RED = 0x40 + BACKGROUND_INTENSITY = 0x80 + + _normal = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED + _bold = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_INTENSITY + _italic = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED + _red = FOREGROUND_RED | FOREGROUND_INTENSITY + _green = FOREGROUND_GREEN | FOREGROUND_INTENSITY + _blue = FOREGROUND_BLUE | FOREGROUND_INTENSITY + + def __init__(self, stream): + Formatter.__init__(self, stream) + + if stream is sys.stdin: + nStdHandle = self.STD_INPUT_HANDLE + elif stream is sys.stdout: + nStdHandle = self.STD_OUTPUT_HANDLE + elif stream is sys.stderr: + nStdHandle = self.STD_ERROR_HANDLE + else: + nStdHandle = None + + if nStdHandle: + import ctypes + self.handle = ctypes.windll.kernel32.GetStdHandle(nStdHandle) + else: + self.handle = None + + def _attribute(self, attr): + if self.handle: + import ctypes + ctypes.windll.kernel32.SetConsoleTextAttribute(self.handle, attr) + + def function(self, name): + self._attribute(self._bold) + Formatter.function(self, name) + self._attribute(self._normal) + + def variable(self, name): + self._attribute(self._italic) + Formatter.variable(self, name) + self._attribute(self._normal) + + def literal(self, value): + self._attribute(self._blue) + Formatter.literal(self, value) + self._attribute(self._normal) + + def address(self, value): + self._attribute(self._green) + Formatter.address(self, value) + self._attribute(self._normal) + + +def DefaultFormatter(stream): + if sys.platform in ('linux2', 'cygwin'): + return AnsiFormatter(stream) + elif sys.platform in ('win32',): + return WindowsConsoleFormatter(stream) + else: + return Formatter(stream) + diff --git a/scripts/trace.py b/scripts/trace.py new file mode 100755 index 0000000..5549910 --- /dev/null +++ b/scripts/trace.py @@ -0,0 +1,580 @@ +#!/usr/bin/env python +########################################################################## +# +# Copyright 2011 Jose Fonseca +# All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the 'Software'), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +##########################################################################/ + + +import gzip +import string +import struct +import sys + +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +import format + + +class GzipFile(gzip.GzipFile): + + def _read_eof(self): + # Ignore incomplete files + try: + gzip.GzipFile._read_eof(self) + except IOError: + pass + + +TRACE_VERSION = 1 + +EVENT_ENTER, EVENT_LEAVE = range(2) + +CALL_END, CALL_ARG, CALL_RET, CALL_THREAD = range(4) + +TYPE_NULL, TYPE_FALSE, TYPE_TRUE, TYPE_SINT, TYPE_UINT, TYPE_FLOAT, TYPE_DOUBLE, TYPE_STRING, TYPE_BLOB, TYPE_ENUM, TYPE_BITMASK, TYPE_ARRAY, TYPE_STRUCT, TYPE_OPAQUE = range(14) + + +class Signature: + '''Dummy class for signatures.''' + + pass + + +class Node: + + def visit(self, visitor): + raise NotImplementedError + + def pretty_print(self, formatter): + pretty_printer = PrettyPrinter(formatter) + self.visit(pretty_printer) + + def __str__(self): + stream = StringIO() + formatter = format.Formatter(stream) + self.pretty_print(formatter) + return stream.getvalue() + + def __eq__(self, other): + raise NotImplementedError + + def __hash__(self): + raise NotImplementedError + + +class Literal(Node): + + def __init__(self, value): + self.value = value + + def visit(self, visitor): + visitor.visit_literal(self) + + def __eq__(self, other): + return \ + self.__class__ == other.__class__ and \ + self.value == other.value + + def __hash__(self): + return hash(self.value) + + +class Enum(Node): + + def __init__(self, name, value): + self.name = name + self.value = value + + def visit(self, visitor): + visitor.visit_enum(self) + + def __eq__(self, other): + return \ + self.__class__ == other.__class__ and \ + self.name == other.name and \ + self.value == other.value + + def __hash__(self): + return hash(self.value) + + +class Array(Node): + + def __init__(self, elements): + self.elements = tuple(elements) + + def visit(self, visitor): + visitor.visit_array(self) + + def __eq__(self, other): + return \ + self.__class__ == other.__class__ and \ + self.elements == other.elements + + def __hash__(self): + return hash(self.elements) + + +class Pointer(Node): + + def __init__(self, value): + self.value = value + + def visit(self, visitor): + visitor.visit_pointer(self) + + def __eq__(self, other): + return \ + self.__class__ == other.__class__ and \ + self.value == other.value + + def __hash__(self): + return hash(self.value) + + +def Null(): + return Enum("NULL", 0) + +def Bitmask(sig, value): + return Literal(value) + +def Blob(buf): + return Literal('blob(%u)' % len(buf)) + + +class Struct(Node): + + def __init__(self, sig, members): + self.sig = sig + self.members = tuple(members) + + def visit(self, visitor): + visitor.visit_struct(self) + + def __eq__(self, other): + return \ + self.__class__ == other.__class__ and \ + self.sig.member_names == other.sig.member_names and \ + self.members == other.members + + def __hash__(self): + return hash(self.sig.member_names) ^ hash(self.members) + + +class Call(Node): + + def __init__(self, sig): + self.sig = sig + self.args = [None] * len(sig.arg_names) + self.ret = None + + def get_name(self): + return self.sig.name + + name = property(get_name) + + def visit(self, visitor): + visitor.visit_call(self) + + def __eq__(self, other): + return \ + self.__class__ == other.__class__ and \ + self.sig.name == other.sig.name and \ + self.args == other.args and \ + self.ret == other.ret + + def __hash__(self): + return hash(self.sig.name) ^ hash(tuple(self.args)) ^ hash(self.ret) + + +class Trace(Node): + + def __init__(self, calls): + self.calls = calls + + def visit(self, visitor): + visitor.visit_trace(self) + + +class Visitor: + + def visit_literal(self, node): + raise NotImplementedError + + def visit_enum(self, node): + raise NotImplementedError + + def visit_array(self, node): + raise NotImplementedError + + def visit_struct(self, node): + raise NotImplementedError + + def visit_pointer(self, node): + raise NotImplementedError + + def visit_call(self, node): + raise NotImplementedError + + def visit_trace(self, node): + raise NotImplementedError + + +class PrettyPrinter: + + def __init__(self, formatter): + self.formatter = formatter + + def visit_literal(self, node): + if isinstance(node.value, basestring): + if len(node.value) >= 4096 or node.value.strip(string.printable): + self.formatter.text('...') + return + + self.formatter.literal('"' + node.value + '"') + return + + self.formatter.literal(repr(node.value)) + + def visit_enum(self, node): + self.formatter.literal(node.name) + + def visit_array(self, node): + self.formatter.text('{') + sep = '' + for value in node.elements: + self.formatter.text(sep) + value.visit(self) + sep = ', ' + self.formatter.text('}') + + def visit_struct(self, node): + self.formatter.text('{') + sep = '' + for name, value in zip(node.sig.member_names, node.members): + self.formatter.text(sep) + self.formatter.variable(name) + self.formatter.text(' = ') + value.visit(self) + sep = ', ' + self.formatter.text('}') + + def visit_pointer(self, node): + self.formatter.address(node.value) + + def visit_call(self, node): + #self.formatter.text('%s ' % node.no) + self.formatter.function(node.name) + self.formatter.text('(') + sep = '' + for name, value in zip(node.sig.arg_names, node.args): + self.formatter.text(sep) + self.formatter.variable(name) + self.formatter.text(' = ') + value.visit(self) + sep = ', ' + self.formatter.text(')') + if node.ret is not None: + self.formatter.text(' = ') + node.ret.visit(self) + + def visit_trace(self, node): + for call in node.calls: + call.visit(self) + self.formatter.newline() + + +class Parser: + + def __init__(self): + self.file = None + self.next_call_no = 0 + self.version = 0 + + self.functions = {} + self.enums = {} + self.bitmasks = {} + self.structs = {} + + self.calls = [] + + def open(self, filename): + self.file = GzipFile(filename, "rb") + if not self.file: + return False + + version = self.read_uint() + if version > TRACE_VERSION: + sys.stderr.write("error: unsupported trace format version %u\n" % version) + return False + + return True + + def parse_call(self): + while True: + c = self.read_byte() + if c == EVENT_ENTER: + self.parse_enter() + elif c == EVENT_LEAVE: + return self.parse_leave() + elif c == -1: + return None + else: + sys.stderr.write("error: unknown event %i\n" % c) + sys.exit(1) + + def parse_enter(self): + id = self.read_uint() + + try: + sig = self.functions[id] + except KeyError: + sig = Signature() + sig.name = self.read_string() + num_args = self.read_uint() + sig.arg_names = tuple([self.read_string() for i in range(num_args)]) + self.functions[id] = sig + + call = Call(sig) + call.no = self.next_call_no + self.next_call_no += 1 + + if self.parse_call_details(call): + self.calls.append(call) + else: + del call + + def parse_leave(self): + call_no = self.read_uint() + call = None + for i in range(len(self.calls)): + if self.calls[i].no == call_no: + call = self.calls.pop(i) + break + if call is None: + return None + + if self.parse_call_details(call): + return call + else: + del call + return None + + def parse_call_details(self, call): + while True: + c = self.read_byte() + if c == CALL_END: + return True + elif c == CALL_ARG: + self.parse_arg(call) + elif c == CALL_RET: + call.ret = self.parse_value() + else: + sys.stderr.write("error: unknown call detail %i\n" % c) + sys.exit(1) + + def parse_arg(self, call): + index = self.read_uint() + value = self.parse_value() + if index >= len(call.args): + call.args.resize(index + 1) + call.args[index] = value + + def parse_value(self): + c = self.read_byte() + if c == TYPE_NULL: + value = Null() + elif c == TYPE_FALSE: + value = Literal(False) + elif c == TYPE_TRUE: + value = Literal(True) + elif c == TYPE_SINT: + value = self.parse_sint() + elif c == TYPE_UINT: + value = self.parse_uint() + elif c == TYPE_FLOAT: + value = self.parse_float() + elif c == TYPE_DOUBLE: + value = self.parse_double() + elif c == TYPE_STRING: + value = self.parse_string() + elif c == TYPE_ENUM: + value = self.parse_enum() + elif c == TYPE_BITMASK: + value = self.parse_bitmask() + elif c == TYPE_ARRAY: + value = self.parse_array() + elif c == TYPE_STRUCT: + value = self.parse_struct() + elif c == TYPE_BLOB: + value = self.parse_blob() + elif c == TYPE_OPAQUE: + value = self.parse_opaque() + else: + sys.stderr.write("error: unknown type %i\n" % c) + sys.exit(1) + #self.debug("\tVALUE %s\n" % value) + return value + + def parse_sint(self): + return Literal(-self.read_uint()) + + def parse_uint(self): + return Literal(self.read_uint()) + + def parse_float(self): + value = self.file.read(4) + value, = struct.unpack('f', value) + return Literal(value) + + def parse_double(self): + value = self.file.read(8) + value, = struct.unpack('d', value) + return Literal(value) + + def parse_string(self): + return Literal(self.read_string()) + + def parse_enum(self): + id = self.read_uint() + try: + enum = self.enums[id] + except KeyError: + name = self.read_string() + value = self.parse_value() + enum = Enum(name, value) + self.enums[id] = enum + return enum + + def parse_bitmask(self): + id = self.read_uint() + try: + sig = self.bitmasks[id] + except KeyError: + sig = Signature() + num_flags = self.read_uint() + sig.flags = [] + for i in range(num_flags): + name = self.read_string() + value = self.read_uint() + if value == 0 and i: + sys.stderr.write("warning: bitmask %s is zero but is not first flag\n" % name) + flag = name, value + sig.flags.append(flag) + self.bitmasks[id] = sig + assert sig + + value = self.read_uint() + + return Bitmask(sig, value) + + def parse_array(self): + size = self.read_uint() + elements = [self.parse_value() for i in range(size)] + return Array(elements) + + def parse_blob(self): + size = self.read_uint() + if size: + buf = self.file.read(size) + else: + buf = "" + return Blob(buf) + + def parse_struct(self): + id = self.read_uint() + + try: + sig = self.structs[id] + except KeyError: + sig = Signature() + sig.name = self.read_string() + num_members = self.read_uint() + sig.member_names = tuple([self.read_string() for i in range(num_members)]) + self.structs[id] = sig + + members = [self.parse_value() for i in range(len(sig.member_names))] + value = Struct(sig, members) + + return value + + def parse_opaque(self): + addr = self.read_uint() + return Pointer(addr) + + def read_string(self): + size = self.read_uint() + if size: + value = self.file.read(size) + else: + value = '' + #self.debug("\tSTRING \"%s\"\n" % value) + return value + + def read_uint(self): + value = 0 + shift = 0 + while True: + c = self.file.read(1) + if c == "": + return 0 + c = ord(c) + value |= (c & 0x7f) << shift + shift += 7 + if c & 0x80 == 0: + break + #self.debug("\tUINT %u\n" % value) + return value + + def read_byte(self): + c = self.file.read(1) + if c == "": + #self.debug("\tEOF\n") + return -1 + else: + c = ord(c) + #self.debug("\tBYTE 0x%x\n" % c) + return c + + def debug(self, s): + sys.stderr.write(s) + + +def main(): + formatter = format.DefaultFormatter(sys.stdout) + for arg in sys.argv[1:]: + parser = Parser() + parser.open(arg) + call = parser.parse_call() + while call: + formatter.text('%u ' % call.no) + call.pretty_print(formatter) + formatter.text('\n') + call = parser.parse_call() + + +if __name__ == '__main__': + main() diff --git a/scripts/tracediff.py b/scripts/tracediff.py index 9614a21..2506eb7 100755 --- a/scripts/tracediff.py +++ b/scripts/tracediff.py @@ -28,19 +28,14 @@ import difflib import optparse import os.path -import re -import subprocess import sys +from trace import Parser -call_re = re.compile('^([0-9]+) (\w+)\(') - -ansi_re = re.compile('\x1b\[[0-9]{1,2}(;[0-9]{1,2}){0,2}m') - - -def ansi_strip(s): - # http://www.theeggeadventure.com/wikimedia/index.php/Linux_Tips#Use_sed_to_remove_ANSI_colors - return ansi_re.sub('', s) +try: + import debug +except ImportError: + pass ignored_function_names = set([ @@ -54,20 +49,16 @@ ignored_function_names = set([ def readtrace(trace): - p = subprocess.Popen([options.tracedump, trace], stdout=subprocess.PIPE) - lines = [] - for line in p.stdout.readlines(): - line = ansi_strip(line) - mo = call_re.match(line) - if mo: - function_name = mo.group(2) - if function_name in ignored_function_names: - continue - lines.append(line[mo.start(2):]) - else: - lines[-1] += line - p.wait() - return lines + calls = [] + parser = Parser() + parser.open(trace) + call = parser.parse_call() + while call and len(calls) < 1000: + hash(call) + if call.sig.name not in ignored_function_names: + calls.append(call) + call = parser.parse_call() + return calls class SDiffer: @@ -80,21 +71,61 @@ class SDiffer: matcher = difflib.SequenceMatcher(None, self.a, self.b) for tag, alo, ahi, blo, bhi in matcher.get_opcodes(): if tag == 'replace': - g = self.replace(alo, ahi, blo, bhi) + self.replace(alo, ahi, blo, bhi) elif tag == 'delete': - g = self.delete(alo, ahi) + self.delete(alo, ahi) elif tag == 'insert': - g = self.insert(blo, bhi) + self.insert(blo, bhi) elif tag == 'equal': - g = self.equal(alo, ahi) + self.equal(alo, ahi) else: raise ValueError, 'unknown tag %s' % (tag,) - for line in g: - yield line - def replace(self, alo, ahi, blo, bhi): assert alo < ahi and blo < bhi + + a_names = [call.name for call in self.a[alo:ahi]] + b_names = [call.name for call in self.b[blo:bhi]] + + matcher = difflib.SequenceMatcher(None, a_names, b_names) + for tag, _alo, _ahi, _blo, _bhi in matcher.get_opcodes(): + _alo += alo + _ahi += alo + _blo += blo + _bhi += blo + if tag == 'replace': + self.replace_dissimilar(_alo, _ahi, _blo, _bhi) + elif tag == 'delete': + self.delete(_alo, _ahi) + elif tag == 'insert': + self.insert(_blo, _bhi) + elif tag == 'equal': + self.replace_similar(_alo, _ahi, _blo, _bhi) + else: + raise ValueError, 'unknown tag %s' % (tag,) + + def replace_similar(self, alo, ahi, blo, bhi): + assert alo < ahi and blo < bhi + assert ahi - alo == bhi - blo + for i in xrange(0, bhi - blo): + a_call = self.a[alo + i] + b_call = self.b[blo + i] + assert a_call.name == b_call.name + assert len(a_call.args) == len(b_call.args) + sys.stdout.write(b_call.name + '(') + sep = '' + for j in xrange(len(b_call.args)): + sys.stdout.write(sep) + self.replace_value(a_call.args[j], b_call.args[j]) + sep = ', ' + sys.stdout.write(')') + if a_call.ret is not None or b_call.ret is not None: + sys.stdout.write(' = ') + self.replace_value(a_call.ret, b_call.ret) + sys.stdout.write('\n') + + def replace_dissimilar(self, alo, ahi, blo, bhi): + assert alo < ahi and blo < bhi if bhi - blo < ahi - alo: first = self.insert(blo, bhi) second = self.delete(alo, ahi) @@ -106,20 +137,26 @@ class SDiffer: for line in g: yield line + def replace_value(self, a, b): + if b == a: + sys.stdout.write(str(b)) + else: + sys.stdout.write('%s -> %s' % (a, b)) + escape = "\33[" def delete(self, alo, ahi): - return self.dump('- ' + self.escape + '9m', self.a, alo, ahi, self.escape + '0m') + self.dump('- ' + self.escape + '9m', self.a, alo, ahi, self.escape + '0m') def insert(self, blo, bhi): - return self.dump('+ ', self.b, blo, bhi) + self.dump('+ ', self.b, blo, bhi) def equal(self, alo, ahi): - return self.dump(' ' + self.escape + '2m', self.a, alo, ahi, self.escape + '0m') + self.dump(' ' + self.escape + '2m', self.a, alo, ahi, self.escape + '0m') def dump(self, prefix, x, lo, hi, suffix=""): for i in xrange(lo, hi): - yield prefix + str(x[i]) + suffix + sys.stdout.write(prefix + str(x[i]) + suffix + '\n') def main(): @@ -137,11 +174,11 @@ def main(): if len(args) != 2: optparser.error("incorrect number of arguments") - ref_lines = readtrace(args[0]) - src_lines = readtrace(args[1]) + ref_calls = readtrace(args[0]) + src_calls = readtrace(args[1]) - diff = SDiffer(ref_lines, src_lines).diff() - sys.stdout.writelines(diff) + differ = SDiffer(ref_calls, src_calls) + differ.diff() if __name__ == '__main__': -- 2.43.0