class XmlTokenizer:
"""Expat based XML tokenizer."""
- def __init__(self, fp, strip = True):
+ def __init__(self, fp, skip_ws = True):
+ # fp: file-like object; next() pulls the XML text from it with fp.read().
+ # skip_ws: when true, character data consisting only of whitespace
+ # produces no token (the check is made in finish_character_data).
self.fp = fp
self.tokens = []
self.index = 0
self.final = False
- self.strip = strip
+ self.skip_ws = skip_ws
+
+ # Text buffered by handle_character_data and the (line, column) at
+ # which that text started; both are consumed by finish_character_data.
+ self.character_pos = 0, 0
+ self.character_data = ''
self.parser = xml.parsers.expat.ParserCreate()
self.parser.StartElementHandler = self.handle_element_start
self.parser.CharacterDataHandler = self.handle_character_data
def handle_element_start(self, name, attributes):
+ # Flush text buffered since the previous tag first, so that its
+ # CHARACTER_DATA token is queued ahead of this ELEMENT_START token.
+ self.finish_character_data()
line, column = self.pos()
token = XmlToken(ELEMENT_START, name, attributes, line, column)
self.tokens.append(token)
def handle_element_end(self, name):
+ # Flush text buffered inside the element first, so that its
+ # CHARACTER_DATA token is queued ahead of this ELEMENT_END token.
+ self.finish_character_data()
line, column = self.pos()
token = XmlToken(ELEMENT_END, name, None, line, column)
self.tokens.append(token)
def handle_character_data(self, data):
- if self.strip:
- data = data.strip()
- if not data:
- return
-
- line, column = self.pos()
- token = XmlToken(CHARACTER_DATA, data, None, line, column)
- self.tokens.append(token)
+ # Text is only buffered here; finish_character_data() turns the buffer
+ # into a single CHARACTER_DATA token once the next tag is reported.
+ if not self.character_data:
+ # Buffer is empty, so this is the first chunk of a run: remember
+ # where the text started (only this assignment is conditional).
+ self.character_pos = self.pos()
+ # Every chunk is appended, not just the first one.
+ self.character_data += data
+
+    def finish_character_data(self):
+        """Turn the buffered character data, if any, into one token.
+
+        Nothing happens when the buffer is empty.  Otherwise a
+        CHARACTER_DATA token positioned at self.character_pos is appended
+        to self.tokens -- unless skip_ws is set and the text is made up of
+        whitespace only -- and the buffer is emptied.
+        """
+        text = self.character_data
+        if not text:
+            return
+
+        whitespace_only = text.isspace()
+        if not (self.skip_ws and whitespace_only):
+            line, column = self.character_pos
+            self.tokens.append(
+                XmlToken(CHARACTER_DATA, text, None, line, column))
+
+        # Start the next run of text from a clean buffer.
+        self.character_data = ''
def next(self):
size = 16*1024
self.index = 0
data = self.fp.read(size)
self.final = len(data) < size
- self.parser.Parse(data, self.final)
+ try:
+ self.parser.Parse(data, self.final)
+ except xml.parsers.expat.ExpatError, e:
+ # 3 is expat's XML_ERROR_NO_ELEMENTS ("no element found"); that one
+ # error is tolerated, every other parse error propagates.
+ # NOTE(review): presumably this lets empty or truncated traces be
+ # dumped as far as they go -- confirm.  The literal is used because
+ # xml.parsers.expat.errors.XML_ERROR_NO_ELEMENTS appears to hold the
+ # message string rather than the numeric code here -- verify.
+ if e.code != 3:
+ # A bare raise re-raises with the original traceback intact;
+ # "raise e" would replace it with one starting at this line.
+ raise
if self.index >= len(self.tokens):
line, column = self.pos()
token = XmlToken(EOF, None, None, line, column)
raise TokenMismatch(XmlToken(ELEMENT_END, name), self.token)
self.consume()
- def character_data(self):
- if self.token.type == CHARACTER_DATA:
- data = self.token.name_or_data
+ def character_data(self, strip = True):
+ # Concatenate the text of every consecutive CHARACTER_DATA token,
+ # consuming each one; yields '' when the current token is not
+ # character data.  With strip true (the default) leading and trailing
+ # whitespace is removed from the combined text.
+ # NOTE(review): relies on consume() moving self.token forward,
+ # otherwise the loop would not end -- consume() is not shown here.
+ data = ''
+ while self.token.type == CHARACTER_DATA:
+ data += self.token.name_or_data
self.consume()
- else:
- data = ''
+ if strip:
+ data = data.strip()
return data
+class Formatter:
+    '''Base formatter: every hook returns its argument as plain,
+    undecorated text.  Subclasses override the hooks to add markup.
+    '''
+
+    def function(self, name):
+        '''Return the text used for a function name.'''
+        return name
+
+    def variable(self, name):
+        '''Return the text used for a variable (argument) name.'''
+        return name
+
+    def literal(self, value):
+        '''Return the text used for a literal value, converted with str().'''
+        text = str(value)
+        return text
+
+    def address(self, addr):
+        '''Return the text used for an address.'''
+        return addr
+
+class AnsiFormatter(Formatter):
+    '''Formatter for plain-text output that decorates each item with ANSI
+    escape codes.  See http://en.wikipedia.org/wiki/ANSI_escape_code for
+    more information concerning ANSI escape codes.
+    '''
+
+    # Control Sequence Introducer: ESC followed by '['.
+    _csi = '\33['
+
+    # Attribute codes, each already carrying the terminating 'm'; they are
+    # appended to _csi by _escape().
+    _normal = '0m'
+    _bold = '1m'
+    _italic = '3m'
+    _red = '31m'
+    _green = '32m'
+    _blue = '34m'
+
+    def _escape(self, code, text):
+        '''Return text switched to the given attribute code and followed
+        by a reset to the normal attributes.
+        '''
+        start = self._csi + code
+        reset = self._csi + self._normal
+        return start + text + reset
+
+    def function(self, name):
+        '''Function names are rendered in bold.'''
+        return self._escape(self._bold, Formatter.function(self, name))
+
+    def variable(self, name):
+        '''Variable names are rendered in italic.'''
+        return self._escape(self._italic, Formatter.variable(self, name))
+
+    def literal(self, value):
+        '''Literal values are rendered in blue.'''
+        return self._escape(self._blue, Formatter.literal(self, value))
+
+    def address(self, value):
+        '''Addresses are rendered in green.'''
+        return self._escape(self._green, Formatter.address(self, value))
+
class TraceParser(XmlParser):
+ def __init__(self, stream, formatter):
+ # stream is handed to XmlParser unchanged; formatter supplies the
+ # function()/variable()/literal()/address() hooks this parser calls
+ # to decorate the text it writes.
+ XmlParser.__init__(self, stream)
+ self.formatter = formatter
+
def parse(self):
self.element_start('trace')
while self.token.type != ELEMENT_END:
args.append(arg)
elif self.token.name_or_data == 'ret':
ret = self.parse_ret()
+ elif self.token.name_or_data == 'call':
+ self.parse_call()
else:
raise TokenMismatch("<arg ...> or <ret ...>", self.token)
self.element_end('call')
- sys.stdout.write(name)
- sys.stdout.write('(' + ', '.join([name + ' = ' + value for name, value in args]) + ')')
+ call = self.formatter.function(name)
+ call += '(' + ', '.join([self.formatter.variable(name) + ' = ' + value for name, value in args]) + ')'
if ret is not None:
- sys.stdout.write(' = ' + ret)
- sys.stdout.write('\n')
+ call += ' = ' + ret
+ call += '\n'
+
+ try:
+ sys.stdout.write(call)
+ except IOError:
+ # catch broken pipe
+ sys.exit(0)
def parse_arg(self):
attrs = self.element_start('arg')
def parse_value(self):
if self.token.type == CHARACTER_DATA:
- return self.character_data()
+ return self.formatter.literal(self.character_data())
if self.token.type == ELEMENT_START:
if self.token.name_or_data == 'elem':
return self.parse_elems()
if self.token.type != ELEMENT_END:
value = '&' + self.parse_value()
else:
- value = attrs['addr']
+ value = self.formatter.address(attrs['addr'])
self.element_end('ref')
return value
def main():
- for arg in sys.argv[1:]:
- parser = TraceParser(open(arg, 'rt'))
- parser.parse()
+    """Dump every trace named on the command line, or stdin if none.
+
+    Files ending in .gz or .bz2 are decompressed on the fly; anything
+    else is opened as ordinary text.
+    """
+    formatter = AnsiFormatter()
+
+    args = sys.argv[1:]
+    if args:
+        for arg in args:
+            if arg.endswith('.gz'):
+                from gzip import GzipFile
+                # GzipFile documents only 'r'/'rb' (and the a/w
+                # variants); 'rt' is not among them.
+                stream = GzipFile(arg, 'rb')
+            elif arg.endswith('.bz2'):
+                from bz2 import BZ2File
+                # BZ2File accepts 'r' or 'w'; a 't' in the mode string
+                # is rejected with ValueError.
+                stream = BZ2File(arg, 'r')
+            else:
+                stream = open(arg, 'rt')
+            try:
+                parser = TraceParser(stream, formatter)
+                parser.parse()
+            finally:
+                # Release the handle even when parsing stops early.
+                stream.close()
+    else:
+        parser = TraceParser(sys.stdin, formatter)
+        parser.parse()
if __name__ == '__main__':