2 ##########################################################################
4 # Copyright 2008-2009 VMware, Inc.
7 # Permission is hereby granted, free of charge, to any person obtaining a copy
8 # of this software and associated documentation files (the "Software"), to deal
9 # in the Software without restriction, including without limitation the rights
10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 # copies of the Software, and to permit persons to whom the Software is
12 # furnished to do so, subject to the following conditions:
14 # The above copyright notice and this permission notice shall be included in
15 # all copies or substantial portions of the Software.
17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 ##########################################################################/
30 import xml.parsers.expat
# Token kind tags, numbered 0 through 3 in the order listed.  XmlToken
# asserts that its `type` argument is one of these four values.
ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF = range(4)
41 def __init__(self, type, name_or_data, attrs = None, line = None, column = None):
42 assert type in (ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF)
44 self.name_or_data = name_or_data
50 if self.type == ELEMENT_START:
51 return '<' + self.name_or_data + ' ...>'
52 if self.type == ELEMENT_END:
53 return '</' + self.name_or_data + '>'
54 if self.type == CHARACTER_DATA:
55 return self.name_or_data
62 """Expat based XML tokenizer."""
64 def __init__(self, fp, skip_ws = True):
69 self.skip_ws = skip_ws
71 self.character_pos = 0, 0
72 self.character_data = ''
74 self.parser = xml.parsers.expat.ParserCreate()
75 self.parser.StartElementHandler = self.handle_element_start
76 self.parser.EndElementHandler = self.handle_element_end
77 self.parser.CharacterDataHandler = self.handle_character_data
def handle_element_start(self, name, attributes):
    """Expat StartElementHandler: queue an ELEMENT_START token.

    Any character data buffered so far is flushed first, so that it is
    queued ahead of the element that follows it.
    """
    self.finish_character_data()
    row, col = self.pos()
    self.tokens.append(XmlToken(ELEMENT_START, name, attributes, row, col))
def handle_element_end(self, name):
    """Expat EndElementHandler: queue an ELEMENT_END token.

    Any character data buffered so far is flushed first, so that it is
    queued ahead of the closing tag.
    """
    self.finish_character_data()
    row, col = self.pos()
    # End tags carry no attributes, hence the explicit None.
    self.tokens.append(XmlToken(ELEMENT_END, name, None, row, col))
def handle_character_data(self, data):
    """Expat CharacterDataHandler: accumulate a run of text.

    The position is recorded only when the buffer is empty, i.e. at the
    start of a run; later pieces are appended to the same buffer.
    """
    starting_new_run = not self.character_data
    if starting_new_run:
        self.character_pos = self.pos()
    self.character_data = self.character_data + data
def finish_character_data(self):
    """Flush the buffered character data as a CHARACTER_DATA token.

    Nothing happens when the buffer is empty.  When `skip_ws` is set and
    the buffer is whitespace only, the text is discarded without queuing
    a token.  In both non-empty cases the buffer is reset afterwards.
    """
    pending = self.character_data
    if not pending:
        return
    discard = self.skip_ws and pending.isspace()
    if not discard:
        row, col = self.character_pos
        self.tokens.append(XmlToken(CHARACTER_DATA, pending, None, row, col))
    self.character_data = ''
106 while self.index >= len(self.tokens) and not self.final:
109 data = self.fp.read(size)
110 self.final = len(data) < size
111 data = data.rstrip('\0')
113 self.parser.Parse(data, self.final)
114 except xml.parsers.expat.ExpatError, e:
115 #if e.code == xml.parsers.expat.errors.XML_ERROR_NO_ELEMENTS:
120 if self.index >= len(self.tokens):
121 line, column = self.pos()
122 token = XmlToken(EOF, None, None, line, column)
124 token = self.tokens[self.index]
129 return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber
132 class TokenMismatch(Exception):
134 def __init__(self, expected, found):
135 self.expected = expected
139 return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found))
144 """Base XML document parser."""
146 def __init__(self, fp):
147 self.tokenizer = XmlTokenizer(fp)
151 self.token = self.tokenizer.next()
def match_element_start(self, name):
    """Tell whether the current token is a start tag named `name`.

    The token is only inspected, never consumed.
    """
    current = self.token
    if current.type != ELEMENT_START:
        return False
    return current.name_or_data == name
def match_element_end(self, name):
    """Tell whether the current token is an end tag named `name`.

    The token is only inspected, never consumed.
    """
    current = self.token
    if current.type != ELEMENT_END:
        return False
    return current.name_or_data == name
159 def element_start(self, name):
160 while self.token.type == CHARACTER_DATA:
162 if self.token.type != ELEMENT_START:
163 raise TokenMismatch(XmlToken(ELEMENT_START, name), self.token)
164 if self.token.name_or_data != name:
165 raise TokenMismatch(XmlToken(ELEMENT_START, name), self.token)
166 attrs = self.token.attrs
170 def element_end(self, name):
171 while self.token.type == CHARACTER_DATA:
173 if self.token.type != ELEMENT_END:
174 raise TokenMismatch(XmlToken(ELEMENT_END, name), self.token)
175 if self.token.name_or_data != name:
176 raise TokenMismatch(XmlToken(ELEMENT_END, name), self.token)
179 def character_data(self, strip = True):
181 while self.token.type == CHARACTER_DATA:
182 data += self.token.name_or_data
189 class GzipFile(gzip.GzipFile):
192 # Ignore incomplete files
194 gzip.GzipFile._read_eof(self)
199 class TraceParser(XmlParser):
201 def __init__(self, stream):
202 XmlParser.__init__(self, stream)
206 self.element_start('trace')
207 while self.token.type not in (ELEMENT_END, EOF):
209 if self.token.type != EOF:
210 self.element_end('trace')
212 def parse_call(self):
213 attrs = self.element_start('call')
218 while self.token.type == ELEMENT_START:
219 if self.token.name_or_data == 'arg':
220 arg = self.parse_arg()
222 elif self.token.name_or_data == 'ret':
223 ret = self.parse_ret()
224 elif self.token.name_or_data in ('duration', 'starttsc', 'endtsc'):
225 property = self.token.name_or_data
226 properties[property] = self.parse_hex(self.token.name_or_data)
227 elif self.token.name_or_data == 'call':
228 # ignore nested function calls
231 raise TokenMismatch("<arg ...> or <ret ...>", self.token)
232 self.element_end('call')
236 call = Call(self.call_no, name, args, ret, properties)
238 self.handle_call(call)
241 attrs = self.element_start('arg')
243 value = self.parse_value()
244 self.element_end('arg')
249 attrs = self.element_start('ret')
250 value = self.parse_value()
251 self.element_end('ret')
255 def parse_hex(self, token_name):
256 attrs = self.element_start(token_name)
257 value = int(self.character_data(), 16)
258 self.element_end(token_name)
def parse_value(self):
    """Parse one value element, dispatching on the current start tag.

    Each recognised tag is handled by the method called `parse_<tag>`.

    Raises:
        TokenMismatch: if the current token is not a start tag, or its
            name is not one of the recognised value tags.
    """
    value_tags = (
        'int',
        'uint',
        'float',
        'string',
        'wstring',
        'const',
        'bitmask',
        'ref',
    )
    current = self.token
    if current.type == ELEMENT_START:
        tag = current.name_or_data
        if tag in value_tags:
            # The handler is looked up only once the tag is known.
            return getattr(self, 'parse_' + tag)()
    raise TokenMismatch("<elem ...>, <ref ...>, or text", self.token)
def parse_elems(self):
    """Parse one or more <elem> elements into an unnamed Struct.

    At least one element is always parsed; parsing then repeats until
    the current token is an end tag.
    """
    members = []
    while True:
        members.append(self.parse_elem())
        if self.token.type == ELEMENT_END:
            break
    return Struct("", members)
287 def parse_elem(self):
288 attrs = self.element_start('elem')
289 value = self.parse_value()
290 self.element_end('elem')
300 attrs = self.element_start('ref')
301 if self.token.type != ELEMENT_END:
302 value = self.parse_value()
305 self.element_end('ref')
307 return Pointer(attrs['addr'], value)
309 def parse_bitmask(self):
310 self.element_start('bitmask')
312 while self.token.type != ELEMENT_END:
313 elems.append(self.parse_value())
314 self.element_end('bitmask')
315 return Bitmask(elems)
318 self.element_start('int')
319 value = self.character_data()
320 self.element_end('int')
321 return Literal(int(value))
def parse_uint(self):
    """Parse a <uint> element and return its text as an int Literal."""
    tag = 'uint'
    self.element_start(tag)
    text = self.character_data()
    self.element_end(tag)
    # Conversion happens only after the closing tag has been matched.
    number = int(text)
    return Literal(number)
def parse_float(self):
    """Parse a <float> element and return its text as a float Literal."""
    tag = 'float'
    self.element_start(tag)
    text = self.character_data()
    self.element_end(tag)
    # Conversion happens only after the closing tag has been matched.
    number = float(text)
    return Literal(number)
def parse_string(self):
    """Parse a <string> element and return its text wrapped in a Literal."""
    tag = 'string'
    self.element_start(tag)
    text = self.character_data()
    self.element_end(tag)
    return Literal(text)
def parse_wstring(self):
    """Parse a <wstring> element and return its text wrapped in a Literal."""
    tag = 'wstring'
    self.element_start(tag)
    text = self.character_data()
    self.element_end(tag)
    return Literal(text)
def parse_const(self):
    """Parse a <const> element and return its text as a NamedConstant."""
    tag = 'const'
    self.element_start(tag)
    text = self.character_data()
    self.element_end(tag)
    return NamedConstant(text)
353 def handle_call(self, call):
357 class DumpTraceParser(TraceParser):
359 def __init__(self, stream, formatter):
360 XmlParser.__init__(self, stream)
361 self.formatter = formatter
362 self.pretty_printer = PrettyPrinter(self.formatter)
365 def handle_call(self, call):
366 call.visit(self.pretty_printer)
367 self.formatter.newline()
370 class StatsTraceParser(TraceParser):
372 def __init__(self, stream, formatter):
373 TraceParser.__init__(self, stream, formatter)
377 TraceParser.parse(self)
379 sys.stdout.write('%s\t%s\t%s\n' % ("name", "calls", "duration"))
380 for name, (calls, duration) in self.stats.iteritems():
381 sys.stdout.write('%s\t%u\t%f\n' % (name, calls, duration/1000000.0))
383 def handle_call(self, name, args, ret, duration):
385 nr_calls, total_duration = self.stats[name]
388 total_duration = duration
391 if duration is not None:
392 total_duration += duration
393 self.stats[name] = nr_calls, total_duration
402 optparser = self.get_optparser()
403 (options, args) = optparser.parse_args(sys.argv[1:])
407 if arg.endswith('.gz'):
408 from gzip import GzipFile
409 stream = GzipFile(arg, 'rb')
410 elif arg.endswith('.bz2'):
411 from bz2 import BZ2File
412 stream = BZ2File(arg, 'rU')
414 stream = open(arg, 'rt')
415 self.process_arg(stream, options)
417 self.process_arg(stream, options)
419 def get_optparser(self):
420 optparser = optparse.OptionParser(
421 usage="\n\t%prog [options] [traces] ...")
422 optparser.add_option(
425 dest="stats", default=False,
426 help="generate statistics instead")
427 optparser.add_option(
428 '--color', '--colour',
429 type="choice", choices=('never', 'always', 'auto'), metavar='WHEN',
430 dest="color", default="always",
431 help="coloring: never, always, or auto [default: %default]")
434 def process_arg(self, stream, options):
435 if options.color == 'always' or options.color == 'auto' and sys.stdout.isatty():
436 formatter = format.DefaultFormatter(sys.stdout)
438 formatter = format.Formatter(sys.stdout)
441 factory = StatsTraceParser
443 factory = DumpTraceParser
445 parser = DumpTraceParser(stream, formatter)
449 if __name__ == '__main__':