]> git.cworth.org Git - apitrace/blob - scripts/jsondiff.py
Strip (non-standard) JSON comments.
[apitrace] / scripts / jsondiff.py
1 #!/usr/bin/env python
2 ##########################################################################
3 #
4 # Copyright 2011 Jose Fonseca
5 # All Rights Reserved.
6 #
7 # Permission is hereby granted, free of charge, to any person obtaining a copy
8 # of this software and associated documentation files (the "Software"), to deal
9 # in the Software without restriction, including without limitation the rights
10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 # copies of the Software, and to permit persons to whom the Software is
12 # furnished to do so, subject to the following conditions:
13 #
14 # The above copyright notice and this permission notice shall be included in
15 # all copies or substantial portions of the Software.
16 #
17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 # THE SOFTWARE.
24 #
25 ##########################################################################/
26
27
28 import json
29 import optparse
30 import re
31 import sys
32
33
def strip_object_hook(obj):
    """JSON ``object_hook`` that discards tagged objects and dunder members.

    ``load`` installs this hook when its ``strip_images`` flag is set.

    Args:
        obj: a dict freshly decoded from a JSON object.

    Returns:
        ``None`` when the object carries a ``'__class__'`` member (the whole
        object is dropped); otherwise the same dict, modified in place so that
        every member whose name both starts and ends with ``'__'`` is removed.
    """
    if '__class__' in obj:
        return None
    # Iterate over a snapshot of the keys: deleting entries while walking the
    # live key view raises RuntimeError on Python 3.
    for name in list(obj):
        if name.startswith('__') and name.endswith('__'):
            del obj[name]
    return obj
41
42
class Visitor:
    """Base class that dispatches on the shape of a decoded JSON node.

    Subclasses override ``visit_object``, ``visit_array`` and ``visit_value``;
    the defaults here do nothing and return ``None``.
    """

    def visit(self, node, *args, **kwargs):
        """Route *node* to the handler matching its type.

        dicts go to ``visit_object``, lists to ``visit_array`` and anything
        else to ``visit_value``.  Extra positional and keyword arguments are
        forwarded untouched, and the handler's return value is returned.
        """
        if isinstance(node, dict):
            handler = self.visit_object
        elif isinstance(node, list):
            handler = self.visit_array
        else:
            handler = self.visit_value
        return handler(node, *args, **kwargs)

    def visit_object(self, node, *args, **kwargs):
        """Handle a dict node; no-op by default."""
        return None

    def visit_array(self, node, *args, **kwargs):
        """Handle a list node; no-op by default."""
        return None

    def visit_value(self, node, *args, **kwargs):
        """Handle any node that is neither dict nor list; no-op by default."""
        return None
61
62
class Dumper(Visitor):
    """Visitor that writes a decoded JSON tree to a stream, indented.

    Objects are written with their members in sorted name order, one per
    line; member names are written as-is (unquoted).  Scalars are written
    with ``json.dumps``.  Nesting is indented by two spaces per level.
    """

    def __init__(self, stream = sys.stdout):
        """Create a dumper writing to *stream* (any object with ``write``)."""
        self.stream = stream
        # Current nesting depth; drives the indentation width.
        self.level = 0

    def _write(self, s):
        """Write the string *s* to the output stream."""
        self.stream.write(s)

    def _indent(self):
        """Write the indentation for the current nesting level."""
        self._write('  '*self.level)

    def _newline(self):
        """Terminate the current output line."""
        self._write('\n')

    def visit_object(self, node):
        """Dump the dict *node* with its members sorted by name."""
        self.enter_object()

        # sorted() accepts both the key list of Python 2 and the key view of
        # Python 3 (which has no .sort() method).
        members = sorted(node)
        last_index = len(members) - 1
        for i, name in enumerate(members):
            self.enter_member(name)
            self.visit(node[name])
            self.leave_member(i == last_index)
        self.leave_object()

    def enter_object(self):
        """Open an object: write ``{``, end the line and indent one level."""
        self._write('{')
        self._newline()
        self.level += 1

    def enter_member(self, name):
        """Start a member line: indentation followed by ``name: ``."""
        self._indent()
        self._write('%s: ' % name)

    def leave_member(self, last):
        """Finish a member line, adding a comma unless *last* is true."""
        if not last:
            self._write(',')
        self._newline()

    def leave_object(self):
        """Close an object: dedent and write ``}``.

        A newline is added only when this closes the outermost level.
        """
        self.level -= 1
        self._indent()
        self._write('}')
        if self.level <= 0:
            self._newline()

    def visit_array(self, node):
        """Dump the list *node*, one element per line, comma separated."""
        self.enter_array()
        last_index = len(node) - 1
        for i, value in enumerate(node):
            self._indent()
            self.visit(value)
            if i != last_index:
                self._write(',')
            self._newline()
        self.leave_array()

    def enter_array(self):
        """Open an array: write ``[``, end the line and indent one level."""
        self._write('[')
        self._newline()
        self.level += 1

    def leave_array(self):
        """Close an array: dedent and write ``]`` (no trailing newline)."""
        self.level -= 1
        self._indent()
        self._write(']')

    def visit_value(self, node):
        """Dump a scalar using its JSON encoding."""
        self._write(json.dumps(node))
135
136
137
class Comparer(Visitor):
    """Visitor that tests two decoded JSON trees for equality.

    ``visit(a, b)`` dispatches on the type of *a* and returns a boolean.
    """

    def __init__(self, ignore_added = False):
        """Create a comparer.

        Args:
            ignore_added: when true, object members that exist only in *b*
                are ignored; every member of *a* must still be present in
                *b* and compare equal.
        """
        self.ignore_added = ignore_added

    def visit_object(self, a, b):
        """Return whether dict *a* matches *b*."""
        if not isinstance(b, dict):
            return False
        # Unless additions are tolerated, both objects must have exactly the
        # same member names.  Comparing sets avoids calling .sort() on the
        # result of keys(), which is a view without that method on Python 3.
        if not self.ignore_added and set(a) != set(b):
            return False
        for name, ae in a.items():
            if name not in b:
                # Only reachable with ignore_added: a member was removed.
                return False
            if not self.visit(ae, b[name]):
                return False
        return True

    def visit_array(self, a, b):
        """Return whether list *a* equals *b* element by element.

        Lengths must match regardless of ``ignore_added``.
        """
        if not isinstance(b, list):
            return False
        if len(a) != len(b):
            return False
        for ae, be in zip(a, b):
            if not self.visit(ae, be):
                return False
        return True

    def visit_value(self, a, b):
        """Return whether the scalar *a* equals *b*."""
        return a == b
176
177
178
class Differ(Visitor):
    """Visitor that writes the differences between two decoded JSON trees.

    Output goes through a ``Dumper``; a changed value is shown as
    ``old -> new``.  Equality is decided by a ``Comparer``.
    """

    def __init__(self, stream = sys.stdout, ignore_added = False):
        """Create a differ.

        Args:
            stream: where the diff is written.
            ignore_added: forwarded to the ``Comparer``; when true, object
                members present only in the second tree are not reported.
        """
        self.dumper = Dumper(stream)
        self.comparer = Comparer(ignore_added = ignore_added)

    def visit(self, a, b):
        """Write the differences between *a* and *b*; nothing if they match."""
        if self.comparer.visit(a, b):
            return
        Visitor.visit(self, a, b)

    def visit_object(self, a, b):
        """Write the differing members of dict *a* versus *b*.

        A member missing on one side is treated as ``None`` on that side.
        """
        if not isinstance(b, dict):
            self.replace(a, b)
            return

        self.dumper.enter_object()
        names = set(a)
        if not self.comparer.ignore_added:
            names.update(b)

        # Select the differing members up front so that the "last member"
        # flag refers to the last member actually written; otherwise a
        # trailing comma is left behind whenever later members are equal.
        changed = [
            name for name in sorted(names)
            if not self.comparer.visit(a.get(name, None), b.get(name, None))
        ]
        last_index = len(changed) - 1
        for i, name in enumerate(changed):
            self.dumper.enter_member(name)
            self.visit(a.get(name, None), b.get(name, None))
            self.dumper.leave_member(i == last_index)

        self.dumper.leave_object()

    def visit_array(self, a, b):
        """Write list *a* versus *b*, position by position.

        Equal elements are dumped as-is and differing ones as a nested diff.
        Positions beyond the end of the shorter list are treated as ``None``.
        """
        if not isinstance(b, list):
            self.replace(a, b)
            return

        self.dumper.enter_array()
        max_len = max(len(a), len(b))
        for i in range(max_len):
            ae = a[i] if i < len(a) else None
            be = b[i] if i < len(b) else None
            self.dumper._indent()
            if self.comparer.visit(ae, be):
                self.dumper.visit(ae)
            else:
                self.visit(ae, be)
            if i != max_len - 1:
                self.dumper._write(',')
            self.dumper._newline()

        self.dumper.leave_array()

    def visit_value(self, a, b):
        """Write ``a -> b`` when the scalar *a* differs from *b*."""
        if a != b:
            self.replace(a, b)

    def replace(self, a, b):
        """Write *a*, the separator `` -> ``, then *b*."""
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)
246
247
#
# Unfortunately, the JSON standard does not include comments, but they are
# quite a useful feature to have in regression tests.
#
252
# Token patterns, in priority order.  Strings are tokenised too so that a
# "//" occurring inside a string literal is not mistaken for a comment.
_token_res = [
    r'//[^\r\n]*',              # comment: "//" up to the end of the line
    r'"[^"\\]*(\\.[^"\\]*)*"',  # string: double-quoted, backslash escapes
]

# One alternation with each token wrapped in its own capturing group, so the
# comment pattern is always group 1.
_tokens_re = re.compile(
    '|'.join(['(%s)' % token_re for token_re in _token_res]),
    re.DOTALL,
)
259
260
def _strip_comment(mo):
    """Return the replacement text for one token match.

    *mo* is a match object whose group 1 is non-empty when the token is a
    comment.  Comments are replaced by the empty string; any other token is
    returned unchanged.
    """
    is_comment = bool(mo.group(1))
    return '' if is_comment else mo.group(0)
266
267
def _strip_comments(data):
    '''Strip (non-standard) ``//`` comments from JSON text.

    Every token matched by ``_tokens_re`` is passed through
    ``_strip_comment``; text outside tokens is left untouched.  Returns the
    resulting string.
    '''
    stripped = _tokens_re.sub(_strip_comment, data)
    return stripped
271
272
# Import-time self-check: a leading // comment is removed, while "//" inside
# a (multi-line) string literal is preserved.
assert (
    _strip_comments('// a comment\n"// a comment in a string\n"')
    == '\n"// a comment in a string\n"'
)
278
279
def load(stream, strip_images = True, strip_comments = True):
    """Decode a JSON document from *stream*.

    Args:
        stream: readable text stream holding the document.
        strip_images: when true, decode with ``strip_object_hook`` as the
            ``object_hook``; otherwise no hook is used.
        strip_comments: when true, read the whole stream and remove ``//``
            comments before decoding.

    Returns:
        The decoded document.  Decoding is non-strict (``strict=False``).
    """
    object_hook = strip_object_hook if strip_images else None
    if not strip_comments:
        return json.load(stream, strict=False, object_hook = object_hook)
    text = _strip_comments(stream.read())
    return json.loads(text, strict=False, object_hook = object_hook)
291
292
def main():
    """Command-line entry point: diff two JSON files and print the result.

    Usage: ``%prog [options] <ref_json> <src_json>``.  With ``--keep-images``
    the documents are loaded without ``strip_object_hook``.  Exits through
    ``optparser.error`` unless exactly two positional arguments are given.
    """
    optparser = optparse.OptionParser(
        usage="\n\t%prog [options] <ref_json> <src_json>")
    optparser.add_option(
        '--keep-images',
        action="store_false", dest="strip_images", default=True,
        help="compare images")

    (options, args) = optparser.parse_args(sys.argv[1:])

    if len(args) != 2:
        optparser.error('incorrect number of arguments')

    # Use the parsed positional arguments rather than sys.argv[1:3]; the
    # latter picks up option strings when options precede the file names.
    ref_path, src_path = args

    with open(ref_path, 'rt') as ref_stream:
        a = load(ref_stream, options.strip_images)
    with open(src_path, 'rt') as src_stream:
        b = load(src_stream, options.strip_images)

    differ = Differ()
    differ.visit(a, b)
315
316
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()