From e4cf0e9b02570ebb29289fdffe87d25a72e56663 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 3 Jul 2009 13:10:37 +0100 Subject: [PATCH] Handle incomplete gzip files. --- xml2txt.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/xml2txt.py b/xml2txt.py index 5c92aff..21f0fc4 100755 --- a/xml2txt.py +++ b/xml2txt.py @@ -22,6 +22,7 @@ import sys import optparse import xml.parsers.expat +import gzip ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF = range(4) @@ -99,6 +100,7 @@ class XmlTokenizer: self.index = 0 data = self.fp.read(size) self.final = len(data) < size + data = data.rstrip('\0') try: self.parser.Parse(data, self.final) except xml.parsers.expat.ExpatError, e: @@ -176,6 +178,16 @@ class XmlParser: return data +class GzipFile(gzip.GzipFile): + + def _read_eof(self): + # Ignore incomplete files + try: + gzip.GzipFile._read_eof(self) + except IOError: + pass + + class Formatter: def function(self, name): @@ -337,7 +349,6 @@ def main(): if args: for arg in args: if arg.endswith('.gz'): - from gzip import GzipFile stream = GzipFile(arg, 'rt') elif arg.endswith('.bz2'): from bz2 import BZ2File -- 2.45.2