1 files changed, 191 insertions, 0 deletions
diff --git a/gzread.py b/gzread.py
new file mode 100644
index 000000000..c1ca12324
--- /dev/null
+++ b/gzread.py
@@ -0,0 +1,191 @@
+import string
+import zlib
+import __builtin__
+
+# Implements a read-only, file-like object for reading a gzipped file.
+# The user of the file doesn't have to worry about the compression,
+# but random access is not allowed.
+
+# based on Andrew Kuchling's minigzip.py distributed with the zlib module
+
+FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
+
+def read32(buf):
+    v = ord(buf[0])
+    v = v + (ord(buf[1]) << 8)
+    v = v + (ord(buf[2]) << 16)
+    v = v + (ord(buf[3]) << 24)
+    return v
+
+def open(filename, fileobj=None):
+    return GzipFile(filename, fileobj)
+
+class GzipFile:
+
+    myfileobj = None
+
+    def __init__(self, filename=None, fileobj=None):
+	if fileobj is None:
+	    fileobj = self.myfileobj = __builtin__.open(filename, 'r')
+	self._init_read()
+	self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
+	self.fileobj = fileobj
+	self.compressed = 1
+	self._read_gzip_header()
+
+    def _init_read(self):
+	self.crc = zlib.crc32("")
+	self.size = 0
+	self.extrabuf = ""
+	self.extrasize = 0
+	self.lastbuf = ""
+
+    def _read_gzip_header(self):
+	magic = self.fileobj.read(2)
+	if magic != '\037\213':
+	    self._unread(magic)
+	    self.compressed = 0
+	    return
+	method = ord( self.fileobj.read(1) )
+	if method != 8:
+	    raise RuntimeError, 'Unknown compression method'
+	flag = ord( self.fileobj.read(1) )
+	# modtime = self.fileobj.read(4)
+	# extraflag = self.fileobj.read(1)
+	# os = self.fileobj.read(1)
+	self.fileobj.read(6)
+
+	if flag & FEXTRA:
+	    # Read & discard the extra field, if present
+	    xlen=ord(self.fileobj.read(1))	      
+	    xlen=xlen+256*ord(self.fileobj.read(1))
+	    self.fileobj.read(xlen)
+	if flag & FNAME:
+	    # Read and discard a null-terminated string containing the filename
+	    while (1):
+		s=self.fileobj.read(1)
+		if not s or s=='\000': break
+	if flag & FCOMMENT:
+	    # Read and discard a null-terminated string containing a comment
+	    while (1):
+		s=self.fileobj.read(1)
+		if not s or s=='\000': break
+	if flag & FHCRC:
+	    self.fileobj.read(2)     # Read & discard the 16-bit header CRC
+
+    def read(self,size=None):
+	if self.extrasize <= 0 and self.fileobj is None:
+	    return ''
+
+	if not self.compressed:
+	    chunk = ''
+	    if size and self.extrasize >= size:
+		chunk = self.extrabuf[:size]
+		self.extrabuf = self.extrabuf[size:]
+		self.extrasize = self.extrasize - size
+		return chunk
+	    if self.extrasize:
+		chunk = self.extrabuf
+		if size:
+		    size = size - self.extrasize
+		self.extrasize = 0
+		self.extrabuf = ''
+	    if not size:
+		return chunk + self.fileobj.read()
+	    else:
+		return chunk + self.fileobj.read(size)
+
+	readsize = 1024
+	if not size:	# get the whole thing
+	    try:
+		while 1:
+		    self._read(readsize)
+		    readsize = readsize * 2
+	    except EOFError:
+		size = self.extrasize
+	else:	       # just get some more of it
+	    try:
+		while size > self.extrasize:
+		    self._read(readsize)
+		    readsize = readsize * 2
+	    except EOFError:
+		pass
+	
+	chunk = self.extrabuf[:size]
+	self.extrabuf = self.extrabuf[size:]
+	self.extrasize = self.extrasize - size
+
+	return chunk
+
+    def _unread(self, buf):
+	self.extrabuf = buf + self.extrabuf
+	self.extrasize = len(buf) + self.extrasize
+
+    def _read(self, size=1024):
+	try:
+	    buf = self.fileobj.read(size)
+	except AttributeError:
+	    raise EOFError, "Reached EOF"
+	if buf == "":
+	    uncompress = self.decompress.flush()
+	    if uncompress == "":
+		self._read_eof()
+		self.fileobj = None
+		raise EOFError, 'Reached EOF'
+	else:
+	    xlen = len(buf)
+	    if xlen >= 8:
+		xoff = 0
+		boff = xlen - 8
+	    else:
+		xoff = 8 - xlen
+		boff = 0
+	    self.lastbuf = self.lastbuf[:xoff] + buf[boff:]
+	    uncompress = self.decompress.decompress(buf)
+	self.crc = zlib.crc32(uncompress, self.crc)
+	self.extrabuf = self.extrabuf + uncompress
+	self.extrasize = self.extrasize + len(uncompress)
+	self.size = self.size + len(uncompress)
+
+    def _read_eof(self):
+	crc32 = read32(self.lastbuf[:4])
+	isize = read32(self.lastbuf[4:8])
+	if crc32 != self.crc:
+	    raise IOError, 'CRC check failed'
+	elif isize != self.size:
+	    raise IOError, 'Incorrect length of data produced'
+
+    def close(self):
+	self.fileobj = None
+	if self.myfileobj:
+	    self.myfileobj.close()
+	    self.myfileobj = None
+
+    def flush(self):
+	self.fileobj.flush()
+
+    def seek(self):
+	raise IOError, 'Random access not allowed in gzip files'
+
+    def tell(self):
+	raise IOError, 'I won\'t tell() you for gzip files'
+
+    def isatty(self):
+	return 0
+
+    def readline(self):
+	bufs = []
+	readsize = 100
+	while 1:
+	    c = self.read(readsize)
+	    i = string.find(c, '\n')
+	    if i >= 0 or c == '':
+		bufs.append(c[:i])
+		self._unread(c[i+1:])
+		return string.join(bufs, '')
+	    bufs.append(c)
+	    readsize = readsize * 2
+
+    def readlines(self):
+	buf = self.read()
+	return string.split(buf, '\n')