Diffstat (limited to 'BitTorrent/zurllib.py')
-rw-r--r-- | BitTorrent/zurllib.py | 269
1 file changed, 269 insertions, 0 deletions
diff --git a/BitTorrent/zurllib.py b/BitTorrent/zurllib.py
new file mode 100644
index 0000000..8bbe5ba
--- /dev/null
+++ b/BitTorrent/zurllib.py
@@ -0,0 +1,269 @@
+#
+# zurllib.py
+#
+# This is (hopefully) a drop-in for urllib which will request gzip/deflate
+# compression and then decompress the output if a compressed response is
+# received while maintaining the API.
+#
+# by Robert Stone 2/22/2003
+# extended by Matt Chisholm
+# tracker announce --bind support added by Jeremy Evans 11/2005
+
+import sys
+
+import threading
+import thread
+from BitTorrent import PeerID
+user_agent = PeerID.make_id()
+del PeerID
+
+import urllib2
+OldOpenerDirector = urllib2.OpenerDirector
+
+class MyOpenerDirector(OldOpenerDirector):
+    def __init__(self):
+        OldOpenerDirector.__init__(self)
+        self.addheaders = [('User-agent', user_agent)]
+
+urllib2.OpenerDirector = MyOpenerDirector
+
+del urllib2
+
+from httplib import HTTPConnection, HTTP
+from urllib import *
+from urllib2 import *
+from gzip import GzipFile
+from StringIO import StringIO
+import pprint
+
+DEBUG=0
+
+http_bindaddr = None
+
+# ow ow ow.
+# this is here so we can track open http connections in our pending
+# connection count. we have to buffer because maybe urllib connections
+# start before rawserver does - hopefully not more than 10 of them!
+#
+# this can all go away when we use a reasonable http client library
+# and the connections are managed inside rawserver
+class PreRawServerBuffer(object):
+    def __init__(self):
+        self.pending_sockets = {}
+        self.pending_sockets_lock = threading.Lock()
+
+    def _add_pending_connection(self, addr):
+        # the XP connection rate limiting is unique at the IP level
+        assert isinstance(addr, str)
+        self.pending_sockets_lock.acquire()
+        self.__add_pending_connection(addr)
+        self.pending_sockets_lock.release()
+
+    def __add_pending_connection(self, addr):
+        if addr not in self.pending_sockets:
+            self.pending_sockets[addr] = 1
+        else:
+            self.pending_sockets[addr] += 1
+
+    def _remove_pending_connection(self, addr):
+        self.pending_sockets_lock.acquire()
+        self.__remove_pending_connection(addr)
+        self.pending_sockets_lock.release()
+
+    def __remove_pending_connection(self, addr):
+        self.pending_sockets[addr] -= 1
+        if self.pending_sockets[addr] <= 0:
+            del self.pending_sockets[addr]
+
+rawserver = PreRawServerBuffer()
+
+def bind_tracker_connection(bindaddr):
+    global http_bindaddr
+    http_bindaddr = bindaddr
+
+def set_zurllib_rawserver(new_rawserver):
+    global rawserver
+    # iterate over a snapshot of the keys, since the handover below
+    # removes entries from the dict as it goes
+    for addr in rawserver.pending_sockets.keys():
+        new_rawserver._add_pending_connection(addr)
+        rawserver._remove_pending_connection(addr)
+    assert len(rawserver.pending_sockets) == 0
+    rawserver = new_rawserver
+
+unsafe_threads = []
+def add_unsafe_thread():
+    global unsafe_threads
+    unsafe_threads.append(thread.get_ident())
+
+class BindingHTTPConnection(HTTPConnection):
+    def connect(self):
+
+        ident = thread.get_ident()
+        # never, ever, ever call urlopen from any of these threads
+        assert ident not in unsafe_threads
+
+        """Connect to the host and port specified in __init__."""
+        msg = "getaddrinfo returns an empty list"
+        for res in socket.getaddrinfo(self.host, self.port, 0,
+                                      socket.SOCK_STREAM):
+            af, socktype, proto, canonname, sa = res
+
+            addr = sa[0]
+            # the obvious multithreading problem is avoided by using locks.
+            # the lock is only acquired during the function call, so there's
+            # no danger of urllib blocking rawserver.
+            rawserver._add_pending_connection(addr)
+            try:
+                self.sock = socket.socket(af, socktype, proto)
+                if http_bindaddr:
+                    self.sock.bind((http_bindaddr, 0))
+                if self.debuglevel > 0:
+                    print "connect: (%s, %s)" % (self.host, self.port)
+                self.sock.connect(sa)
+            except socket.error, msg:
+                if self.debuglevel > 0:
+                    print 'connect fail:', (self.host, self.port)
+                if self.sock:
+                    self.sock.close()
+                self.sock = None
+            rawserver._remove_pending_connection(addr)
+
+            if self.sock:
+                break
+
+        if not self.sock:
+            raise socket.error, msg
+
+class BindingHTTP(HTTP):
+    _connection_class = BindingHTTPConnection
+
+if sys.version_info >= (2,4):
+    BindingHTTP = BindingHTTPConnection
+
+class HTTPContentEncodingHandler(HTTPHandler):
+    """Inherit and add gzip/deflate/etc support to HTTP gets."""
+    def http_open(self, req):
+        # add the Accept-Encoding header to the request
+        # support gzip encoding (identity is assumed)
+        req.add_header("Accept-Encoding","gzip")
+        if DEBUG:
+            print "Sending:"
+            print req.headers
+            print "\n"
+        fp = self.do_open(BindingHTTP, req)
+        headers = fp.headers
+        if DEBUG:
+            pprint.pprint(headers.dict)
+        url = fp.url
+        resp = addinfourldecompress(fp, headers, url)
+        if hasattr(fp, 'code'):
+            resp.code = fp.code
+        if hasattr(fp, 'msg'):
+            resp.msg = fp.msg
+        return resp
+
+class addinfourldecompress(addinfourl):
+    """Do gzip decompression if necessary. Do addinfourl stuff too."""
+    def __init__(self, fp, headers, url):
+        # we need to do something more sophisticated here to deal with
+        # multiple values?  What about other weird crap like q-values?
+        # basically this only works for the most simplistic case and will
+        # break in some other cases, but for now we only care about making
+        # this work with the BT tracker so....
+        if headers.has_key('content-encoding') and headers['content-encoding'] == 'gzip':
+            if DEBUG:
+                print "Contents of Content-encoding: " + headers['Content-encoding'] + "\n"
+            self.gzip = 1
+            self.rawfp = fp
+            fp = GzipStream(fp)
+        else:
+            self.gzip = 0
+        return addinfourl.__init__(self, fp, headers, url)
+
+    def close(self):
+        self.fp.close()
+        if self.gzip:
+            self.rawfp.close()
+
+    def iscompressed(self):
+        return self.gzip
+
+class GzipStream(StringIO):
+    """Magically decompress a file object.
+
+    This is not the most efficient way to do this but GzipFile() wants
+    to seek, etc, which won't work for a stream such as that from a socket.
+    So we copy the whole shebang into a StringIO object, decompress that,
+    then let people access the decompressed output as a StringIO object.
+
+    The disadvantage is memory use and the advantage is random access.
+
+    Will mess with fixing this later.
+    """
+
+    def __init__(self, fp):
+        self.fp = fp
+
+        # this is nasty and needs to be fixed at some point
+        # copy everything into a StringIO (compressed)
+        compressed = StringIO()
+        r = fp.read()
+        while r:
+            compressed.write(r)
+            r = fp.read()
+        # now, unzip (gz) the StringIO to a string
+        compressed.seek(0,0)
+        gz = GzipFile(fileobj = compressed)
+        str = ''
+        r = gz.read()
+        while r:
+            str += r
+            r = gz.read()
+        # close our utility files
+        compressed.close()
+        gz.close()
+        # init our stringio selves with the string
+        StringIO.__init__(self, str)
+        del str
+
+    def close(self):
+        self.fp.close()
+        return StringIO.close(self)
+
+
+def test():
+    """Test this module.
+
+    At the moment this is lame.
+ """ + + print "Running unit tests.\n" + + def printcomp(fp): + try: + if fp.iscompressed(): + print "GET was compressed.\n" + else: + print "GET was uncompressed.\n" + except: + print "no iscompressed function! this shouldn't happen" + + print "Trying to GET a compressed document...\n" + fp = urlopen('http://a.scarywater.net/hng/index.shtml') + print fp.read() + printcomp(fp) + fp.close() + + print "Trying to GET an unknown document...\n" + fp = urlopen('http://www.otaku.org/') + print fp.read() + printcomp(fp) + fp.close() + + +# +# Install the HTTPContentEncodingHandler that we've defined above. +# +install_opener(build_opener(HTTPContentEncodingHandler, ProxyHandler({}))) + +if __name__ == '__main__': + test() + |