Diffstat (limited to 'BitTorrent/zurllib.py')
-rw-r--r--  BitTorrent/zurllib.py  269
1 file changed, 269 insertions(+), 0 deletions(-)
diff --git a/BitTorrent/zurllib.py b/BitTorrent/zurllib.py
new file mode 100644
index 0000000..8bbe5ba
--- /dev/null
+++ b/BitTorrent/zurllib.py
@@ -0,0 +1,269 @@
+#
+# zurllib.py
+#
+# This is (hopefully) a drop-in replacement for urllib which requests
+# gzip/deflate compression and transparently decompresses the output when
+# a compressed response is received, while maintaining the urllib API.
+#
+# by Robert Stone 2/22/2003
+# extended by Matt Chisholm
+# tracker announce --bind support added by Jeremy Evans 11/2005
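+#
+# Typical use mirrors urllib (a minimal usage sketch; the tracker URL
+# below is hypothetical):
+#
+#   fp = urlopen('http://tracker.example.com/announce')
+#   data = fp.read()    # transparently gunzipped if the response was gzipped
+#   fp.close()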
+
+import sys
+import socket   # used directly in BindingHTTPConnection.connect
+
+import threading
+import thread
+from BitTorrent import PeerID
+user_agent = PeerID.make_id()
+del PeerID
+
+import urllib2
+OldOpenerDirector = urllib2.OpenerDirector
+
+class MyOpenerDirector(OldOpenerDirector):
+ def __init__(self):
+ OldOpenerDirector.__init__(self)
+ self.addheaders = [('User-agent', user_agent)]
+
+urllib2.OpenerDirector = MyOpenerDirector
+
+del urllib2
+
+from httplib import HTTPConnection, HTTP
+from urllib import *
+from urllib2 import *
+from gzip import GzipFile
+from StringIO import StringIO
+import pprint
+
+DEBUG = 0
+
+http_bindaddr = None
+
+# ow ow ow.
+# this is here so we can track open http connections in our pending
+# connection count. we have to buffer because urllib connections may
+# start before rawserver does - hopefully not more than 10 of them!
+#
+# this can all go away when we use a reasonable http client library
+# and the connections are managed inside rawserver
+class PreRawServerBuffer(object):
+ def __init__(self):
+ self.pending_sockets = {}
+ self.pending_sockets_lock = threading.Lock()
+
+ def _add_pending_connection(self, addr):
+        # XP's connection rate limiting is tracked at the IP level,
+        # so pending connections are keyed by address string
+ assert isinstance(addr, str)
+ self.pending_sockets_lock.acquire()
+ self.__add_pending_connection(addr)
+ self.pending_sockets_lock.release()
+
+ def __add_pending_connection(self, addr):
+ if addr not in self.pending_sockets:
+ self.pending_sockets[addr] = 1
+ else:
+ self.pending_sockets[addr] += 1
+
+ def _remove_pending_connection(self, addr):
+ self.pending_sockets_lock.acquire()
+ self.__remove_pending_connection(addr)
+ self.pending_sockets_lock.release()
+
+ def __remove_pending_connection(self, addr):
+ self.pending_sockets[addr] -= 1
+ if self.pending_sockets[addr] <= 0:
+ del self.pending_sockets[addr]
+rawserver = PreRawServerBuffer()
+
+def bind_tracker_connection(bindaddr):
+ global http_bindaddr
+ http_bindaddr = bindaddr
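+
+# a minimal sketch of forcing tracker traffic out a specific local
+# interface (the address below is hypothetical):
+#
+#   bind_tracker_connection('192.168.1.5')
+#   fp = urlopen(announce_url)    # the socket binds before connecting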
+
+def set_zurllib_rawserver(new_rawserver):
+ global rawserver
+    for addr, count in rawserver.pending_sockets.items():
+        # hand each buffered pending connection over to the real rawserver
+        for _ in xrange(count):
+            new_rawserver._add_pending_connection(addr)
+            rawserver._remove_pending_connection(addr)
+ assert len(rawserver.pending_sockets) == 0
+ rawserver = new_rawserver
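+
+# a minimal sketch of the intended startup handoff (the RawServer
+# construction is elided; only set_zurllib_rawserver is real):
+#
+#   rs = RawServer(...)           # the real rawserver, once it exists
+#   set_zurllib_rawserver(rs)     # drains the buffered pending counts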
+
+unsafe_threads = []
+def add_unsafe_thread():
+ global unsafe_threads
+ unsafe_threads.append(thread.get_ident())
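+
+# a minimal sketch of how a network thread marks itself (the function
+# name below is hypothetical):
+#
+#   def rawserver_thread_main():
+#       add_unsafe_thread()    # urlopen from this thread now trips an assert
+#       ...                    # run the event loop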
+
+class BindingHTTPConnection(HTTPConnection):
+    def connect(self):
+        """Connect to the host and port specified in __init__."""
+        ident = thread.get_ident()
+        # never, ever, ever call urlopen from any of these threads
+        assert ident not in unsafe_threads
+
+ msg = "getaddrinfo returns an empty list"
+ for res in socket.getaddrinfo(self.host, self.port, 0,
+ socket.SOCK_STREAM):
+ af, socktype, proto, canonname, sa = res
+
+ addr = sa[0]
+ # the obvious multithreading problem is avoided by using locks.
+ # the lock is only acquired during the function call, so there's
+ # no danger of urllib blocking rawserver.
+ rawserver._add_pending_connection(addr)
+ try:
+ self.sock = socket.socket(af, socktype, proto)
+ if http_bindaddr:
+ self.sock.bind((http_bindaddr, 0))
+ if self.debuglevel > 0:
+ print "connect: (%s, %s)" % (self.host, self.port)
+ self.sock.connect(sa)
+ except socket.error, msg:
+ if self.debuglevel > 0:
+ print 'connect fail:', (self.host, self.port)
+ if self.sock:
+ self.sock.close()
+ self.sock = None
+ rawserver._remove_pending_connection(addr)
+
+ if self.sock:
+ break
+
+ if not self.sock:
+ raise socket.error, msg
+
+class BindingHTTP(HTTP):
+ _connection_class = BindingHTTPConnection
+
+# Python 2.4's urllib2 hands do_open an HTTPConnection-style class rather
+# than an httplib.HTTP one, so use the connection class directly there.
+if sys.version_info >= (2, 4):
+    BindingHTTP = BindingHTTPConnection
+
+class HTTPContentEncodingHandler(HTTPHandler):
+ """Inherit and add gzip/deflate/etc support to HTTP gets."""
+ def http_open(self, req):
+ # add the Accept-Encoding header to the request
+ # support gzip encoding (identity is assumed)
+ req.add_header("Accept-Encoding","gzip")
+ if DEBUG:
+ print "Sending:"
+ print req.headers
+ print "\n"
+ fp = self.do_open(BindingHTTP, req)
+ headers = fp.headers
+ if DEBUG:
+ pprint.pprint(headers.dict)
+ url = fp.url
+ resp = addinfourldecompress(fp, headers, url)
+ if hasattr(fp, 'code'):
+ resp.code = fp.code
+ if hasattr(fp, 'msg'):
+ resp.msg = fp.msg
+ return resp
+
+class addinfourldecompress(addinfourl):
+ """Do gzip decompression if necessary. Do addinfourl stuff too."""
+ def __init__(self, fp, headers, url):
+        # We may need something more sophisticated here to deal with
+        # multiple values, and what about other weird crap like q-values?
+        # Basically this only works for the most simplistic case and will
+        # break in some other cases, but for now we only care about making
+        # this work with the BT tracker, so....
+        if headers.get('content-encoding') == 'gzip':
+ if DEBUG:
+ print "Contents of Content-encoding: " + headers['Content-encoding'] + "\n"
+ self.gzip = 1
+ self.rawfp = fp
+ fp = GzipStream(fp)
+ else:
+ self.gzip = 0
+ return addinfourl.__init__(self, fp, headers, url)
+
+ def close(self):
+ self.fp.close()
+ if self.gzip:
+ self.rawfp.close()
+
+ def iscompressed(self):
+ return self.gzip
+
+class GzipStream(StringIO):
+ """Magically decompress a file object.
+
+ This is not the most efficient way to do this but GzipFile() wants
+ to seek, etc, which won't work for a stream such as that from a socket.
+    So we copy the whole shebang into a StringIO object, decompress that,
+    then let people access the decompressed output as a StringIO object.
+
+ The disadvantage is memory use and the advantage is random access.
+
+ Will mess with fixing this later.
+ """
+
+    def __init__(self, fp):
+ self.fp = fp
+
+ # this is nasty and needs to be fixed at some point
+ # copy everything into a StringIO (compressed)
+ compressed = StringIO()
+ r = fp.read()
+ while r:
+ compressed.write(r)
+ r = fp.read()
+        # now, unzip (gz) the StringIO to a string
+        compressed.seek(0, 0)
+        gz = GzipFile(fileobj=compressed)
+        data = ''
+        r = gz.read()
+        while r:
+            data += r
+            r = gz.read()
+        # close our utility files
+        compressed.close()
+        gz.close()
+        # init our StringIO selves with the decompressed string
+        StringIO.__init__(self, data)
+        del data
+
+ def close(self):
+ self.fp.close()
+ return StringIO.close(self)
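+
+# A possible streaming fix for the buffering described above (a sketch
+# only; nothing in this module uses it). zlib can unwrap the gzip
+# container incrementally when handed wbits = 16 + MAX_WBITS, assuming
+# the underlying zlib is version 1.2 or newer.
+import zlib
+
+class ZlibStreamSketch(object):
+    """Incrementally decompress a gzip-wrapped file object."""
+
+    def __init__(self, fp):
+        self.fp = fp
+        self.decomp = zlib.decompressobj(16 + zlib.MAX_WBITS)
+        self.buf = ''
+
+    def read(self, size=-1):
+        # pull compressed bytes until the request is satisfied or EOF
+        while size < 0 or len(self.buf) < size:
+            chunk = self.fp.read(16384)
+            if not chunk:
+                self.buf += self.decomp.flush()
+                break
+            self.buf += self.decomp.decompress(chunk)
+        if size < 0:
+            data, self.buf = self.buf, ''
+        else:
+            data, self.buf = self.buf[:size], self.buf[size:]
+        return data
+
+    def close(self):
+        self.fp.close()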
+
+
+def test():
+ """Test this module.
+
+ At the moment this is lame.
+ """
+
+ print "Running unit tests.\n"
+
+ def printcomp(fp):
+ try:
+ if fp.iscompressed():
+ print "GET was compressed.\n"
+ else:
+ print "GET was uncompressed.\n"
+        except AttributeError:
+            print "no iscompressed function! this shouldn't happen"
+
+ print "Trying to GET a compressed document...\n"
+ fp = urlopen('http://a.scarywater.net/hng/index.shtml')
+ print fp.read()
+ printcomp(fp)
+ fp.close()
+
+ print "Trying to GET an unknown document...\n"
+ fp = urlopen('http://www.otaku.org/')
+ print fp.read()
+ printcomp(fp)
+ fp.close()
+
+
+#
+# Install the HTTPContentEncodingHandler that we've defined above.
+#
+install_opener(build_opener(HTTPContentEncodingHandler, ProxyHandler({})))
+
+if __name__ == '__main__':
+ test()
+