diff options
author | Till Maas <opensource@till.name> | 2009-07-23 23:59:03 +0200 |
---|---|---|
committer | Till Maas <opensource@till.name> | 2009-07-23 23:59:03 +0200 |
commit | 4891b5cc78cddc5b693ad6bd3c0e4ce7a89a74b9 (patch) | |
tree | 57d538f3fe82c3782418b0a9657229520a348c71 /cnucnu/helper.py | |
parent | bcd08cd6af86feebd72561c0b8d512d2e96ac31c (diff) | |
download | cnucnu-4891b5cc78cddc5b693ad6bd3c0e4ce7a89a74b9.tar.gz cnucnu-4891b5cc78cddc5b693ad6bd3c0e4ce7a89a74b9.tar.xz cnucnu-4891b5cc78cddc5b693ad6bd3c0e4ce7a89a74b9.zip |
Use pycurl to get plain URLs, follow redirects
urllib seems to URL-unescape HTTP Location headers to get the URL to
follow. If this URL contains spaces, a bad request is created.
Python bug report:
http://bugs.python.org/issue6557
Diffstat (limited to 'cnucnu/helper.py')
-rw-r--r-- | cnucnu/helper.py | 25 |
1 files changed, 22 insertions, 3 deletions
diff --git a/cnucnu/helper.py b/cnucnu/helper.py index f2d61de..b2340b8 100644 --- a/cnucnu/helper.py +++ b/cnucnu/helper.py @@ -22,9 +22,24 @@ pp = pprint_module.PrettyPrinter(indent=4) pprint = pp.pprint def get_html(url): - import urllib - res = urllib.urlopen(url) - return res.read() + import pycurl + import StringIO + + c = pycurl.Curl() + c.setopt(pycurl.URL, url.encode("ascii")) + + res = StringIO.StringIO() + + c.setopt(pycurl.WRITEFUNCTION, res.write) + c.setopt(pycurl.FOLLOWLOCATION, 1) + c.setopt(pycurl.MAXREDIRS, 10) + + c.perform() + c.close() + data = res.getvalue() + res.close() + + return data def rpm_cmp(v1, v2): import rpm @@ -63,6 +78,10 @@ def secure_download(url, cainfo=""): c.setopt(pycurl.WRITEFUNCTION, res.write) + # follow up to 10 http location: headers + c.setopt(pycurl.FOLLOWLOCATION, 1) + c.setopt(pycurl.MAXREDIRS, 10) + c.perform() c.close() data = res.getvalue() |