Use pycurl to get plain URLs, follow redirects

urllib seems to URL-unescape the HTTP Location header to get the URL to follow. If that URL contains spaces, a bad request is created. Python bug report: http://bugs.python.org/issue6557
author: Till Maas <opensource@till.name> 2009-07-23 23:59:03 +0200
committer: Till Maas <opensource@till.name> 2009-07-23 23:59:03 +0200
commit: 4891b5cc78cddc5b693ad6bd3c0e4ce7a89a74b9 (patch)
tree: 57d538f3fe82c3782418b0a9657229520a348c71 /cnucnu/helper.py
parent: bcd08cd6af86feebd72561c0b8d512d2e96ac31c (diff)
download: cnucnu-4891b5cc78cddc5b693ad6bd3c0e4ce7a89a74b9.tar.gz
cnucnu-4891b5cc78cddc5b693ad6bd3c0e4ce7a89a74b9.tar.xz
cnucnu-4891b5cc78cddc5b693ad6bd3c0e4ce7a89a74b9.zip
1 files changed, 22 insertions, 3 deletions
diff --git a/cnucnu/helper.py b/cnucnu/helper.py
index f2d61de..b2340b8 100644
--- a/cnucnu/helper.py
+++ b/cnucnu/helper.py
@@ -22,9 +22,24 @@ pp = pprint_module.PrettyPrinter(indent=4)
 pprint = pp.pprint
 
 def get_html(url):
-    import urllib
-    res = urllib.urlopen(url)
-    return res.read()
+    import pycurl
+    import StringIO
+
+    c = pycurl.Curl()
+    c.setopt(pycurl.URL, url.encode("ascii"))
+
+    res = StringIO.StringIO()
+
+    c.setopt(pycurl.WRITEFUNCTION, res.write)
+    c.setopt(pycurl.FOLLOWLOCATION, 1)
+    c.setopt(pycurl.MAXREDIRS, 10)
+
+    c.perform()
+    c.close()
+    data = res.getvalue()
+    res.close()
+
+    return data
 
 def rpm_cmp(v1, v2):
     import rpm
@@ -63,6 +78,10 @@ def secure_download(url, cainfo=""):
 
     c.setopt(pycurl.WRITEFUNCTION, res.write)
 
+    # follow up to 10 http location: headers
+    c.setopt(pycurl.FOLLOWLOCATION, 1)
+    c.setopt(pycurl.MAXREDIRS, 10)
+
     c.perform()
     c.close()
     data = res.getvalue()
author	Till Maas <opensource@till.name>	2009-07-23 23:59:03 +0200
committer	Till Maas <opensource@till.name>	2009-07-23 23:59:03 +0200
commit	4891b5cc78cddc5b693ad6bd3c0e4ce7a89a74b9 (patch)
tree	57d538f3fe82c3782418b0a9657229520a348c71 /cnucnu/helper.py
parent	bcd08cd6af86feebd72561c0b8d512d2e96ac31c (diff)
download	cnucnu-4891b5cc78cddc5b693ad6bd3c0e4ce7a89a74b9.tar.gz cnucnu-4891b5cc78cddc5b693ad6bd3c0e4ce7a89a74b9.tar.xz cnucnu-4891b5cc78cddc5b693ad6bd3c0e4ce7a89a74b9.zip