summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Ville Skyttä <ville.skytta@iki.fi> 2011-02-05 10:03:06 +0200
committer Till Maas <opensource@till.name> 2011-02-20 19:09:21 +0100
commit c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f (patch)
tree a195c44ea43221ced4e8b5d5d78fb9dd86211632
parent fb56c0e01105d4ab1a34920f377c66277465d534 (diff)
download cnucnu-c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f.tar.gz
cnucnu-c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f.tar.xz
cnucnu-c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f.zip
Implement subdirectory handling.
Each /^/ in the given URL is replaced by the latest version dir at that level.
-rw-r--r-- cnucnu/helper.py 24
-rwxr-xr-x cnucnu/package_list.py 3
-rwxr-xr-x cnucnu/tests/helper_test.py 3
3 files changed, 28 insertions, 2 deletions
diff --git a/cnucnu/helper.py b/cnucnu/helper.py
index 8614c44..a3d20d3 100644
--- a/cnucnu/helper.py
+++ b/cnucnu/helper.py
@@ -24,10 +24,34 @@ __docformat__ = "restructuredtext"
#from twisted.internet import reactor
+import re
import pprint as pprint_module
pp = pprint_module.PrettyPrinter(indent=4)
pprint = pp.pprint
+__html_regex = re.compile(r'\bhref\s*=\s*["\']([^"\'/]+)/["\']', re.I)
+__text_regex = re.compile(r'^d.+\s(\S+)\s*$', re.I|re.M)
+
+def expand_subdirs(url):
+ """ Expand all /^/'s in the given URL with the latest dir at that level """
+ ix = url.find("/^/")
+ while ix != -1:
+ ls = get_html(url[0:ix+1])
+ if not ls:
+ break
+ subdirs = []
+ regex = url.startswith("ftp://") and __text_regex or __html_regex
+ for match in regex.finditer(ls):
+ subdir = match.group(1)
+ if subdir not in (".", ".."):
+ subdirs.append(subdir)
+ if not subdirs:
+ break
+ latest = upstream_max(subdirs)
+ url = "%s/%s/%s" % (url[0:ix], latest, url[ix+len("/^/"):])
+ ix = url.find("/^/", ix + len(latest) + 1)
+ return url
+
def get_html(url, callback=None, errback=None):
if url.startswith("ftp://"):
import urllib
diff --git a/cnucnu/package_list.py b/cnucnu/package_list.py
index de15cdb..182795d 100755
--- a/cnucnu/package_list.py
+++ b/cnucnu/package_list.py
@@ -229,9 +229,10 @@ class Package(object):
def get_html(self):
if not self._html:
- from cnucnu.helper import get_html
+ from cnucnu.helper import get_html, expand_subdirs
try:
+ self.__url = expand_subdirs(self.url)
html = get_html(self.url)
# TODO: get_html should raise a generic retrieval error
except IOError, ioe:
diff --git a/cnucnu/tests/helper_test.py b/cnucnu/tests/helper_test.py
index fddd489..b925a12 100755
--- a/cnucnu/tests/helper_test.py
+++ b/cnucnu/tests/helper_test.py
@@ -22,7 +22,7 @@ import unittest
import sys
sys.path.insert(0, '../..')
-from cnucnu.helper import upstream_cmp, upstream_max, split_rc, cmp_upstream_repo, get_rc, get_html
+from cnucnu.helper import upstream_cmp, upstream_max, split_rc, cmp_upstream_repo, get_rc, get_html, expand_subdirs
class HelperTest(unittest.TestCase):
@@ -130,6 +130,7 @@ class HelperTest(unittest.TestCase):
http_url = ("http://www.fedoraproject.org")
res = StringIO.StringIO()
+ http_url = expand_subdirs(http_url)
data1 = get_html(http_url)
callback = [res.write, lambda ignore: reactor.stop()]