diff options
author | Ville Skyttä <ville.skytta@iki.fi> | 2011-02-05 10:03:06 +0200 |
---|---|---|
committer | Till Maas <opensource@till.name> | 2011-02-20 19:09:21 +0100 |
commit | c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f (patch) | |
tree | a195c44ea43221ced4e8b5d5d78fb9dd86211632 | |
parent | fb56c0e01105d4ab1a34920f377c66277465d534 (diff) | |
download | cnucnu-c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f.tar.gz cnucnu-c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f.tar.xz cnucnu-c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f.zip |
Implement subdirectory handling.
Each "/^/" in the given URL is replaced by the latest version directory at that level.
-rw-r--r-- | cnucnu/helper.py | 24 | ||||
-rwxr-xr-x | cnucnu/package_list.py | 3 | ||||
-rwxr-xr-x | cnucnu/tests/helper_test.py | 3 |
3 files changed, 28 insertions, 2 deletions
```diff
diff --git a/cnucnu/helper.py b/cnucnu/helper.py
index 8614c44..a3d20d3 100644
--- a/cnucnu/helper.py
+++ b/cnucnu/helper.py
@@ -24,10 +24,34 @@ __docformat__ = "restructuredtext"
 
 #from twisted.internet import reactor
 
+import re
 import pprint as pprint_module
 pp = pprint_module.PrettyPrinter(indent=4)
 pprint = pp.pprint
 
+__html_regex = re.compile(r'\bhref\s*=\s*["\']([^"\'/]+)/["\']', re.I)
+__text_regex = re.compile(r'^d.+\s(\S+)\s*$', re.I|re.M)
+
+def expand_subdirs(url):
+    """ Expand all /^/'s in the given URL with the latest dir at that level """
+    ix = url.find("/^/")
+    while ix != -1:
+        ls = get_html(url[0:ix+1])
+        if not ls:
+            break
+        subdirs = []
+        regex = url.startswith("ftp://") and __text_regex or __html_regex
+        for match in regex.finditer(ls):
+            subdir = match.group(1)
+            if subdir not in (".", ".."):
+                subdirs.append(subdir)
+        if not subdirs:
+            break
+        latest = upstream_max(subdirs)
+        url = "%s/%s/%s" % (url[0:ix], latest, url[ix+len("/^/"):])
+        ix = url.find("/^/", ix + len(latest) + 1)
+    return url
+
 def get_html(url, callback=None, errback=None):
     if url.startswith("ftp://"):
         import urllib
diff --git a/cnucnu/package_list.py b/cnucnu/package_list.py
index de15cdb..182795d 100755
--- a/cnucnu/package_list.py
+++ b/cnucnu/package_list.py
@@ -229,9 +229,10 @@ class Package(object):
 
     def get_html(self):
         if not self._html:
-            from cnucnu.helper import get_html
+            from cnucnu.helper import get_html, expand_subdirs
 
             try:
+                self.__url = expand_subdirs(self.url)
                 html = get_html(self.url)
             # TODO: get_html should raise a generic retrieval error
             except IOError, ioe:
diff --git a/cnucnu/tests/helper_test.py b/cnucnu/tests/helper_test.py
index fddd489..b925a12 100755
--- a/cnucnu/tests/helper_test.py
+++ b/cnucnu/tests/helper_test.py
@@ -22,7 +22,7 @@ import unittest
 import sys
 sys.path.insert(0, '../..')
 
-from cnucnu.helper import upstream_cmp, upstream_max, split_rc, cmp_upstream_repo, get_rc, get_html
+from cnucnu.helper import upstream_cmp, upstream_max, split_rc, cmp_upstream_repo, get_rc, get_html, expand_subdirs
 
 class HelperTest(unittest.TestCase):
 
@@ -130,6 +130,7 @@ class HelperTest(unittest.TestCase):
         http_url = ("http://www.fedoraproject.org")
 
         res = StringIO.StringIO()
+        http_url = expand_subdirs(http_url)
         data1 = get_html(http_url)
         callback = [res.write, lambda ignore: reactor.stop()]
 
```