From c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f Mon Sep 17 00:00:00 2001 From: Ville Skyttä Date: Sat, 5 Feb 2011 10:03:06 +0200 Subject: Implement subdirectory handling. Each /^/ in the given URL is replaced by the latest version dir at that level. --- cnucnu/helper.py | 24 ++++++++++++++++++++++++ cnucnu/package_list.py | 3 ++- cnucnu/tests/helper_test.py | 3 ++- 3 files changed, 28 insertions(+), 2 deletions(-) (limited to 'cnucnu') diff --git a/cnucnu/helper.py b/cnucnu/helper.py index 8614c44..a3d20d3 100644 --- a/cnucnu/helper.py +++ b/cnucnu/helper.py @@ -24,10 +24,34 @@ __docformat__ = "restructuredtext" #from twisted.internet import reactor +import re import pprint as pprint_module pp = pprint_module.PrettyPrinter(indent=4) pprint = pp.pprint +__html_regex = re.compile(r'\bhref\s*=\s*["\']([^"\'/]+)/["\']', re.I) +__text_regex = re.compile(r'^d.+\s(\S+)\s*$', re.I|re.M) + +def expand_subdirs(url): + """ Expand all /^/'s in the given URL with the latest dir at that level """ + ix = url.find("/^/") + while ix != -1: + ls = get_html(url[0:ix+1]) + if not ls: + break + subdirs = [] + regex = url.startswith("ftp://") and __text_regex or __html_regex + for match in regex.finditer(ls): + subdir = match.group(1) + if subdir not in (".", ".."): + subdirs.append(subdir) + if not subdirs: + break + latest = upstream_max(subdirs) + url = "%s/%s/%s" % (url[0:ix], latest, url[ix+len("/^/"):]) + ix = url.find("/^/", ix + len(latest) + 1) + return url + def get_html(url, callback=None, errback=None): if url.startswith("ftp://"): import urllib diff --git a/cnucnu/package_list.py b/cnucnu/package_list.py index de15cdb..182795d 100755 --- a/cnucnu/package_list.py +++ b/cnucnu/package_list.py @@ -229,9 +229,10 @@ class Package(object): def get_html(self): if not self._html: - from cnucnu.helper import get_html + from cnucnu.helper import get_html, expand_subdirs try: + self.__url = expand_subdirs(self.url) html = get_html(self.url) # TODO: get_html should raise a generic retrieval error except IOError, ioe: diff --git a/cnucnu/tests/helper_test.py b/cnucnu/tests/helper_test.py index fddd489..b925a12 100755 --- a/cnucnu/tests/helper_test.py +++ b/cnucnu/tests/helper_test.py @@ -22,7 +22,7 @@ import unittest import sys sys.path.insert(0, '../..') -from cnucnu.helper import upstream_cmp, upstream_max, split_rc, cmp_upstream_repo, get_rc, get_html +from cnucnu.helper import upstream_cmp, upstream_max, split_rc, cmp_upstream_repo, get_rc, get_html, expand_subdirs class HelperTest(unittest.TestCase): @@ -130,6 +130,7 @@ class HelperTest(unittest.TestCase): http_url = ("http://www.fedoraproject.org") res = StringIO.StringIO() + http_url = expand_subdirs(http_url) data1 = get_html(http_url) callback = [res.write, lambda ignore: reactor.stop()] -- cgit