Implement subdirectory handling.

Each /^/ in the given URL is replaced by the latest version dir at that level.
author: Ville Skyttä <ville.skytta@iki.fi> 2011-02-05 10:03:06 +0200
committer: Till Maas <opensource@till.name> 2011-02-20 19:09:21 +0100
commit: c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f (patch)
tree: a195c44ea43221ced4e8b5d5d78fb9dd86211632
parent: fb56c0e01105d4ab1a34920f377c66277465d534 (diff)
download: cnucnu-c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f.tar.gz cnucnu-c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f.tar.xz cnucnu-c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f.zip
3 files changed, 28 insertions, 2 deletions
diff --git a/cnucnu/helper.py b/cnucnu/helper.py
index 8614c44..a3d20d3 100644
--- a/cnucnu/helper.py
+++ b/cnucnu/helper.py
@@ -24,10 +24,34 @@ __docformat__ = "restructuredtext"
 
 #from twisted.internet import reactor
 
+import re
 import pprint as pprint_module
 pp = pprint_module.PrettyPrinter(indent=4)
 pprint = pp.pprint
 
+__html_regex = re.compile(r'\bhref\s*=\s*["\']([^"\'/]+)/["\']', re.I)
+__text_regex = re.compile(r'^d.+\s(\S+)\s*$', re.I|re.M)
+
+def expand_subdirs(url):
+    """ Expand all /^/'s in the given URL with the latest dir at that level """
+    ix = url.find("/^/")
+    while ix != -1:
+        ls = get_html(url[0:ix+1])
+        if not ls:
+            break
+        subdirs = []
+        regex = url.startswith("ftp://") and __text_regex or __html_regex
+        for match in regex.finditer(ls):
+            subdir = match.group(1)
+            if subdir not in (".", ".."):
+                subdirs.append(subdir)
+        if not subdirs:
+            break
+        latest = upstream_max(subdirs)
+        url = "%s/%s/%s" % (url[0:ix], latest, url[ix+len("/^/"):])
+        ix = url.find("/^/", ix + len(latest) + 1)
+    return url
+
 def get_html(url, callback=None, errback=None):
     if url.startswith("ftp://"):
         import urllib
diff --git a/cnucnu/package_list.py b/cnucnu/package_list.py
index de15cdb..182795d 100755
--- a/cnucnu/package_list.py
+++ b/cnucnu/package_list.py
@@ -229,9 +229,10 @@ class Package(object):
 
     def get_html(self):
         if not self._html:
-            from cnucnu.helper import get_html
+            from cnucnu.helper import get_html, expand_subdirs
 
             try:
+                self.__url = expand_subdirs(self.url)
                 html = get_html(self.url)
             # TODO: get_html should raise a generic retrieval error
             except IOError, ioe:
diff --git a/cnucnu/tests/helper_test.py b/cnucnu/tests/helper_test.py
index fddd489..b925a12 100755
--- a/cnucnu/tests/helper_test.py
+++ b/cnucnu/tests/helper_test.py
@@ -22,7 +22,7 @@ import unittest
 import sys
 sys.path.insert(0, '../..')
 
-from cnucnu.helper import upstream_cmp, upstream_max, split_rc, cmp_upstream_repo, get_rc, get_html
+from cnucnu.helper import upstream_cmp, upstream_max, split_rc, cmp_upstream_repo, get_rc, get_html, expand_subdirs
 
 class HelperTest(unittest.TestCase):
 
@@ -130,6 +130,7 @@ class HelperTest(unittest.TestCase):
         http_url = ("http://www.fedoraproject.org")
         res = StringIO.StringIO()
 
+        http_url = expand_subdirs(http_url)
         data1 = get_html(http_url)
 
         callback = [res.write, lambda ignore: reactor.stop()]
author	Ville Skyttä <ville.skytta@iki.fi>	2011-02-05 10:03:06 +0200
committer	Till Maas <opensource@till.name>	2011-02-20 19:09:21 +0100
commit	c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f (patch)
tree	a195c44ea43221ced4e8b5d5d78fb9dd86211632
parent	fb56c0e01105d4ab1a34920f377c66277465d534 (diff)
download	cnucnu-c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f.tar.gz cnucnu-c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f.tar.xz cnucnu-c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f.zip