1 files changed, 24 insertions, 0 deletions
diff --git a/cnucnu/helper.py b/cnucnu/helper.py
index 8614c44..a3d20d3 100644
--- a/cnucnu/helper.py
+++ b/cnucnu/helper.py
@@ -24,10 +24,34 @@ __docformat__ = "restructuredtext"
 
 #from twisted.internet import reactor
 
+import re
 import pprint as pprint_module
 pp = pprint_module.PrettyPrinter(indent=4)
 pprint = pp.pprint
 
+__html_regex = re.compile(r'\bhref\s*=\s*["\']([^"\'/]+)/["\']', re.I)  # href="name/" -> captures name
+__text_regex = re.compile(r'^d.+\s(\S+)\s*$', re.I|re.M)  # last token of each line starting with d/D
+
+def expand_subdirs(url):
+    """ Expand all /^/'s in the given URL with the latest dir at that level.
+    Stops early, leaving any remaining /^/ in place, when a listing is empty
+    or has no subdirs; the latest entry is picked by upstream_max(). """
+    ix = url.find("/^/")
+    while ix != -1:
+        ls = get_html(url[0:ix+1])
+        if not ls:
+            break
+        # ftp:// listings are matched as text lines, anything else as hrefs
+        regex = __text_regex if url.startswith("ftp://") else __html_regex
+        subdirs = [m.group(1) for m in regex.finditer(ls)
+                   if m.group(1) not in (".", "..")]
+        if not subdirs:
+            break
+        latest = upstream_max(subdirs)
+        url = "%s/%s/%s" % (url[0:ix], latest, url[ix+len("/^/"):])
+        ix = url.find("/^/", ix + len(latest) + 1)
+    return url
+
 def get_html(url, callback=None, errback=None):
     if url.startswith("ftp://"):
         import urllib