author     Till Maas <opensource@till.name>  2011-02-24 20:45:47 +0100
committer  Till Maas <opensource@till.name>  2011-02-24 20:45:47 +0100
commit     f9189cd74b583ffcf33fe6580027e06f058abfdf (patch)
tree       332c9a9561bf9edc27c726a0831d435a7758586e
parent     c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f (diff)
download   cnucnu-f9189cd74b583ffcf33fe6580027e06f058abfdf.tar.gz
           cnucnu-f9189cd74b583ffcf33fe6580027e06f058abfdf.tar.xz
           cnucnu-f9189cd74b583ffcf33fe6580027e06f058abfdf.zip
Refactor expand_subdirs
-rw-r--r--  cnucnu/helper.py  35
1 file changed, 24 insertions(+), 11 deletions(-)
diff --git a/cnucnu/helper.py b/cnucnu/helper.py
index a3d20d3..2026574 100644
--- a/cnucnu/helper.py
+++ b/cnucnu/helper.py
@@ -32,24 +32,37 @@ pprint = pp.pprint
 __html_regex = re.compile(r'\bhref\s*=\s*["\']([^"\'/]+)/["\']', re.I)
 __text_regex = re.compile(r'^d.+\s(\S+)\s*$', re.I|re.M)
 
-def expand_subdirs(url):
-    """ Expand all /^/'s in the given URL with the latest dir at that level """
-    ix = url.find("/^/")
-    while ix != -1:
-        ls = get_html(url[0:ix+1])
-        if not ls:
-            break
+def expand_subdirs(url, glob_char="*"):
+    """ Expand glob_char in the given URL with the latest dir at that level
+    Example URL: http://www.example.com/foo/*/
+
+    The globbing char needs to be enclosed by slashes like "/*/".
+    """
+    glob_pattern = "/%s/" % glob_char
+    glob_pos = url.find(glob_pattern)
+
+    # url until first slash before glob_char
+    url_prefix = url[0:glob_pos+1]
+
+    # everything after the slash after glob_char
+    url_suffix = url[glob_pos+len(glob_pattern):]
+
+    if url_prefix != "":
+        dir_listing = get_html(url_prefix)
+        if not dir_listing:
+            return url
         subdirs = []
         regex = url.startswith("ftp://") and __text_regex or __html_regex
-        for match in regex.finditer(ls):
+        for match in regex.finditer(dir_listing):
             subdir = match.group(1)
             if subdir not in (".", ".."):
                 subdirs.append(subdir)
         if not subdirs:
-            break
+            return url
         latest = upstream_max(subdirs)
-        url = "%s/%s/%s" % (url[0:ix], latest, url[ix+len("/^/"):])
-        ix = url.find("/^/", ix + len(latest) + 1)
+
+        url = "%s%s/%s" % (url_prefix, latest, url_suffix)
+        return expand_subdirs(url, glob_char)
     return url
 
 def get_html(url, callback=None, errback=None):
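
For orientation, a minimal usage sketch of the refactored function. It is not part of the commit; the URLs below are hypothetical, and expand_subdirs still needs a reachable server so that get_html() can fetch the directory listings it expands against.

    from cnucnu.helper import expand_subdirs

    # "*" is now the default globbing character, so a "/*/" path segment is
    # replaced with the latest directory found at that level:
    url = expand_subdirs("http://www.example.com/foo/*/bar.tar.gz")

    # A different glob_char can still be passed explicitly, e.g. for URLs
    # that keep the old "^" marker:
    url = expand_subdirs("http://www.example.com/foo/^/bar.tar.gz", glob_char="^")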
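
The listing parsers themselves are untouched by this commit. As a rough, self-contained illustration of the two module-level regexes kept as context above (the sample listings are made up), this is how subdirectory names are extracted from HTML and FTP directory listings:

    import re

    __html_regex = re.compile(r'\bhref\s*=\s*["\']([^"\'/]+)/["\']', re.I)
    __text_regex = re.compile(r'^d.+\s(\S+)\s*$', re.I|re.M)

    # HTML listing: hrefs that end in "/" point to subdirectories
    html_listing = '<a href="1.0/">1.0/</a> <a href="2.0/">2.0/</a>'
    print(__html_regex.findall(html_listing))   # ['1.0', '2.0']

    # FTP listing: lines starting with "d" are directories, the last column is the name
    ftp_listing = ("drwxr-xr-x  2 ftp ftp 4096 Jan 01  2011 2.0\n"
                   "-rw-r--r--  1 ftp ftp   10 Jan 01  2011 README")
    print(__text_regex.findall(ftp_listing))    # ['2.0']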