diff options
author | Till Maas <opensource@till.name> | 2011-02-24 20:45:47 +0100 |
---|---|---|
committer | Till Maas <opensource@till.name> | 2011-02-24 20:45:47 +0100 |
commit | f9189cd74b583ffcf33fe6580027e06f058abfdf (patch) | |
tree | 332c9a9561bf9edc27c726a0831d435a7758586e /cnucnu/helper.py | |
parent | c9c8c71a1204fbd4dc19fe525f62b04b5fd1134f (diff) | |
download | cnucnu-f9189cd74b583ffcf33fe6580027e06f058abfdf.tar.gz cnucnu-f9189cd74b583ffcf33fe6580027e06f058abfdf.tar.xz cnucnu-f9189cd74b583ffcf33fe6580027e06f058abfdf.zip |
Refactor expand_subdirs
Diffstat (limited to 'cnucnu/helper.py')
-rw-r--r-- | cnucnu/helper.py | 35 |
1 files changed, 24 insertions, 11 deletions
diff --git a/cnucnu/helper.py b/cnucnu/helper.py
index a3d20d3..2026574 100644
--- a/cnucnu/helper.py
+++ b/cnucnu/helper.py
@@ -32,24 +32,37 @@ pprint = pp.pprint
 __html_regex = re.compile(r'\bhref\s*=\s*["\']([^"\'/]+)/["\']', re.I)
 __text_regex = re.compile(r'^d.+\s(\S+)\s*$', re.I|re.M)
 
-def expand_subdirs(url):
-    """ Expand all /^/'s in the given URL with the latest dir at that level """
-    ix = url.find("/^/")
-    while ix != -1:
-        ls = get_html(url[0:ix+1])
-        if not ls:
-            break
+def expand_subdirs(url, glob_char="*"):
+    """ Expand glob_char in the given URL with the latest dir at that level
+        Example URL: http://www.example.com/foo/*/
+
+        The globbing char needs to be enclosed by slashes like "/*/".
+    """
+    glob_pattern = "/%s/" % glob_char
+    glob_pos = url.find(glob_pattern)
+
+    # url until first slash before glob_char
+    url_prefix = url[0:glob_pos+1]
+
+    # everything after the slash after glob_char
+    url_suffix = url[glob_pos+len(glob_pattern):]
+
+    if url_prefix != "":
+        dir_listing = get_html(url_prefix)
+        if not dir_listing:
+            return url
         subdirs = []
         regex = url.startswith("ftp://") and __text_regex or __html_regex
-        for match in regex.finditer(ls):
+        for match in regex.finditer(dir_listing):
             subdir = match.group(1)
             if subdir not in (".", ".."):
                 subdirs.append(subdir)
         if not subdirs:
-            break
+            return url
         latest = upstream_max(subdirs)
-        url = "%s/%s/%s" % (url[0:ix], latest, url[ix+len("/^/"):])
-        ix = url.find("/^/", ix + len(latest) + 1)
+
+        url = "%s%s/%s" % (url_prefix, latest, url_suffix)
+        return expand_subdirs(url, glob_char)
     return url
 
 def get_html(url, callback=None, errback=None):