summary | refs | log | tree | commit | diff | stats
path: root/cnucnu/helper.py
diff options
context:
space:
mode:
Diffstat (limited to 'cnucnu/helper.py')
-rw-r--r-- cnucnu/helper.py 24
1 files changed, 24 insertions, 0 deletions
diff --git a/cnucnu/helper.py b/cnucnu/helper.py
index 8614c44..a3d20d3 100644
--- a/cnucnu/helper.py
+++ b/cnucnu/helper.py
@@ -24,10 +24,34 @@ __docformat__ = "restructuredtext"
#from twisted.internet import reactor
+import re
import pprint as pprint_module
# Shared PrettyPrinter configured with a 4-space indent; `pprint` is bound to
# its pprint method so callers can use it as a plain module-level function.
pp = pprint_module.PrettyPrinter(indent=4)
pprint = pp.pprint
# Case-insensitive match for an href attribute whose quoted value contains no
# quote or slash characters and is followed by a single trailing "/"; group 1
# is the name before that slash.
+__html_regex = re.compile(r'\bhref\s*=\s*["\']([^"\'/]+)/["\']', re.I)
# Case-insensitive, multi-line match for a line that starts with "d"; group 1
# is the last whitespace-delimited token on that line.  expand_subdirs uses
# this pattern for ftp:// URLs (presumably "ls -l" style directory rows --
# verify against the listings get_html returns).
+__text_regex = re.compile(r'^d.+\s(\S+)\s*$', re.I|re.M)
+
def expand_subdirs(url):
    """Replace every "/^/" placeholder in *url* with a concrete subdirectory.

    For each placeholder, from left to right, the listing of the directory
    preceding it is fetched with get_html.  Subdirectory names are pulled
    out of that listing (with __text_regex for ftp:// URLs, __html_regex
    otherwise), "." and ".." are discarded, and the name selected by
    upstream_max is substituted for the "^".

    Expansion stops early, leaving any remaining placeholders untouched,
    when a listing is empty or yields no subdirectory names.

    :param url: URL that may contain one or more "/^/" placeholders.
    :returns: the URL with as many placeholders expanded as possible.
    """
    marker = "/^/"
    pos = url.find(marker)
    while pos != -1:
        # The slice keeps the slash that opens the placeholder, so the
        # request targets the parent directory itself.
        listing = get_html(url[:pos + 1])
        if not listing:
            break

        if url.startswith("ftp://"):
            pattern = __text_regex
        else:
            pattern = __html_regex

        candidates = [
            found.group(1)
            for found in pattern.finditer(listing)
            if found.group(1) not in (".", "..")
        ]
        if not candidates:
            break

        newest = upstream_max(candidates)
        url = "%s/%s/%s" % (url[:pos], newest, url[pos + len(marker):])
        # Resume at the slash that follows the inserted name so that a
        # directly adjacent placeholder ("/^/^/") is still found.
        pos = url.find(marker, pos + len(newest) + 1)
    return url
+
def get_html(url, callback=None, errback=None):
if url.startswith("ftp://"):
import urllib