1 files changed, 288 insertions, 0 deletions
diff --git a/BitTorrent/ConvertedMetainfo.py b/BitTorrent/ConvertedMetainfo.py
new file mode 100644
index 0000000..49eda7e
--- /dev/null
+++ b/BitTorrent/ConvertedMetainfo.py
@@ -0,0 +1,288 @@
+# The contents of this file are subject to the BitTorrent Open Source License
+# Version 1.1 (the License).  You may not copy or use this file, in either
+# source code or executable form, except in compliance with the License.  You
+# may obtain a copy of the License at http://www.bittorrent.com/license/.
+#
+# Software distributed under the License is distributed on an AS IS basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied.  See the License
+# for the specific language governing rights and limitations under the
+# License.
+
+# Written by Uoti Urpala
+
+# required for Python 2.2
+from __future__ import generators
+
+import os
+import sys
+from sha import sha
+
+from BitTorrent.obsoletepythonsupport import *
+
+from BitTorrent.bencode import bencode
+from BitTorrent import btformats
+from BitTorrent import BTFailure, WARNING, ERROR
+
+
+# Characters that Windows does not allow in file names.
+WINDOWS_UNSUPPORTED_CHARS = '"*/:<>?\\|'
+# 256-entry table for str.translate(): identity except those become '-'.
+windows_translate = [chr(code) for code in range(256)]
+for ch in WINDOWS_UNSUPPORTED_CHARS:
+    windows_translate[ord(ch)] = '-'
+windows_translate = ''.join(windows_translate)
+
+# Map for unicode.translate(): surrogates (U+D800-U+DFFF) and the
+# noncharacters U+FDD0-U+FDEF, U+FFFE and U+FFFF all become '-'.
+noncharacter_translate = {}
+for first, last in ((0xD800, 0xDFFF), (0xFDD0, 0xFDEF), (0xFFFE, 0xFFFF)):
+    for code in range(first, last + 1):
+        noncharacter_translate[code] = ord('-')
+del ch, code, first, last
+
+def set_filesystem_encoding(encoding, errorfunc):
+    """Set the global filesystem_encoding; '' means autodetect. On failure
+    report through errorfunc(level, message) and keep 'ascii'."""
+    global filesystem_encoding
+    filesystem_encoding = 'ascii'
+    if encoding == '':
+        if not hasattr(sys, 'getfilesystemencoding'):
+            errorfunc(WARNING,
+                      _("This seems to be an old Python version which "
+                        "does not support detecting the filesystem "
+                        "encoding. Assuming 'ascii'."))
+            return
+        encoding = sys.getfilesystemencoding()
+        if encoding is None:
+            errorfunc(WARNING,
+                      _("Python failed to autodetect filesystem encoding. "
+                        "Using 'ascii' instead."))
+            return
+    try:
+        'a1'.decode(encoding)
+    except Exception:  # narrower than bare except (spares exits on 2.5+)
+        errorfunc(ERROR,
+                  _("Filesystem encoding '%s' is not supported. "
+                    "Using 'ascii' instead.") % encoding)
+        return
+    filesystem_encoding = encoding
+
+
+def generate_names(name, is_dir):
+    """Yield numbered variants of name forever: name.0, name.1, ...
+    For a file (is_dir false) the number goes before the last extension,
+    e.g. 'a.b.c' -> 'a.b.0.c'; without a '.' it is simply appended.
+    """
+    stem, ext = name, ''
+    if not is_dir:
+        dot = name.rfind('.')
+        if dot != -1:
+            stem, ext = name[:dot], name[dot:]
+    counter = 0
+    while True:
+        yield stem + '.' + str(counter) + ext
+        counter += 1
+
+
+class ConvertedMetainfo(object):
+    """Torrent metainfo with names converted for the local filesystem;
+    the bad_* flags record problems for show_encoding_errors()."""
+
+    def __init__(self, metainfo):
+        """Validate metainfo and derive sizes, names, hashes and infohash.
+        Raises BTFailure for a bad path component of a non-empty file."""
+        self.bad_torrent_wrongfield = False
+        self.bad_torrent_unsolvable = False
+        self.bad_torrent_noncharacter = False
+        self.bad_conversion = False
+        self.bad_windows = False
+        self.bad_path = False
+        self.reported_errors = False
+        self.is_batch = False
+        self.orig_files = None
+        self.files_fs = None
+        self.total_bytes = 0
+        self.sizes = []
+        self.comment = None
+
+        btformats.check_message(metainfo, check_paths=False)
+        info = metainfo['info']
+        if info.has_key('length'):
+            self.total_bytes = info['length']
+            self.sizes.append(self.total_bytes)
+        else:
+            self.is_batch = True
+            r = []
+            self.orig_files = []
+            i = 0
+            for f in info['files']:
+                l = f['length']
+                self.total_bytes += l
+                self.sizes.append(l)
+                path = self._get_attr_utf8(f, 'path')
+                for x in path:
+                    if not btformats.allowed_path_re.match(x):
+                        if l > 0:
+                            raise BTFailure(_("Bad file path component: ")+x)
+                        # BitComet makes bad .torrent files with empty
+                        # filename part
+                        self.bad_path = True
+                        break
+                else:
+                    # (cleaned UTF-8, original) for each path component
+                    path = [(self._enforce_utf8(x), x) for x in path]
+                    self.orig_files.append('/'.join([x[0] for x in path]))
+                    # ((bad, fs_name), cleaned UTF-8, original)
+                    k = [(self._to_fs_2(u), u, o) for u, o in path]
+                    r.append((k,i))
+                    i += 1
+            # If several names in one directory map to the same name after
+            # encoding conversion + Windows workarounds, rename all but one:
+            # files 'a.b.c'->'a.b.0.c', 'a.b.1.c' etc, directories or files
+            # without '.' as 'a'->'a.0', 'a.1' etc. A "clean" conversion (bad
+            # false) sorts first in its directory, so it keeps its name.
+            # stack[d] = names taken at depth d, res = fs path being built.
+            r.sort()
+            self.files_fs = [None] * len(r)
+            prev = [None]
+            res = []
+            stack = [{}]
+            for x, i in r:
+                j = 0
+                while x[j] == prev[j]:  # skip components shared with prev
+                    j += 1
+                del res[j:]
+                del stack[j+1:]
+                name = x[j][0][1]
+                if name in stack[-1]:
+                    for name in generate_names(x[j][1], j != len(x) - 1):
+                        name = self._to_fs(name)
+                        if name not in stack[-1]:
+                            break
+                stack[-1][name] = None
+                res.append(name)
+                for j in range(j + 1, len(x)):
+                    name = x[j][0][1]
+                    stack.append({name: None})
+                    res.append(name)
+                self.files_fs[i] = os.path.join(*res)
+                prev = x
+
+        self.name = self._get_field_utf8(info, 'name')
+        self.name_fs = self._to_fs(self.name)
+        self.piece_length = info['piece length']
+        self.is_trackerless = False
+        if metainfo.has_key('announce'):
+            self.announce = metainfo['announce']
+        elif metainfo.has_key('nodes'):
+            self.is_trackerless = True
+            self.nodes = metainfo['nodes']
+
+        if metainfo.has_key('comment'):
+            self.comment = metainfo['comment']
+        self.hashes = [info['pieces'][x:x+20] for x in xrange(0,
+            len(info['pieces']), 20)]
+        self.infohash = sha(bencode(info)).digest()
+
+    def show_encoding_errors(self, errorfunc):
+        """Report at most one naming problem (the first bad_* flag set, in
+        the order tested) via errorfunc(level, message)."""
+        self.reported_errors = True
+        if self.bad_torrent_unsolvable:
+            errorfunc(ERROR,
+                      _("This .torrent file has been created with a broken "
+                        "tool and has incorrectly encoded filenames. Some or "
+                        "all of the filenames may appear different from what "
+                        "the creator of the .torrent file intended."))
+        elif self.bad_torrent_noncharacter:
+            errorfunc(ERROR,
+                      _("This .torrent file has been created with a broken "
+                        "tool and has bad character values that do not "
+                        "correspond to any real character. Some or all of the "
+                        "filenames may appear different from what the creator "
+                        "of the .torrent file intended."))
+        elif self.bad_torrent_wrongfield:
+            errorfunc(ERROR,
+                      _("This .torrent file has been created with a broken "
+                        "tool and has incorrectly encoded filenames. The "
+                        "names used may still be correct."))
+        elif self.bad_conversion:
+            errorfunc(WARNING,
+                      _('The character set used on the local filesystem ("%s") '
+                        'cannot represent all characters used in the '
+                        'filename(s) of this torrent. Filenames have been '
+                        'changed from the original.') % filesystem_encoding)
+        elif self.bad_windows:
+            errorfunc(WARNING,
+                      _("The Windows filesystem cannot handle some "
+                        "characters used in the filename(s) of this torrent. "
+                        "Filenames have been changed from the original."))
+        elif self.bad_path:
+            errorfunc(WARNING,
+                      _("This .torrent file has been created with a broken "
+                        "tool and has at least 1 file with an invalid file "
+                        "or directory name. However since all such files "
+                        "were marked as having length 0 those files are "
+                        "just ignored."))
+
+    def _get_attr_utf8(self, d, attrib):
+        """Prefer d[attrib + '.utf-8'] (BitComet seems to add these beside
+        fields in another encoding); flag wrongfield if the two differ."""
+        v = d.get(attrib + '.utf-8')
+        if v is None:
+            return d[attrib]
+        if v != d[attrib]:
+            self.bad_torrent_wrongfield = True
+        return v
+
+    def _enforce_utf8(self, s):
+        """Return s as valid UTF-8 with noncharacters replaced by '-'."""
+        try:
+            s = s.decode('utf-8')
+        except UnicodeError:
+            self.bad_torrent_unsolvable = True
+            s = s.decode('utf-8', 'replace')
+        t = s.translate(noncharacter_translate)
+        if t != s:
+            self.bad_torrent_noncharacter = True
+        return t.encode('utf-8')
+
+    def _get_field_utf8(self, d, attrib):
+        """Fetch the field via _get_attr_utf8() and sanitize it."""
+        return self._enforce_utf8(self._get_attr_utf8(d, attrib))
+
+    def _fix_windows(self, name, t=windows_translate):
+        """Make name acceptable to Windows; return (new_name, changed)."""
+        r = name.translate(t)
+        # name cannot end with '.' or space (r[-1:] is safe for '')
+        if r[-1:] in ('.', ' '):
+            r = r + '-'
+        bad = r != name
+        if bad:
+            self.bad_windows = True
+        return (r, bad)
+
+    def _to_fs(self, name):
+        """Return only the filesystem-encoded name from _to_fs_2()."""
+        return self._to_fs_2(name)[1]
+
+    def _to_fs_2(self, name):
+        """Return (bad, fs_name) for a UTF-8 name; bad is true when the
+        name had to be altered to fit Windows rules or the encoding."""
+        bad = False
+        if sys.platform.startswith('win'):
+            name, bad = self._fix_windows(name)
+        name = name.decode('utf-8')
+        try:
+            r = name.encode(filesystem_encoding)
+        except UnicodeError:
+            self.bad_conversion = True
+            bad = True
+            r = name.encode(filesystem_encoding, 'replace')
+        if sys.platform.startswith('win'):
+            # Encoding to mbcs (with or without 'replace') can add '?',
+            # which Windows rejects, so clean again. Keep an earlier
+            # bad=True instead of overwriting it with this pass's result.
+            r, changed = self._fix_windows(r)
+            bad = bad or changed
+        return (bad, r)