diff options
Diffstat (limited to 'NohGooee/ConvertedMetainfo.py')
-rw-r--r-- | NohGooee/ConvertedMetainfo.py | 283 |
1 files changed, 283 insertions, 0 deletions
# File: NohGooee/ConvertedMetainfo.py

# The contents of this file are subject to the BitTorrent Open Source License
# Version 1.1 (the License). You may not copy or use this file, in either
# source code or executable form, except in compliance with the License. You
# may obtain a copy of the License at http://www.bittorrent.com/license/.
#
# Software distributed under the License is distributed on an AS IS basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.

# Written by Uoti Urpala

import os
import sys
from sha import sha

from NohGooee.bencode import bencode
from NohGooee import btformats
from NohGooee import BTFailure, WARNING, ERROR


# Characters replaced by '-' when building file names on Windows.
WINDOWS_UNSUPPORTED_CHARS = '"*/:<>?\\|'

# 256-entry byte translation table for str.translate(): identity except that
# every character in WINDOWS_UNSUPPORTED_CHARS maps to '-'.
windows_translate = [chr(i) for i in range(256)]
for x in WINDOWS_UNSUPPORTED_CHARS:
    windows_translate[ord(x)] = '-'
windows_translate = ''.join(windows_translate)

# Code point -> ord('-') table for unicode.translate(): replaces the
# surrogate range and the U+FDD0..U+FDEF / U+FFFE / U+FFFF noncharacters.
noncharacter_translate = {}
for i in range(0xD800, 0xE000):
    noncharacter_translate[i] = ord('-')
for i in range(0xFDD0, 0xFDF0):
    noncharacter_translate[i] = ord('-')
for i in (0xFFFE, 0xFFFF):
    noncharacter_translate[i] = ord('-')

del x, i

# Encoding used to produce local file names.  Defined here so that the
# conversion helpers work even if set_filesystem_encoding() was never called.
filesystem_encoding = 'ascii'


def set_filesystem_encoding(encoding, errorfunc):
    """Select the encoding used when converting names for the filesystem.

    The module-level ``filesystem_encoding`` is first reset to 'ascii' and
    is only changed if a usable encoding is found.

    encoding  -- codec name; the empty string means "autodetect" through
                 sys.getfilesystemencoding().
    errorfunc -- callable(level, message) used to report why the 'ascii'
                 fallback was kept.
    """
    global filesystem_encoding
    filesystem_encoding = 'ascii'
    if encoding == '':
        detect = getattr(sys, 'getfilesystemencoding', None)
        if detect is None:
            errorfunc(WARNING,
                      _("This seems to be an old Python version which "
                        "does not support detecting the filesystem "
                        "encoding. Assuming 'ascii'."))
            return
        encoding = detect()
        if encoding is None:
            errorfunc(WARNING,
                      _("Python failed to autodetect filesystem encoding. "
                        "Using 'ascii' instead."))
            return
    try:
        # Probe the codec with a trivial string before committing to it.
        'a1'.decode(encoding)
    except Exception:
        # Deliberately broad: unknown codecs raise LookupError, but
        # non-text codecs can raise unrelated error types here.
        errorfunc(ERROR,
                  _("Filesystem encoding '%s' is not supported. "
                    "Using 'ascii' instead.") % encoding)
        return
    filesystem_encoding = encoding


def generate_names(name, is_dir):
    """Yield an endless series of numbered alternatives for ``name``.

    For directories the counter is appended ('a' -> 'a.0', 'a.1', ...).
    For files it is inserted before the last '.'-separated part
    ('a.b.c' -> 'a.b.0.c', 'a.b.1.c', ...); a file name without any '.'
    is treated like a directory name.
    """
    if is_dir:
        prefix = name + '.'
        suffix = ''
    else:
        pos = name.rfind('.')
        if pos == -1:
            pos = len(name)
        prefix = name[:pos] + '.'
        suffix = name[pos:]
    i = 0
    while True:
        yield prefix + str(i) + suffix
        i += 1


class ConvertedMetainfo(object):
    """Metainfo of a torrent with names converted for the local filesystem.

    The constructor validates the decoded metainfo dictionary through
    btformats.check_message(), forces names to valid UTF-8, converts them to
    ``filesystem_encoding`` and makes sure no two entries of a directory
    end up with the same converted name.  Problems met on the way are
    recorded in the ``bad_*`` flags; show_encoding_errors() reports them.
    """

    def __init__(self, metainfo):
        """Build the converted view of ``metainfo``.

        Raises BTFailure when a file of non-zero length has a path
        component rejected by btformats.allowed_path_re.
        """
        self.bad_torrent_wrongfield = False
        self.bad_torrent_unsolvable = False
        self.bad_torrent_noncharacter = False
        self.bad_conversion = False
        self.bad_windows = False
        self.bad_path = False
        self.reported_errors = False
        self.is_batch = False
        self.orig_files = None
        self.files_fs = None
        self.total_bytes = 0
        self.sizes = []
        self.comment = None

        btformats.check_message(metainfo, check_paths=False)
        info = metainfo['info']
        if 'length' in info:
            # Single-file torrent.
            self.total_bytes = info['length']
            self.sizes.append(self.total_bytes)
        else:
            self.is_batch = True
            entries = self._collect_batch_entries(info['files'])
            self._assign_fs_names(entries)

        self.name = self._get_field_utf8(info, 'name')
        self.name_fs = self._to_fs(self.name)
        self.piece_length = info['piece length']
        self.is_trackerless = False
        if 'announce' in metainfo:
            self.announce = metainfo['announce']
        elif 'nodes' in metainfo:
            self.is_trackerless = True
            self.nodes = metainfo['nodes']

        if 'comment' in metainfo:
            self.comment = metainfo['comment']

        pieces = info['pieces']
        # 'pieces' is cut into consecutive 20-byte slices, one per piece.
        self.hashes = [pieces[x:x+20] for x in xrange(0, len(pieces), 20)]
        self.infohash = sha(bencode(info)).digest()

    def _collect_batch_entries(self, files):
        """Record sizes and original names; return the convertible entries.

        Each returned item is ``(components, index)`` where ``components``
        is a list of ``((bad, fs_name), utf8_name, original_name)`` tuples,
        one per path component, and ``index`` is the position of the file
        in self.orig_files / self.files_fs.

        NOTE(review): self.sizes and self.total_bytes cover every file,
        including zero-length files skipped for having a bad path, while
        self.orig_files only lists the files that were kept.
        """
        entries = []
        self.orig_files = []
        self.sizes = []
        for f in files:
            length = f['length']
            self.total_bytes += length
            self.sizes.append(length)
            path = self._get_attr_utf8(f, 'path')
            for component in path:
                if not btformats.allowed_path_re.match(component):
                    if length > 0:
                        raise BTFailure(_("Bad file path component: ")
                                        + component)
                    # BitComet makes bad .torrent files with empty
                    # filename part
                    self.bad_path = True
                    break
            else:
                components = []
                utf8_parts = []
                for original in path:
                    utf8 = self._enforce_utf8(original)
                    utf8_parts.append(utf8)
                    components.append((self._to_fs_2(utf8), utf8, original))
                self.orig_files.append('/'.join(utf8_parts))
                entries.append((components, len(entries)))
        return entries

    def _assign_fs_names(self, entries):
        """Fill self.files_fs with collision-free local paths.

        If two or more file/subdirectory names in the same directory
        would map to the same name after encoding conversions + Windows
        workarounds, change them. Files are changed as
        'a.b.c'->'a.b.0.c', 'a.b.1.c' etc, directories or files without
        '.' as 'a'->'a.0', 'a.1' etc. If one of the multiple original
        names was a "clean" conversion, that one is always unchanged
        and the rest are adjusted.
        """
        # Sorting groups entries by directory and, because each component
        # starts with its (bad, fs_name) pair, puts clean conversions
        # (bad == False) ahead of altered ones.
        entries.sort()
        self.files_fs = [None] * len(entries)
        prev = [None]
        res = []        # final names of the path components walked so far
        stack = [{}]    # stack[d]: names already used in the directory at depth d
        for x, index in entries:
            # Skip the leading components shared with the previous entry.
            j = 0
            while x[j] == prev[j]:
                j += 1
            del res[j:]
            del stack[j+1:]
            name = x[j][0][1]
            if name in stack[-1]:
                # Name already taken in this directory: try numbered
                # variants of the UTF-8 name until one is free.
                for name in generate_names(x[j][1], j != len(x) - 1):
                    name = self._to_fs(name)
                    if name not in stack[-1]:
                        break
            stack[-1][name] = None
            res.append(name)
            # Remaining components live in directories not seen before,
            # so they cannot collide with anything yet.
            for j in range(j + 1, len(x)):
                name = x[j][0][1]
                stack.append({name: None})
                res.append(name)
            self.files_fs[index] = os.path.join(*res)
            prev = x

    def show_encoding_errors(self, errorfunc):
        """Report the most severe recorded name problem, if any.

        Marks the errors as reported and calls ``errorfunc(level, message)``
        at most once, for the first flag set in the order checked below.
        """
        self.reported_errors = True
        if self.bad_torrent_unsolvable:
            errorfunc(ERROR,
                      _("This .torrent file has been created with a broken "
                        "tool and has incorrectly encoded filenames. Some or "
                        "all of the filenames may appear different from what "
                        "the creator of the .torrent file intended."))
        elif self.bad_torrent_noncharacter:
            errorfunc(ERROR,
                      _("This .torrent file has been created with a broken "
                        "tool and has bad character values that do not "
                        "correspond to any real character. Some or all of the "
                        "filenames may appear different from what the creator "
                        "of the .torrent file intended."))
        elif self.bad_torrent_wrongfield:
            errorfunc(ERROR,
                      _("This .torrent file has been created with a broken "
                        "tool and has incorrectly encoded filenames. The "
                        "names used may still be correct."))
        elif self.bad_conversion:
            errorfunc(WARNING,
                      _('The character set used on the local filesystem ("%s") '
                        'cannot represent all characters used in the '
                        'filename(s) of this torrent. Filenames have been '
                        'changed from the original.') % filesystem_encoding)
        elif self.bad_windows:
            errorfunc(WARNING,
                      _("The Windows filesystem cannot handle some "
                        "characters used in the filename(s) of this torrent. "
                        "Filenames have been changed from the original."))
        elif self.bad_path:
            errorfunc(WARNING,
                      _("This .torrent file has been created with a broken "
                        "tool and has at least 1 file with an invalid file "
                        "or directory name. However since all such files "
                        "were marked as having length 0 those files are "
                        "just ignored."))

    # At least BitComet seems to make bad .torrent files that have
    # fields in an arbitrary encoding but separate 'field.utf-8' attributes
    def _get_attr_utf8(self, d, attrib):
        """Return d[attrib + '.utf-8'] when present, else d[attrib].

        Sets bad_torrent_wrongfield when the '.utf-8' variant exists and
        differs from the plain attribute.
        """
        v = d.get(attrib + '.utf-8')
        if v is not None:
            if v != d[attrib]:
                self.bad_torrent_wrongfield = True
        else:
            v = d[attrib]
        return v

    def _enforce_utf8(self, s):
        """Return ``s`` as a valid UTF-8 byte string.

        Undecodable input is decoded with 'replace' (setting
        bad_torrent_unsolvable); code points listed in
        noncharacter_translate become '-' (setting bad_torrent_noncharacter).
        """
        try:
            s = s.decode('utf-8')
        except UnicodeError:
            self.bad_torrent_unsolvable = True
            s = s.decode('utf-8', 'replace')
        t = s.translate(noncharacter_translate)
        if t != s:
            self.bad_torrent_noncharacter = True
        return t.encode('utf-8')

    def _get_field_utf8(self, d, attrib):
        """Fetch ``attrib`` from ``d`` and force it to valid UTF-8."""
        r = self._get_attr_utf8(d, attrib)
        return self._enforce_utf8(r)

    def _fix_windows(self, name, t=windows_translate):
        """Make ``name`` acceptable as a Windows file name.

        Returns ``(fixed_name, bad)``; ``bad`` is True (and bad_windows is
        set) when the name had to be changed.
        """
        bad = False
        r = name.translate(t)
        # for some reason name cannot end with '.' or space
        # (guarded so that an empty name does not raise IndexError)
        if r and r[-1] in '. ':
            r = r + '-'
        if r != name:
            self.bad_windows = True
            bad = True
        return (r, bad)

    def _to_fs(self, name):
        """Return only the converted file name for UTF-8 ``name``."""
        return self._to_fs_2(name)[1]

    def _to_fs_2(self, name):
        """Convert UTF-8 ``name`` to the filesystem encoding.

        Returns ``(bad, fs_name)`` where ``bad`` is True if the name was
        altered by any step (Windows fix-ups or a lossy encoding).
        """
        on_windows = sys.platform.startswith('win')
        bad = False
        if on_windows:
            name, bad = self._fix_windows(name)
        name = name.decode('utf-8')
        try:
            r = name.encode(filesystem_encoding)
        except UnicodeError:
            self.bad_conversion = True
            bad = True
            r = name.encode(filesystem_encoding, 'replace')

        if on_windows:
            # encoding to mbcs with or without 'replace' will make the
            # name unsupported by windows again because it adds random
            # '?' characters which are invalid windows filesystem
            # character
            r, bad_again = self._fix_windows(r)
            # Keep any earlier "altered" verdict instead of overwriting it,
            # otherwise changed names would be treated as clean conversions.
            bad = bad or bad_again
        return (bad, r)