# The contents of this file are subject to the BitTorrent Open Source License # Version 1.1 (the License). You may not copy or use this file, in either # source code or executable form, except in compliance with the License. You # may obtain a copy of the License at http://www.bittorrent.com/license/. # # Software distributed under the License is distributed on an AS IS basis, # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License # for the specific language governing rights and limitations under the # License. # Written by Uoti Urpala # required for Python 2.2 from __future__ import generators import os import sys from sha import sha from BitTorrent.obsoletepythonsupport import * from BitTorrent.bencode import bencode from BitTorrent import btformats from BitTorrent import BTFailure, WARNING, ERROR WINDOWS_UNSUPPORTED_CHARS ='"*/:<>?\|' windows_translate = [chr(i) for i in range(256)] for x in WINDOWS_UNSUPPORTED_CHARS: windows_translate[ord(x)] = '-' windows_translate = ''.join(windows_translate) noncharacter_translate = {} for i in range(0xD800, 0xE000): noncharacter_translate[i] = ord('-') for i in range(0xFDD0, 0xFDF0): noncharacter_translate[i] = ord('-') for i in (0xFFFE, 0xFFFF): noncharacter_translate[i] = ord('-') del x, i def set_filesystem_encoding(encoding, errorfunc): global filesystem_encoding filesystem_encoding = 'ascii' if encoding == '': try: sys.getfilesystemencoding except AttributeError: errorfunc(WARNING, _("This seems to be an old Python version which " "does not support detecting the filesystem " "encoding. Assuming 'ascii'.")) return encoding = sys.getfilesystemencoding() if encoding is None: errorfunc(WARNING, _("Python failed to autodetect filesystem encoding. " "Using 'ascii' instead.")) return try: 'a1'.decode(encoding) except: errorfunc(ERROR, _("Filesystem encoding '%s' is not supported. " "Using 'ascii' instead.") % encoding) return filesystem_encoding = encoding def generate_names(name, is_dir): if is_dir: prefix = name + '.' suffix = '' else: pos = name.rfind('.') if pos == -1: pos = len(name) prefix = name[:pos] + '.' suffix = name[pos:] i = 0 while True: yield prefix + str(i) + suffix i += 1 class ConvertedMetainfo(object): def __init__(self, metainfo): self.bad_torrent_wrongfield = False self.bad_torrent_unsolvable = False self.bad_torrent_noncharacter = False self.bad_conversion = False self.bad_windows = False self.bad_path = False self.reported_errors = False self.is_batch = False self.orig_files = None self.files_fs = None self.total_bytes = 0 self.sizes = [] self.comment = None btformats.check_message(metainfo, check_paths=False) info = metainfo['info'] if info.has_key('length'): self.total_bytes = info['length'] self.sizes.append(self.total_bytes) else: self.is_batch = True r = [] self.orig_files = [] self.sizes = [] i = 0 for f in info['files']: l = f['length'] self.total_bytes += l self.sizes.append(l) path = self._get_attr_utf8(f, 'path') for x in path: if not btformats.allowed_path_re.match(x): if l > 0: raise BTFailure(_("Bad file path component: ")+x) # BitComet makes bad .torrent files with empty # filename part self.bad_path = True break else: p = [] for x in path: p.append((self._enforce_utf8(x), x)) path = p self.orig_files.append('/'.join([x[0] for x in path])) k = [] for u,o in path: tf2 = self._to_fs_2(u) k.append((tf2, u, o)) r.append((k,i)) i += 1 # If two or more file/subdirectory names in the same directory # would map to the same name after encoding conversions + Windows # workarounds, change them. Files are changed as # 'a.b.c'->'a.b.0.c', 'a.b.1.c' etc, directories or files without # '.' as 'a'->'a.0', 'a.1' etc. If one of the multiple original # names was a "clean" conversion, that one is always unchanged # and the rest are adjusted. r.sort() self.files_fs = [None] * len(r) prev = [None] res = [] stack = [{}] for x in r: j = 0 x, i = x while x[j] == prev[j]: j += 1 del res[j:] del stack[j+1:] name = x[j][0][1] if name in stack[-1]: for name in generate_names(x[j][1], j != len(x) - 1): name = self._to_fs(name) if name not in stack[-1]: break stack[-1][name] = None res.append(name) for j in range(j + 1, len(x)): name = x[j][0][1] stack.append({name: None}) res.append(name) self.files_fs[i] = os.path.join(*res) prev = x self.name = self._get_field_utf8(info, 'name') self.name_fs = self._to_fs(self.name) self.piece_length = info['piece length'] self.is_trackerless = False if metainfo.has_key('announce'): self.announce = metainfo['announce'] elif metainfo.has_key('nodes'): self.is_trackerless = True self.nodes = metainfo['nodes'] if metainfo.has_key('comment'): self.comment = metainfo['comment'] self.hashes = [info['pieces'][x:x+20] for x in xrange(0, len(info['pieces']), 20)] self.infohash = sha(bencode(info)).digest() def show_encoding_errors(self, errorfunc): self.reported_errors = True if self.bad_torrent_unsolvable: errorfunc(ERROR, _("This .torrent file has been created with a broken " "tool and has incorrectly encoded filenames. Some or " "all of the filenames may appear different from what " "the creator of the .torrent file intended.")) elif self.bad_torrent_noncharacter: errorfunc(ERROR, _("This .torrent file has been created with a broken " "tool and has bad character values that do not " "correspond to any real character. Some or all of the " "filenames may appear different from what the creator " "of the .torrent file intended.")) elif self.bad_torrent_wrongfield: errorfunc(ERROR, _("This .torrent file has been created with a broken " "tool and has incorrectly encoded filenames. The " "names used may still be correct.")) elif self.bad_conversion: errorfunc(WARNING, _('The character set used on the local filesystem ("%s") ' 'cannot represent all characters used in the ' 'filename(s) of this torrent. Filenames have been ' 'changed from the original.') % filesystem_encoding) elif self.bad_windows: errorfunc(WARNING, _("The Windows filesystem cannot handle some " "characters used in the filename(s) of this torrent. " "Filenames have been changed from the original.")) elif self.bad_path: errorfunc(WARNING, _("This .torrent file has been created with a broken " "tool and has at least 1 file with an invalid file " "or directory name. However since all such files " "were marked as having length 0 those files are " "just ignored.")) # At least BitComet seems to make bad .torrent files that have # fields in an arbitrary encoding but separate 'field.utf-8' attributes def _get_attr_utf8(self, d, attrib): v = d.get(attrib + '.utf-8') if v is not None: if v != d[attrib]: self.bad_torrent_wrongfield = True else: v = d[attrib] return v def _enforce_utf8(self, s): try: s = s.decode('utf-8') except: self.bad_torrent_unsolvable = True s = s.decode('utf-8', 'replace') t = s.translate(noncharacter_translate) if t != s: self.bad_torrent_noncharacter = True return t.encode('utf-8') def _get_field_utf8(self, d, attrib): r = self._get_attr_utf8(d, attrib) return self._enforce_utf8(r) def _fix_windows(self, name, t=windows_translate): bad = False r = name.translate(t) # for some reason name cannot end with '.' or space if r[-1] in '. ': r = r + '-' if r != name: self.bad_windows = True bad = True return (r, bad) def _to_fs(self, name): return self._to_fs_2(name)[1] def _to_fs_2(self, name): bad = False if sys.platform.startswith('win'): name, bad = self._fix_windows(name) name = name.decode('utf-8') try: r = name.encode(filesystem_encoding) except: self.bad_conversion = True bad = True r = name.encode(filesystem_encoding, 'replace') if sys.platform.startswith('win'): # encoding to mbcs with or without 'replace' will make the # name unsupported by windows again because it adds random # '?' characters which are invalid windows filesystem # character r, bad = self._fix_windows(r) return (bad, r)