summary refs log tree commit diff stats
path: root/BitTorrent/ConvertedMetainfo.py
diff options
context:
space:
mode:
Diffstat (limited to 'BitTorrent/ConvertedMetainfo.py')
-rw-r--r-- BitTorrent/ConvertedMetainfo.py 288
1 files changed, 288 insertions, 0 deletions
diff --git a/BitTorrent/ConvertedMetainfo.py b/BitTorrent/ConvertedMetainfo.py
new file mode 100644
index 0000000..49eda7e
--- /dev/null
+++ b/BitTorrent/ConvertedMetainfo.py
@@ -0,0 +1,288 @@
+# The contents of this file are subject to the BitTorrent Open Source License
+# Version 1.1 (the License). You may not copy or use this file, in either
+# source code or executable form, except in compliance with the License. You
+# may obtain a copy of the License at http://www.bittorrent.com/license/.
+#
+# Software distributed under the License is distributed on an AS IS basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+
+# Written by Uoti Urpala
+
+# required for Python 2.2
+from __future__ import generators
+
+import os
+import sys
+from sha import sha
+
+from BitTorrent.obsoletepythonsupport import *
+
+from BitTorrent.bencode import bencode
+from BitTorrent import btformats
+from BitTorrent import BTFailure, WARNING, ERROR
+
+
+WINDOWS_UNSUPPORTED_CHARS ='"*/:<>?\|'  # characters mapped to '-' in the table built below
+windows_translate = [chr(i) for i in range(256)]  # start from the identity mapping of all 256 byte values
+for x in WINDOWS_UNSUPPORTED_CHARS:
+ windows_translate[ord(x)] = '-'
+windows_translate = ''.join(windows_translate)  # 256-character table; default argument of _fix_windows
+
+noncharacter_translate = {}  # code point -> ord('-'); passed to translate() in _enforce_utf8
+for i in range(0xD800, 0xE000):  # surrogate code points U+D800..U+DFFF
+ noncharacter_translate[i] = ord('-')
+for i in range(0xFDD0, 0xFDF0):  # Unicode noncharacters U+FDD0..U+FDEF
+ noncharacter_translate[i] = ord('-')
+for i in (0xFFFE, 0xFFFF):
+ noncharacter_translate[i] = ord('-')
+
+del x, i  # drop the loop variables so they do not stay behind as module globals
+
+def set_filesystem_encoding(encoding, errorfunc):
+ global filesystem_encoding
+ filesystem_encoding = 'ascii'
+ if encoding == '':
+ try:
+ sys.getfilesystemencoding
+ except AttributeError:
+ errorfunc(WARNING,
+ _("This seems to be an old Python version which "
+ "does not support detecting the filesystem "
+ "encoding. Assuming 'ascii'."))
+ return
+ encoding = sys.getfilesystemencoding()
+ if encoding is None:
+ errorfunc(WARNING,
+ _("Python failed to autodetect filesystem encoding. "
+ "Using 'ascii' instead."))
+ return
+ try:
+ 'a1'.decode(encoding)
+ except:
+ errorfunc(ERROR,
+ _("Filesystem encoding '%s' is not supported. "
+ "Using 'ascii' instead.") % encoding)
+ return
+ filesystem_encoding = encoding
+
+
+def generate_names(name, is_dir):
+ if is_dir:
+ prefix = name + '.'
+ suffix = ''
+ else:
+ pos = name.rfind('.')
+ if pos == -1:
+ pos = len(name)
+ prefix = name[:pos] + '.'
+ suffix = name[pos:]
+ i = 0
+ while True:
+ yield prefix + str(i) + suffix
+ i += 1
+
+
+class ConvertedMetainfo(object):
+
+ def __init__(self, metainfo):  # turn a decoded metainfo dict into attributes, including filesystem-safe file names
+ self.bad_torrent_wrongfield = False  # the bad_* flags record filename problems; show_encoding_errors() reports them
+ self.bad_torrent_unsolvable = False
+ self.bad_torrent_noncharacter = False
+ self.bad_conversion = False
+ self.bad_windows = False
+ self.bad_path = False
+ self.reported_errors = False
+ self.is_batch = False
+ self.orig_files = None  # batch torrents only: cleaned UTF-8 paths joined with '/'
+ self.files_fs = None  # batch torrents only: per-file path in the filesystem encoding
+ self.total_bytes = 0
+ self.sizes = []  # length of each file
+ self.comment = None
+
+ btformats.check_message(metainfo, check_paths=False)  # path components are checked in the loop below instead
+ info = metainfo['info']
+ if info.has_key('length'):  # single-file torrent
+ self.total_bytes = info['length']
+ self.sizes.append(self.total_bytes)
+ else:
+ self.is_batch = True  # multi-file torrent
+ r = []  # one (components, index) entry per accepted file
+ self.orig_files = []
+ self.sizes = []
+ i = 0  # index among accepted files; files_fs is sized by len(r) and indexed by it
+ for f in info['files']:
+ l = f['length']
+ self.total_bytes += l
+ self.sizes.append(l)
+ path = self._get_attr_utf8(f, 'path')
+ for x in path:
+ if not btformats.allowed_path_re.match(x):
+ if l > 0:  # a bad component is fatal unless the file is empty
+ raise BTFailure(_("Bad file path component: ")+x)
+ # BitComet makes bad .torrent files with empty
+ # filename part
+ self.bad_path = True
+ break  # skips the for-else below, so this file gets no entry in r
+ else:  # reached only when every component matched
+ p = []
+ for x in path:
+ p.append((self._enforce_utf8(x), x))  # (cleaned UTF-8, original)
+ path = p
+ self.orig_files.append('/'.join([x[0] for x in path]))
+ k = []
+ for u,o in path:
+ tf2 = self._to_fs_2(u)  # (bad, fs_name)
+ k.append((tf2, u, o))
+ r.append((k,i))
+ i += 1
+ # If two or more file/subdirectory names in the same directory
+ # would map to the same name after encoding conversions + Windows
+ # workarounds, change them. Files are changed as
+ # 'a.b.c'->'a.b.0.c', 'a.b.1.c' etc, directories or files without
+ # '.' as 'a'->'a.0', 'a.1' etc. If one of the multiple original
+ # names was a "clean" conversion, that one is always unchanged
+ # and the rest are adjusted.
+ r.sort()  # paths sharing a prefix become adjacent; a component with bad == False sorts before one with bad == True
+ self.files_fs = [None] * len(r)
+ prev = [None]  # sentinel that never equals a real component tuple
+ res = []  # filesystem names of the path built so far
+ stack = [{}]  # stack[n]: names already used in the directory at depth n of the current path
+ for x in r:
+ j = 0
+ x, i = x
+ while x[j] == prev[j]:  # count components shared with the previous path; NOTE(review): two identical paths, or a path extending the previous one, look like they would index past the end here - confirm check_message rules this out
+ j += 1
+ del res[j:]
+ del stack[j+1:]
+ name = x[j][0][1]  # filesystem name of the first differing component
+ if name in stack[-1]:  # collides with a name already used in this directory
+ for name in generate_names(x[j][1], j != len(x) - 1):  # second argument is true when the component is a directory
+ name = self._to_fs(name)
+ if name not in stack[-1]:
+ break
+ stack[-1][name] = None
+ res.append(name)
+ for j in range(j + 1, len(x)):  # remaining components start fresh directories
+ name = x[j][0][1]
+ stack.append({name: None})
+ res.append(name)
+ self.files_fs[i] = os.path.join(*res)
+ prev = x
+
+ self.name = self._get_field_utf8(info, 'name')
+ self.name_fs = self._to_fs(self.name)
+ self.piece_length = info['piece length']
+ self.is_trackerless = False
+ if metainfo.has_key('announce'):  # NOTE(review): neither self.announce nor self.nodes is set when both keys are absent
+ self.announce = metainfo['announce']
+ elif metainfo.has_key('nodes'):
+ self.is_trackerless = True
+ self.nodes = metainfo['nodes']
+
+ if metainfo.has_key('comment'):
+ self.comment = metainfo['comment']
+
+ self.hashes = [info['pieces'][x:x+20] for x in xrange(0,  # cut the pieces string into 20-byte chunks
+ len(info['pieces']), 20)]
+ self.infohash = sha(bencode(info)).digest()  # digest of the bencoded info dict
+
+ def show_encoding_errors(self, errorfunc):
+ self.reported_errors = True
+ if self.bad_torrent_unsolvable:
+ errorfunc(ERROR,
+ _("This .torrent file has been created with a broken "
+ "tool and has incorrectly encoded filenames. Some or "
+ "all of the filenames may appear different from what "
+ "the creator of the .torrent file intended."))
+ elif self.bad_torrent_noncharacter:
+ errorfunc(ERROR,
+ _("This .torrent file has been created with a broken "
+ "tool and has bad character values that do not "
+ "correspond to any real character. Some or all of the "
+ "filenames may appear different from what the creator "
+ "of the .torrent file intended."))
+ elif self.bad_torrent_wrongfield:
+ errorfunc(ERROR,
+ _("This .torrent file has been created with a broken "
+ "tool and has incorrectly encoded filenames. The "
+ "names used may still be correct."))
+ elif self.bad_conversion:
+ errorfunc(WARNING,
+ _('The character set used on the local filesystem ("%s") '
+ 'cannot represent all characters used in the '
+ 'filename(s) of this torrent. Filenames have been '
+ 'changed from the original.') % filesystem_encoding)
+ elif self.bad_windows:
+ errorfunc(WARNING,
+ _("The Windows filesystem cannot handle some "
+ "characters used in the filename(s) of this torrent. "
+ "Filenames have been changed from the original."))
+ elif self.bad_path:
+ errorfunc(WARNING,
+ _("This .torrent file has been created with a broken "
+ "tool and has at least 1 file with an invalid file "
+ "or directory name. However since all such files "
+ "were marked as having length 0 those files are "
+ "just ignored."))
+
+ # At least BitComet seems to make bad .torrent files that have
+ # fields in an arbitrary encoding but separate 'field.utf-8' attributes
+ def _get_attr_utf8(self, d, attrib):
+ v = d.get(attrib + '.utf-8')
+ if v is not None:
+ if v != d[attrib]:
+ self.bad_torrent_wrongfield = True
+ else:
+ v = d[attrib]
+ return v
+
+ def _enforce_utf8(self, s):
+ try:
+ s = s.decode('utf-8')
+ except:
+ self.bad_torrent_unsolvable = True
+ s = s.decode('utf-8', 'replace')
+ t = s.translate(noncharacter_translate)
+ if t != s:
+ self.bad_torrent_noncharacter = True
+ return t.encode('utf-8')
+
+ def _get_field_utf8(self, d, attrib):
+ r = self._get_attr_utf8(d, attrib)
+ return self._enforce_utf8(r)
+
+ def _fix_windows(self, name, t=windows_translate):
+ bad = False
+ r = name.translate(t)
+ # for some reason name cannot end with '.' or space
+ if r[-1] in '. ':
+ r = r + '-'
+ if r != name:
+ self.bad_windows = True
+ bad = True
+ return (r, bad)
+
+ def _to_fs(self, name):
+ return self._to_fs_2(name)[1]
+
+ def _to_fs_2(self, name):
+ bad = False
+ if sys.platform.startswith('win'):
+ name, bad = self._fix_windows(name)
+ name = name.decode('utf-8')
+ try:
+ r = name.encode(filesystem_encoding)
+ except:
+ self.bad_conversion = True
+ bad = True
+ r = name.encode(filesystem_encoding, 'replace')
+
+ if sys.platform.startswith('win'):
+ # encoding to mbcs with or without 'replace' will make the
+ # name unsupported by windows again because it adds random
+ # '?' characters which are invalid windows filesystem
+ # character
+ r, bad = self._fix_windows(r)
+ return (bad, r)