From 97f0671ce9dd1d260fea4e95f6e6e017a1ef1048 Mon Sep 17 00:00:00 2001 From: John Dennis Date: Wed, 3 Aug 2011 19:26:19 -0400 Subject: ticket 1569 - Test DN object non-latin Unicode support The DN unittest was lacking a test for i18n. The unittest was updated to store "Hello" in Arabic with both utf-8 and unicode and verify the values could be properly retrieved and converted to dn string syntax. During the testing a few problems were discovered and corrected. * passing in utf-8 caused an ASCII decode error because of Python's silly default encoding of ASCII. The fix was to explicitly use the utf-8 codec. * there were a couple of places where encode/decode were not called correctly. * the internal attr and value members of the AVA class were renamed to explicitly show they are stored as unicode. Of course the unittest was updated as well. --- ipalib/dn.py | 38 ++++++++++-------- tests/test_ipalib/test_dn.py | 94 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 16 deletions(-) diff --git a/ipalib/dn.py b/ipalib/dn.py index 0eac71166..dc3119d9a 100644 --- a/ipalib/dn.py +++ b/ipalib/dn.py @@ -19,8 +19,11 @@ from ldap.dn import str2dn, dn2str from ldap import DECODING_ERROR +import codecs import sys +utf8_codec = codecs.lookup('utf-8') + __all__ = ['AVA', 'RDN', 'DN'] ''' @@ -519,44 +522,47 @@ class AVA(object): if not isinstance(value, basestring): raise TypeError("value must be basestring, got %s instead" % value.__class__.__name__) - attr = attr.decode('utf-8') - value = value.decode('utf-8') - - self._attr = attr - self._value = value + self.attr = attr + self.value = value def _get_attr(self): - return self._attr + return self._attr_unicode def _set_attr(self, new_attr): if not isinstance(new_attr, basestring): raise TypeError("attr must be basestring, got %s instead" % new_attr.__class__.__name__) - self._attr = new_attr + if isinstance(new_attr, unicode): + self._attr_unicode = new_attr + else: + self._attr_unicode = 
utf8_codec.decode(new_attr)[0] attr = property(_get_attr, _set_attr) def _get_value(self): - return self._value + return self._value_unicode def _set_value(self, new_value): if not isinstance(new_value, basestring): raise TypeError("value must be basestring, got %s instead" % new_value.__class__.__name__) - self._value = new_value + if isinstance(new_value, unicode): + self._value_unicode = new_value + else: + self._value_unicode = utf8_codec.decode(new_value)[0] value = property(_get_value, _set_value) def _to_openldap(self): - return [[(self._attr.encode('utf-8'), self._value.encode('utf-8'), self.flags)]] + return [[(self._attr_unicode.encode('utf-8'), self._value_unicode.encode('utf-8'), self.flags)]] def __str__(self): return dn2str(self._to_openldap()) def __getitem__(self, key): if isinstance(key, basestring): - if key == self._attr: - return self._value + if key == self._attr_unicode: + return self._value_unicode raise KeyError("\"%s\" not found in %s" % (key, self.__str__())) else: raise TypeError("unsupported type for AVA indexing, must be basestring; not %s" % \ @@ -578,8 +584,8 @@ class AVA(object): if not isinstance(other, self.__class__): raise TypeError("expected AVA but got %s" % (other.__class__.__name__)) - return self._attr.lower() == other.attr.lower() and \ - self._value.lower() == other.value.lower() + return self._attr_unicode.lower() == other.attr.lower() and \ + self._value_unicode.lower() == other.value.lower() def __cmp__(self, other): 'comparision is case insensitive, see __eq__ doc for explanation' @@ -587,10 +593,10 @@ class AVA(object): if not isinstance(other, self.__class__): raise TypeError("expected AVA but got %s" % (other.__class__.__name__)) - result = cmp(self._attr.lower(), other.attr.lower()) + result = cmp(self._attr_unicode.lower(), other.attr.lower()) if result != 0: return result - result = cmp(self._value.lower(), other.value.lower()) + result = cmp(self._value_unicode.lower(), other.value.lower()) return result class 
RDN(object): diff --git a/tests/test_ipalib/test_dn.py b/tests/test_ipalib/test_dn.py index f4aa0aaec..04e442f3f 100644 --- a/tests/test_ipalib/test_dn.py +++ b/tests/test_ipalib/test_dn.py @@ -987,5 +987,99 @@ class TestEscapes(unittest.TestCase): self.assertEqual(dn['cn'], self.privilege) self.assertEqual(dn[0].value, self.privilege) +class TestInternationalization(unittest.TestCase): + def setUp(self): + # Hello in Arabic + self.arabic_hello_utf8 = '\xd9\x85\xd9\x83\xd9\x8a\xd9\x84' + \ + '\xd8\xb9\x20\xd9\x85\xd8\xa7\xd9' + \ + '\x84\xd9\x91\xd8\xb3\xd9\x84\xd8\xa7' + + self.arabic_hello_unicode = self.arabic_hello_utf8.decode('utf-8') + + def test_i18n(self): + self.assertEqual(self.arabic_hello_utf8, + self.arabic_hello_unicode.encode('utf-8')) + + # AVA's + # test attr i18n + ava1 = AVA(self.arabic_hello_unicode, 'foo') + self.assertIsInstance(ava1.attr, unicode) + self.assertIsInstance(ava1.value, unicode) + self.assertEqual(ava1.attr, self.arabic_hello_unicode) + self.assertEqual(str(ava1), self.arabic_hello_utf8+'=foo') + + ava1 = AVA(self.arabic_hello_utf8, 'foo') + self.assertIsInstance(ava1.attr, unicode) + self.assertIsInstance(ava1.value, unicode) + self.assertEqual(ava1.attr, self.arabic_hello_unicode) + self.assertEqual(str(ava1), self.arabic_hello_utf8+'=foo') + + # test value i18n + ava1 = AVA('cn', self.arabic_hello_unicode) + self.assertIsInstance(ava1.attr, unicode) + self.assertIsInstance(ava1.value, unicode) + self.assertEqual(ava1.value, self.arabic_hello_unicode) + self.assertEqual(str(ava1), 'cn='+self.arabic_hello_utf8) + + ava1 = AVA('cn', self.arabic_hello_utf8) + self.assertIsInstance(ava1.attr, unicode) + self.assertIsInstance(ava1.value, unicode) + self.assertEqual(ava1.value, self.arabic_hello_unicode) + self.assertEqual(str(ava1), 'cn='+self.arabic_hello_utf8) + + # RDN's + # test attr i18n + rdn1 = RDN((self.arabic_hello_unicode, 'foo')) + self.assertIsInstance(rdn1.attr, unicode) + self.assertIsInstance(rdn1.value, unicode) + 
self.assertEqual(rdn1.attr, self.arabic_hello_unicode) + self.assertEqual(str(rdn1), self.arabic_hello_utf8+'=foo') + + rdn1 = RDN((self.arabic_hello_utf8, 'foo')) + self.assertIsInstance(rdn1.attr, unicode) + self.assertIsInstance(rdn1.value, unicode) + self.assertEqual(rdn1.attr, self.arabic_hello_unicode) + self.assertEqual(str(rdn1), self.arabic_hello_utf8+'=foo') + + # test value i18n + rdn1 = RDN(('cn', self.arabic_hello_unicode)) + self.assertIsInstance(rdn1.attr, unicode) + self.assertIsInstance(rdn1.value, unicode) + self.assertEqual(rdn1.value, self.arabic_hello_unicode) + self.assertEqual(str(rdn1), 'cn='+self.arabic_hello_utf8) + + rdn1 = RDN(('cn', self.arabic_hello_utf8)) + self.assertIsInstance(rdn1.attr, unicode) + self.assertIsInstance(rdn1.value, unicode) + self.assertEqual(rdn1.value, self.arabic_hello_unicode) + self.assertEqual(str(rdn1), 'cn='+self.arabic_hello_utf8) + + # DN's + # test attr i18n + dn1 = DN((self.arabic_hello_unicode, 'foo')) + self.assertIsInstance(dn1[0].attr, unicode) + self.assertIsInstance(dn1[0].value, unicode) + self.assertEqual(dn1[0].attr, self.arabic_hello_unicode) + self.assertEqual(str(dn1), self.arabic_hello_utf8+'=foo') + + dn1 = DN((self.arabic_hello_utf8, 'foo')) + self.assertIsInstance(dn1[0].attr, unicode) + self.assertIsInstance(dn1[0].value, unicode) + self.assertEqual(dn1[0].attr, self.arabic_hello_unicode) + self.assertEqual(str(dn1), self.arabic_hello_utf8+'=foo') + + # test value i18n + dn1 = DN(('cn', self.arabic_hello_unicode)) + self.assertIsInstance(dn1[0].attr, unicode) + self.assertIsInstance(dn1[0].value, unicode) + self.assertEqual(dn1[0].value, self.arabic_hello_unicode) + self.assertEqual(str(dn1), 'cn='+self.arabic_hello_utf8) + + dn1 = DN(('cn', self.arabic_hello_utf8)) + self.assertIsInstance(dn1[0].attr, unicode) + self.assertIsInstance(dn1[0].value, unicode) + self.assertEqual(dn1[0].value, self.arabic_hello_unicode) + self.assertEqual(str(dn1), 'cn='+self.arabic_hello_utf8) + if __name__ 
== '__main__': unittest.main() -- cgit