summary | refs | log | tree | commit | diff | stats
path: root/openstack/common/strutils.py
diff options
context:
space:
mode:
author: Flaper Fesp <flaper87@gmail.com> 2013-01-24 13:33:45 +0100
committer: Flaper Fesp <flaper87@gmail.com> 2013-02-25 17:52:07 +0100
commit: bd5dad97585208ea5e86d636f3dc3b669e361a41 (patch)
tree: fb58c80f702dea421f3b712221875a6924826cd2 /openstack/common/strutils.py
parent: 15377750465b6eb261d2354988b9c90f1f3c1d29 (diff)
download: oslo-bd5dad97585208ea5e86d636f3dc3b669e361a41.tar.gz
oslo-bd5dad97585208ea5e86d636f3dc3b669e361a41.tar.xz
oslo-bd5dad97585208ea5e86d636f3dc3b669e361a41.zip
Decode / Encode string utils for openstack
Currently some clients lack support for non-ASCII characters. This patch introduces 2 functions (strutils.py) that will help clients and servers to "safely" encode and decode strings. About the safe_(decode|encode) functions: They both first try the encoding used in stdin (or Python's default encoding if that's None) and fall back to UTF-8 if those encodings fail to decode a given text. Neither of them will try to encode / decode non-basestring objects; both raise a TypeError if one is passed. Use case: This is currently being used in glanceclient. I5c3ea93a716edfe284d19f6291d4e36028f91eb2 Needed For: * Bug 1061156 * Bug 1130572 Change-Id: I78960dfdb6159fd600a6f5e5551ab5d5a3366ab5
Diffstat (limited to 'openstack/common/strutils.py')
-rw-r--r-- openstack/common/strutils.py 74
1 files changed, 74 insertions, 0 deletions
diff --git a/openstack/common/strutils.py b/openstack/common/strutils.py
index 05f0e9f..7813b64 100644
--- a/openstack/common/strutils.py
+++ b/openstack/common/strutils.py
@@ -20,6 +20,7 @@ System-level utilities and helper functions.
"""
import logging
+import sys
LOG = logging.getLogger(__name__)
@@ -57,3 +58,76 @@ def bool_from_string(subject):
if subject.strip().lower() in ('true', 'on', 'yes', '1'):
return True
return False
+
+
+def safe_decode(text, incoming=None, errors='strict'):
+ """
+ Decodes incoming str using `incoming` if it is
+ not already unicode.
+
+ :param incoming: Text's current encoding
+ :param errors: Errors handling policy. See here for valid
+ values http://docs.python.org/2/library/codecs.html
+ :returns: text or a unicode `incoming` encoded
+ representation of it.
+ :raises TypeError: If text is not an instance of basestring
+ """
+ if not isinstance(text, basestring):
+ raise TypeError("%s can't be decoded" % type(text))
+
+ if isinstance(text, unicode):
+ return text
+
+ if not incoming:
+ incoming = (sys.stdin.encoding or
+ sys.getdefaultencoding())
+
+ try:
+ return text.decode(incoming, errors)
+ except UnicodeDecodeError:
+ # Note(flaper87) If we get here, it means that
+ # sys.stdin.encoding / sys.getdefaultencoding
+ # didn't return a suitable encoding to decode
+ # text. This happens mostly when global LANG
+ # var is not set correctly and there's no
+ # default encoding. In this case, most likely
+ # python will use ASCII or ANSI encoders as
+ # default encodings but they won't be capable
+ # of decoding non-ASCII characters.
+ #
+ # Also, UTF-8 is being used since it's an ASCII
+ # extension.
+ return text.decode('utf-8', errors)
+
+
+def safe_encode(text, incoming=None,
+ encoding='utf-8', errors='strict'):
+ """
+ Encodes incoming str/unicode using `encoding`. If
+ `incoming` is not specified, text is expected to be
+ encoded with stdin's encoding (`sys.stdin.encoding`) or,
+ if that is not set, python's default (`sys.getdefaultencoding`).
+
+ :param incoming: Text's current encoding
+ :param encoding: Expected encoding for text (Default UTF-8)
+ :param errors: Errors handling policy. See here for valid
+ values http://docs.python.org/2/library/codecs.html
+ :returns: text or a bytestring `encoding` encoded
+ representation of it.
+ :raises TypeError: If text is not an instance of basestring
+ """
+ if not isinstance(text, basestring):
+ raise TypeError("%s can't be encoded" % type(text))
+
+ if not incoming:
+ incoming = (sys.stdin.encoding or
+ sys.getdefaultencoding())
+
+ if isinstance(text, unicode):
+ return text.encode(encoding, errors)
+ elif text and encoding != incoming:
+ # Decode text before encoding it with `encoding`
+ text = safe_decode(text, incoming, errors)
+ return text.encode(encoding, errors)
+
+ return text