summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2010-02-17 16:04:35 +0100
committerDenys Vlasenko <vda.linux@googlemail.com>2010-02-17 16:04:35 +0100
commita511b80f126dec00ce9697808a4e287a530a4626 (patch)
treeae6af2323a2edaeea9c23dde4423da653a67ca80 /lib
parentd6450b3383e2d46453be0f6c13014137fabc7bca (diff)
downloadabrt-a511b80f126dec00ce9697808a4e287a530a4626.tar.gz
abrt-a511b80f126dec00ce9697808a4e287a530a4626.tar.xz
abrt-a511b80f126dec00ce9697808a4e287a530a4626.zip
lib/Utils/abrt_dbus: utf8-sanitize all strings in dbus messages (fixes #565876)
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'lib')
-rw-r--r--lib/Utils/abrt_dbus.cpp98
1 files changed, 97 insertions, 1 deletions
diff --git a/lib/Utils/abrt_dbus.cpp b/lib/Utils/abrt_dbus.cpp
index 66608410..4ec79b90 100644
--- a/lib/Utils/abrt_dbus.cpp
+++ b/lib/Utils/abrt_dbus.cpp
@@ -54,10 +54,106 @@ void store_uint64(DBusMessageIter* iter, uint64_t val)
if (!dbus_message_iter_append_basic(iter, DBUS_TYPE_UINT64, &val))
die_out_of_memory();
}
+
+/* dbus daemon will simply close our connection if we send broken utf8.
+ * Therefore we must never do that.
+ *
+ * Checks that src is well-formed UTF-8 as defined by RFC 3629:
+ * sequences of 1..4 bytes, shortest-form encoding only, no UTF-16
+ * surrogates (U+D800..U+DFFF), nothing above U+10FFFF.
+ *
+ * Returns NULL if src needs no changes (the usual case).
+ * Otherwise returns an allocated copy of src in which every offending
+ * byte XX is replaced by the four characters "[XX]" (uppercase hex);
+ * the caller must free() it.
+ *
+ * src must not be NULL.
+ *
+ * NOTE(review): some dbus versions may also reject Unicode
+ * "noncharacters" (U+FFFE, U+FDD0..U+FDEF etc.) - not checked here, confirm.
+ */
+static char *sanitize_utf8(const char *src)
+{
+    /* Smallest code point which really needs N bytes, indexed by N.
+     * Anything smaller was produced by an overlong encoding. */
+    static const unsigned min_code_point[5] = { 0, 0, 0x80, 0x800, 0x10000 };
+    const char *initial_src = src;
+    char *sanitized = NULL;
+    unsigned sanitized_pos = 0;
+
+    while (*src)
+    {
+        int bytes = 0;
+
+        unsigned c = (unsigned char) *src;
+        if (c <= 0x7f)
+        {
+            /* Plain ASCII */
+            bytes = 1;
+            goto good_byte;
+        }
+
+        /* Unicode -> utf8: */
+        /* 80-7FF       -> 110yyyxx 10xxxxxx */
+        /* 800-FFFF     -> 1110yyyy 10yyyyxx 10xxxxxx */
+        /* 10000-10FFFF -> 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx */
+        /* Count leading 1 bits of the first byte: this is the sequence length */
+        do {
+            c <<= 1;
+            bytes++;
+        } while ((c & 0x80) && bytes < 6);
+        if (bytes == 1)
+        {
+            /* A bare "continuation" byte. Say, 80 */
+            goto bad_byte;
+        }
+        if (bytes > 4)
+        {
+            /* Lead bytes F8..FF: 5- and 6-byte forms are not valid UTF-8 */
+            goto bad_byte;
+        }
+
+        /* Payload bits of the lead byte (the length marker has been shifted out) */
+        c = (uint8_t)(c) >> bytes;
+        {
+            /* Inner scope: "goto good_byte" above must not jump over these initializers */
+            const char *pp = src;
+            int cnt = bytes;
+            while (--cnt)
+            {
+                unsigned ch = (unsigned char) *++pp;
+                /* Missing "continuation" byte. Example: e0 41.
+                 * The terminating NUL fails this test too, so we never read past it. */
+                if ((ch & 0xc0) != 0x80)
+                {
+                    goto bad_byte;
+                }
+                c = (c << 6) + (ch & 0x3f);
+            }
+        }
+        /* Overlong encoding? */
+        /* Example: 11000000 10000000 converts to NUL */
+        /* 11110000 10000000 10000100 10000000 converts to 0x100 */
+        /* correct encoding: 11000100 10000000 */
+        if (c < min_code_point[bytes])
+        {
+            goto bad_byte;
+        }
+        /* Beyond the Unicode range, or a UTF-16 surrogate? */
+        if (c > 0x10ffff || (c >= 0xd800 && c <= 0xdfff))
+        {
+            goto bad_byte;
+        }
+
+ good_byte:
+        /* Valid sequence: skip it, copying it only if we already had to start a sanitized copy */
+        while (--bytes >= 0)
+        {
+            c = (unsigned char) *src++;
+            if (sanitized)
+            {
+                sanitized = (char*) xrealloc(sanitized, sanitized_pos + 2);
+                sanitized[sanitized_pos++] = c;
+                sanitized[sanitized_pos] = '\0';
+            }
+        }
+        continue;
+
+ bad_byte:
+        if (!sanitized)
+        {
+            /* First bad byte: everything before it was ok, copy it verbatim */
+            sanitized_pos = src - initial_src;
+            sanitized = xstrndup(initial_src, sanitized_pos);
+        }
+        /* Escape exactly one byte as "[XX]" (4 chars + NUL) and rescan from the next byte */
+        sanitized = (char*) xrealloc(sanitized, sanitized_pos + 5);
+        sanitized[sanitized_pos++] = '[';
+        c = (unsigned char) *src++;
+        sanitized[sanitized_pos++] = "0123456789ABCDEF"[c >> 4];
+        sanitized[sanitized_pos++] = "0123456789ABCDEF"[c & 0xf];
+        sanitized[sanitized_pos++] = ']';
+        sanitized[sanitized_pos] = '\0';
+    }
+
+    if (sanitized)
+        VERB2 log("note: bad utf8, converted '%s' -> '%s'", initial_src, sanitized);
+
+    return sanitized; /* usually NULL: the whole string is ok */
+}
/* Appends val to the message under construction as a DBUS_TYPE_STRING.
 * The string is passed through sanitize_utf8() first, so that bytes which
 * are not valid UTF-8 are escaped instead of being sent as-is.
 * Calls die_out_of_memory() if the append fails.
 */
void store_string(DBusMessageIter* iter, const char* val)
{
- if (!dbus_message_iter_append_basic(iter, DBUS_TYPE_STRING, &val))
+ const char *sanitized = sanitize_utf8(val); /* NULL if val needs no changes */
+ if (!dbus_message_iter_append_basic(iter, DBUS_TYPE_STRING, sanitized ? &sanitized : &val)) /* the address of the string pointer is passed, not the pointer itself */
die_out_of_memory();
+ free((char*)sanitized); /* no-op when sanitized is NULL */
}