From a511b80f126dec00ce9697808a4e287a530a4626 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 17 Feb 2010 16:04:35 +0100 Subject: lib/Utils/abrt_dbus: utf8-sanitize all strings in dbus messages (fixes #565876) Signed-off-by: Denys Vlasenko --- lib/Utils/abrt_dbus.cpp | 98 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Utils/abrt_dbus.cpp b/lib/Utils/abrt_dbus.cpp index 66608410..4ec79b90 100644 --- a/lib/Utils/abrt_dbus.cpp +++ b/lib/Utils/abrt_dbus.cpp @@ -54,10 +54,106 @@ void store_uint64(DBusMessageIter* iter, uint64_t val) if (!dbus_message_iter_append_basic(iter, DBUS_TYPE_UINT64, &val)) die_out_of_memory(); } + +/* dbus daemon will simply close our connection if we send broken utf8. + * Therefore we must never do that. + */ +static char *sanitize_utf8(const char *src) +{ + const char *initial_src = src; + char *sanitized = NULL; + unsigned sanitized_pos = 0; + + while (*src) + { + int bytes = 0; + + unsigned c = (unsigned char) *src; + if (c <= 0x7f) + { + bytes = 1; + goto good_byte; + } + + /* Unicode -> utf8: */ + /* 80-7FF -> 110yyyxx 10xxxxxx */ + /* 800-FFFF -> 1110yyyy 10yyyyxx 10xxxxxx */ + /* 10000-1FFFFF -> 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx */ + /* 200000-3FFFFFF -> 111110tt 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx */ + /* 4000000-FFFFFFFF -> 111111tt 10tttttt 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx */ + do { + c <<= 1; + bytes++; + } while ((c & 0x80) && bytes < 6); + if (bytes == 1) + { + /* A bare "continuation" byte. Say, 80 */ + goto bad_byte; + } + + c = (uint8_t)(c) >> bytes; + { + const char *pp = src; + int cnt = bytes; + while (--cnt) + { + unsigned ch = (unsigned char) *++pp; + if ((ch & 0xc0) != 0x80) /* Missing "continuation" byte. Example: e0 80 */ + { + goto bad_byte; + } + c = (c << 6) + (ch & 0x3f); + } + } + /* TODO */ + /* Need to check that c isn't produced by overlong encoding */ + /* Example: 11000000 10000000 converts to NUL */ + /* 11110000 10000000 10000100 10000000 converts to 0x100 */ + /* correct encoding: 11000100 10000000 */ + if (c <= 0x7f) /* crude check: only catches bad encodings which map to chars <= 7f */ + { + goto bad_byte; + } + + good_byte: + while (--bytes >= 0) + { + c = (unsigned char) *src++; + if (sanitized) + { + sanitized = (char*) xrealloc(sanitized, sanitized_pos + 2); + sanitized[sanitized_pos++] = c; + sanitized[sanitized_pos] = '\0'; + } + } + continue; + + bad_byte: + if (!sanitized) + { + sanitized_pos = src - initial_src; + sanitized = xstrndup(initial_src, sanitized_pos); + } + sanitized = (char*) xrealloc(sanitized, sanitized_pos + 5); + sanitized[sanitized_pos++] = '['; + c = (unsigned char) *src++; + sanitized[sanitized_pos++] = "0123456789ABCDEF"[c >> 4]; + sanitized[sanitized_pos++] = "0123456789ABCDEF"[c & 0xf]; + sanitized[sanitized_pos++] = ']'; + sanitized[sanitized_pos] = '\0'; + } + + if (sanitized) + VERB2 log("note: bad utf8, converted '%s' -> '%s'", initial_src, sanitized); + + return sanitized; /* usually NULL: the whole string is ok */ +} void store_string(DBusMessageIter* iter, const char* val) { - if (!dbus_message_iter_append_basic(iter, DBUS_TYPE_STRING, &val)) + const char *sanitized = sanitize_utf8(val); + if (!dbus_message_iter_append_basic(iter, DBUS_TYPE_STRING, sanitized ? &sanitized : &val)) die_out_of_memory(); + free((char*)sanitized); } -- cgit