diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2010-02-17 16:04:35 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2010-02-17 16:04:35 +0100 |
commit | a511b80f126dec00ce9697808a4e287a530a4626 (patch) | |
tree | ae6af2323a2edaeea9c23dde4423da653a67ca80 /lib | |
parent | d6450b3383e2d46453be0f6c13014137fabc7bca (diff) | |
download | abrt-a511b80f126dec00ce9697808a4e287a530a4626.tar.gz abrt-a511b80f126dec00ce9697808a4e287a530a4626.tar.xz abrt-a511b80f126dec00ce9697808a4e287a530a4626.zip |
lib/Utils/abrt_dbus: utf8-sanitize all strings in dbus messages (fixes #565876)
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Utils/abrt_dbus.cpp | 98 |
1 file changed, 97 insertions, 1 deletion
diff --git a/lib/Utils/abrt_dbus.cpp b/lib/Utils/abrt_dbus.cpp index 66608410..4ec79b90 100644 --- a/lib/Utils/abrt_dbus.cpp +++ b/lib/Utils/abrt_dbus.cpp @@ -54,10 +54,106 @@ void store_uint64(DBusMessageIter* iter, uint64_t val) if (!dbus_message_iter_append_basic(iter, DBUS_TYPE_UINT64, &val)) die_out_of_memory(); } + +/* dbus daemon will simply close our connection if we send broken utf8. + * Therefore we must never do that. + */ +static char *sanitize_utf8(const char *src) +{ + const char *initial_src = src; + char *sanitized = NULL; + unsigned sanitized_pos = 0; + + while (*src) + { + int bytes = 0; + + unsigned c = (unsigned char) *src; + if (c <= 0x7f) + { + bytes = 1; + goto good_byte; + } + + /* Unicode -> utf8: */ + /* 80-7FF -> 110yyyxx 10xxxxxx */ + /* 800-FFFF -> 1110yyyy 10yyyyxx 10xxxxxx */ + /* 10000-1FFFFF -> 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx */ + /* 200000-3FFFFFF -> 111110tt 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx */ + /* 4000000-FFFFFFFF -> 111111tt 10tttttt 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx */ + do { + c <<= 1; + bytes++; + } while ((c & 0x80) && bytes < 6); + if (bytes == 1) + { + /* A bare "continuation" byte. Say, 80 */ + goto bad_byte; + } + + c = (uint8_t)(c) >> bytes; + { + const char *pp = src; + int cnt = bytes; + while (--cnt) + { + unsigned ch = (unsigned char) *++pp; + if ((ch & 0xc0) != 0x80) /* Missing "continuation" byte. 
Example: e0 80 */ + { + goto bad_byte; + } + c = (c << 6) + (ch & 0x3f); + } + } + /* TODO */ + /* Need to check that c isn't produced by overlong encoding */ + /* Example: 11000000 10000000 converts to NUL */ + /* 11110000 10000000 10000100 10000000 converts to 0x100 */ + /* correct encoding: 11000100 10000000 */ + if (c <= 0x7f) /* crude check: only catches bad encodings which map to chars <= 7f */ + { + goto bad_byte; + } + + good_byte: + while (--bytes >= 0) + { + c = (unsigned char) *src++; + if (sanitized) + { + sanitized = (char*) xrealloc(sanitized, sanitized_pos + 2); + sanitized[sanitized_pos++] = c; + sanitized[sanitized_pos] = '\0'; + } + } + continue; + + bad_byte: + if (!sanitized) + { + sanitized_pos = src - initial_src; + sanitized = xstrndup(initial_src, sanitized_pos); + } + sanitized = (char*) xrealloc(sanitized, sanitized_pos + 5); + sanitized[sanitized_pos++] = '['; + c = (unsigned char) *src++; + sanitized[sanitized_pos++] = "0123456789ABCDEF"[c >> 4]; + sanitized[sanitized_pos++] = "0123456789ABCDEF"[c & 0xf]; + sanitized[sanitized_pos++] = ']'; + sanitized[sanitized_pos] = '\0'; + } + + if (sanitized) + VERB2 log("note: bad utf8, converted '%s' -> '%s'", initial_src, sanitized); + + return sanitized; /* usually NULL: the whole string is ok */ +} void store_string(DBusMessageIter* iter, const char* val) { - if (!dbus_message_iter_append_basic(iter, DBUS_TYPE_STRING, &val)) + const char *sanitized = sanitize_utf8(val); + if (!dbus_message_iter_append_basic(iter, DBUS_TYPE_STRING, sanitized ? &sanitized : &val)) die_out_of_memory(); + free((char*)sanitized); } |