diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2012-11-29 15:23:03 +0100 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2012-12-06 20:30:30 +0100 |
commit | 55946e2c707ce18d2b50c5d0134adfd65852d0e4 (patch) | |
tree | 9578be3f8b1489c54342b0be986289af83750cf4 /libmsi | |
parent | f4db108fd77b3e7359e6ad57ffc989aa2f31b2d2 (diff) | |
download | msitools-55946e2c707ce18d2b50c5d0134adfd65852d0e4.tar.gz msitools-55946e2c707ce18d2b50c5d0134adfd65852d0e4.tar.xz msitools-55946e2c707ce18d2b50c5d0134adfd65852d0e4.zip |
decode stream names from UTF-8
This is the encoding that libgsf accepts.
Diffstat (limited to 'libmsi')
-rw-r--r-- | libmsi/msipriv.h | 28 | ||||
-rw-r--r-- | libmsi/table.c | 54 |
2 files changed, 65 insertions, 17 deletions
diff --git a/libmsi/msipriv.h b/libmsi/msipriv.h
index d1c2290..14a3453 100644
--- a/libmsi/msipriv.h
+++ b/libmsi/msipriv.h
@@ -372,7 +372,7 @@ extern bool MSI_RecordsAreFieldsEqual(LibmsiRecord *a, LibmsiRecord *b, unsigned
 /* stream internals */
 extern void enum_stream_names( IStorage *stg );
 extern WCHAR *encode_streamname(bool bTable, const WCHAR *in);
-extern bool decode_streamname(const WCHAR *in, WCHAR *out);
+extern void decode_streamname(const WCHAR *in, WCHAR *out);
 
 /* database internals */
 extern unsigned msi_get_raw_stream( LibmsiDatabase *, const WCHAR *, IStream **);
@@ -563,6 +563,32 @@ static inline char *strcpynA( char *dst, const char *src, unsigned count )
     return dst;
 }
 
+static inline char *strdupWtoUTF8( const WCHAR *str )
+{
+    char *ret = NULL;
+    unsigned len;
+
+    if (!str) return ret;
+    len = WideCharToMultiByte( CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
+    ret = msi_alloc( len );
+    if (ret)
+        WideCharToMultiByte( CP_UTF8, 0, str, -1, ret, len, NULL, NULL );
+    return ret;
+}
+
+static inline WCHAR *strdupUTF8toW( const char *str )
+{
+    WCHAR *ret = NULL;
+    unsigned len;
+
+    if (!str) return ret;
+    len = MultiByteToWideChar( CP_UTF8, 0, str, -1, NULL, 0 );
+    ret = msi_alloc( len * sizeof(WCHAR) );
+    if (ret)
+        MultiByteToWideChar( CP_UTF8, 0, str, -1, ret, len );
+    return ret;
+}
+
 static inline char *strdupWtoA( const WCHAR *str )
 {
     char *ret = NULL;
diff --git a/libmsi/table.c b/libmsi/table.c
index 9bc61e8..3b89a9a 100644
--- a/libmsi/table.c
+++ b/libmsi/table.c
@@ -185,30 +185,52 @@ static int mime2utf(int x)
     return '_';
 }
 
-bool decode_streamname(const WCHAR *in, WCHAR *out)
+void decode_streamname(const WCHAR *in, WCHAR *out)
 {
-    WCHAR ch;
     unsigned count = 0;
+    uint8_t *enc_utf8 = strdupWtoUTF8(in);
+    uint8_t *p = enc_utf8;
 
-    while ( (ch = *in++) )
+    uint8_t *dec_utf8 = strdup(enc_utf8);
+    uint8_t *q = dec_utf8;
+
+    while ( *p )
     {
-        if( (ch >= 0x3800 ) && (ch < 0x4840 ) )
+        uint8_t ch = *p;
+        if( (ch == 0xe3 && p[1] >= 0xa0) || (ch == 0xe4 && p[1] < 0xa0) )
         {
-            if( ch >= 0x4800 )
-                ch = mime2utf(ch-0x4800);
-            else
-            {
-                ch -= 0x3800;
-                *out++ = mime2utf(ch&0x3f);
-                count++;
-                ch = mime2utf((ch>>6)&0x3f);
-            }
+            /* UTF-8 encoding of 0x3800..0x47ff. */
+            *q++ = mime2utf(p[2]&0x7f);
+            *q++ = mime2utf(p[1]^0xa0);
+            p += 3;
+            count += 2;
+            continue;
+        }
+        if( ch == 0xe4 && p[1] == 0xa0 ) {
+            /* UTF-8 encoding of 0x4800..0x483f. */
+            *q++ = mime2utf(p[2]&0x7f);
+            p += 3;
+            count++;
+            continue;
+        }
+        *q++ = *p++;
+        if( ch >= 0xc1) {
+            *q++ = *p++;
+        }
+        if( ch >= 0xe0) {
+            *q++ = *p++;
+        }
+        if( ch >= 0xf0) {
+            *q++ = *p++;
         }
-        *out++ = ch;
         count++;
     }
-    *out = 0;
-    return count;
+    *q = 0;
+    msi_free(enc_utf8);
+
+    /* convert UTF8 to WCHAR */
+    MultiByteToWideChar( CP_UTF8, 0, dec_utf8, -1, out, count + 1 );
+    msi_free(dec_utf8);
 }
 
 void enum_stream_names( IStorage *stg )