diff options
author | Jeremy Allison <jra@samba.org> | 2003-09-13 22:41:21 +0000 |
---|---|---|
committer | Jeremy Allison <jra@samba.org> | 2003-09-13 22:41:21 +0000 |
commit | f23acb4ca5feac8ad2acfa1baf7df31283aba3ea (patch) | |
tree | 24114a8649d8b3faaee7a1232642bfb490d92dd1 /source/lib | |
parent | f23c9d36b0cd4083722012e4a94df8295f29d04c (diff) | |
download | samba-f23acb4ca5feac8ad2acfa1baf7df31283aba3ea.tar.gz samba-f23acb4ca5feac8ad2acfa1baf7df31283aba3ea.tar.xz samba-f23acb4ca5feac8ad2acfa1baf7df31283aba3ea.zip |
Fix for MacOS/X, which uses STUPID BROKEN UNICODE COMPOSE CHARACTERS!
(rant off :-). Inspired by work from Benjamin Riefenstahl <Benjamin.Riefenstahl@epost.de>.
Also add MacOSX/Darwin configure fixes.
Jerry - can we put this in the 3.0 release? :-).
Jeremy.
Diffstat (limited to 'source/lib')
-rw-r--r-- | source/lib/charcnv.c | 75 | ||||
-rw-r--r-- | source/lib/util_str.c | 15 |
2 files changed, 63 insertions, 27 deletions
diff --git a/source/lib/charcnv.c b/source/lib/charcnv.c index e8ae40dbe29..dafc88fb77a 100644 --- a/source/lib/charcnv.c +++ b/source/lib/charcnv.c @@ -176,6 +176,14 @@ static size_t convert_string_internal(charset_t from, charset_t to, descriptor = conv_handles[from][to]; + if (srclen == (size_t)-1) { + if (from == CH_UCS2) { + srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2; + } else { + srclen = strlen((const char *)src)+1; + } + } + if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { if (!conv_silent) DEBUG(0,("convert_string_internal: Conversion not supported.\n")); @@ -248,31 +256,40 @@ size_t convert_string(charset_t from, charset_t to, void const *src, size_t srclen, void *dest, size_t destlen) { + /* + * NB. We deliberately don't do a strlen here is srclen == -1. + * This is very expensive over millions of calls and is taken + * care of in the slow path in convert_string_internal. JRA. + */ + if (srclen == 0) return 0; if (from != CH_UCS2 && to != CH_UCS2) { const unsigned char *p = (const unsigned char *)src; unsigned char *q = (unsigned char *)dest; + size_t slen = srclen; + size_t dlen = destlen; unsigned char lastp; size_t retval = 0; /* If all characters are ascii, fast path here. 
*/ - while (srclen && destlen) { + while (slen && dlen) { if ((lastp = *p) <= 0x7f) { *q++ = *p++; - if (srclen != (size_t)-1) { - srclen--; + if (slen != (size_t)-1) { + slen--; } - destlen--; + dlen--; retval++; if (!lastp) break; } else { - if (srclen == (size_t)-1) { - srclen = strlen(p)+1; - } - return retval + convert_string_internal(from, to, p, srclen, q, destlen); +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + goto general_case; +#else + return retval + convert_string_internal(from, to, p, slen, q, dlen); +#endif } } return retval; @@ -280,25 +297,28 @@ size_t convert_string(charset_t from, charset_t to, const unsigned char *p = (const unsigned char *)src; unsigned char *q = (unsigned char *)dest; size_t retval = 0; + size_t slen = srclen; + size_t dlen = destlen; unsigned char lastp; /* If all characters are ascii, fast path here. */ - while ((srclen >= 2) && destlen) { + while ((slen >= 2) && dlen) { if (((lastp = *p) <= 0x7f) && (p[1] == 0)) { *q++ = *p; - if (srclen != (size_t)-1) { - srclen -= 2; + if (slen != (size_t)-1) { + slen -= 2; } p += 2; - destlen--; + dlen--; retval++; if (!lastp) break; } else { - if (srclen == (size_t)-1) { - srclen = (strlen_w((const smb_ucs2_t *)p)+1) * 2; - } - return retval + convert_string_internal(from, to, p, srclen, q, destlen); +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + goto general_case; +#else + return retval + convert_string_internal(from, to, p, slen, q, dlen); +#endif } } return retval; @@ -306,29 +326,36 @@ size_t convert_string(charset_t from, charset_t to, const unsigned char *p = (const unsigned char *)src; unsigned char *q = (unsigned char *)dest; size_t retval = 0; + size_t slen = srclen; + size_t dlen = destlen; unsigned char lastp; /* If all characters are ascii, fast path here. 
*/ - while (srclen && (destlen >= 2)) { + while (slen && (dlen >= 2)) { if ((lastp = *p) <= 0x7F) { *q++ = *p++; *q++ = '\0'; - if (srclen != (size_t)-1) { - srclen--; + if (slen != (size_t)-1) { + slen--; } - destlen -= 2; + dlen -= 2; retval += 2; if (!lastp) break; } else { - if (srclen == (size_t)-1) { - srclen = strlen(p)+1; - } - return retval + convert_string_internal(from, to, p, srclen, q, destlen); +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + goto general_case; +#else + return retval + convert_string_internal(from, to, p, slen, q, dlen); +#endif } } return retval; } + +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + general_case: +#endif return convert_string_internal(from, to, src, srclen, dest, destlen); } diff --git a/source/lib/util_str.c b/source/lib/util_str.c index c065bfe9db6..15ac1639a9a 100644 --- a/source/lib/util_str.c +++ b/source/lib/util_str.c @@ -382,6 +382,10 @@ void string_replace(pstring s,char oldc,char newc) return; /* Slow (mb) path. */ +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + /* With compose characters we must restart from the beginning. JRA. */ + p = s; +#endif push_ucs2(NULL, tmpbuf, p, sizeof(tmpbuf), STR_TERMINATE); string_replace_w(tmpbuf, UCS2_CHAR(oldc), UCS2_CHAR(newc)); pull_ucs2(NULL, p, tmpbuf, -1, sizeof(tmpbuf), STR_TERMINATE); @@ -1175,26 +1179,31 @@ char *string_truncate(char *s, unsigned int length) We convert via ucs2 for now. **/ -char *strchr_m(const char *s, char c) +char *strchr_m(const char *src, char c) { wpstring ws; pstring s2; smb_ucs2_t *p; + const char *s; /* this is quite a common operation, so we want it to be fast. We optimise for the ascii case, knowing that all our supported multi-byte character sets are ascii-compatible (ie. 
they match for the first 128 chars) */ - while (*s && !(((unsigned char)s[0]) & 0x80)) { + for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) { if (*s == c) return s; - s++; } if (!*s) return NULL; +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + /* With compose characters we must restart from the beginning. JRA. */ + s = src; +#endif + push_ucs2(NULL, ws, s, sizeof(ws), STR_TERMINATE); p = strchr_w(ws, UCS2_CHAR(c)); if (!p) |