summary | refs | log | tree | commit | diff | stats
path: root/source/lib
diff options
context:
space:
mode:
author: Jeremy Allison <jra@samba.org> 2003-09-13 22:41:21 +0000
committer: Jeremy Allison <jra@samba.org> 2003-09-13 22:41:21 +0000
commit: f23acb4ca5feac8ad2acfa1baf7df31283aba3ea (patch)
tree: 24114a8649d8b3faaee7a1232642bfb490d92dd1 /source/lib
parent: f23c9d36b0cd4083722012e4a94df8295f29d04c (diff)
download: samba-f23acb4ca5feac8ad2acfa1baf7df31283aba3ea.tar.gz
samba-f23acb4ca5feac8ad2acfa1baf7df31283aba3ea.tar.xz
samba-f23acb4ca5feac8ad2acfa1baf7df31283aba3ea.zip
Fix for MacOS/X which uses STUPID BROKEN UNICODE COMPOSE CHARACTERS !
(rant off :-). Inspired by work from Benjamin Riefenstahl <Benjamin.Riefenstahl@epost.de>. Also add MacOSX/Darwin configure fixes. Jerry - can we put this in 3.0 release ? :-). Jeremy.
Diffstat (limited to 'source/lib')
-rw-r--r-- source/lib/charcnv.c 75
-rw-r--r-- source/lib/util_str.c 15
2 files changed, 63 insertions, 27 deletions
diff --git a/source/lib/charcnv.c b/source/lib/charcnv.c
index e8ae40dbe29..dafc88fb77a 100644
--- a/source/lib/charcnv.c
+++ b/source/lib/charcnv.c
@@ -176,6 +176,14 @@ static size_t convert_string_internal(charset_t from, charset_t to,
descriptor = conv_handles[from][to];
+ if (srclen == (size_t)-1) {
+ if (from == CH_UCS2) {
+ srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
+ } else {
+ srclen = strlen((const char *)src)+1;
+ }
+ }
+
if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
if (!conv_silent)
DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
@@ -248,31 +256,40 @@ size_t convert_string(charset_t from, charset_t to,
void const *src, size_t srclen,
void *dest, size_t destlen)
{
+ /*
+ * NB. We deliberately don't do a strlen here if srclen == -1.
+ * This is very expensive over millions of calls and is taken
+ * care of in the slow path in convert_string_internal. JRA.
+ */
+
if (srclen == 0)
return 0;
if (from != CH_UCS2 && to != CH_UCS2) {
const unsigned char *p = (const unsigned char *)src;
unsigned char *q = (unsigned char *)dest;
+ size_t slen = srclen;
+ size_t dlen = destlen;
unsigned char lastp;
size_t retval = 0;
/* If all characters are ascii, fast path here. */
- while (srclen && destlen) {
+ while (slen && dlen) {
if ((lastp = *p) <= 0x7f) {
*q++ = *p++;
- if (srclen != (size_t)-1) {
- srclen--;
+ if (slen != (size_t)-1) {
+ slen--;
}
- destlen--;
+ dlen--;
retval++;
if (!lastp)
break;
} else {
- if (srclen == (size_t)-1) {
- srclen = strlen(p)+1;
- }
- return retval + convert_string_internal(from, to, p, srclen, q, destlen);
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+ goto general_case;
+#else
+ return retval + convert_string_internal(from, to, p, slen, q, dlen);
+#endif
}
}
return retval;
@@ -280,25 +297,28 @@ size_t convert_string(charset_t from, charset_t to,
const unsigned char *p = (const unsigned char *)src;
unsigned char *q = (unsigned char *)dest;
size_t retval = 0;
+ size_t slen = srclen;
+ size_t dlen = destlen;
unsigned char lastp;
/* If all characters are ascii, fast path here. */
- while ((srclen >= 2) && destlen) {
+ while ((slen >= 2) && dlen) {
if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
*q++ = *p;
- if (srclen != (size_t)-1) {
- srclen -= 2;
+ if (slen != (size_t)-1) {
+ slen -= 2;
}
p += 2;
- destlen--;
+ dlen--;
retval++;
if (!lastp)
break;
} else {
- if (srclen == (size_t)-1) {
- srclen = (strlen_w((const smb_ucs2_t *)p)+1) * 2;
- }
- return retval + convert_string_internal(from, to, p, srclen, q, destlen);
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+ goto general_case;
+#else
+ return retval + convert_string_internal(from, to, p, slen, q, dlen);
+#endif
}
}
return retval;
@@ -306,29 +326,36 @@ size_t convert_string(charset_t from, charset_t to,
const unsigned char *p = (const unsigned char *)src;
unsigned char *q = (unsigned char *)dest;
size_t retval = 0;
+ size_t slen = srclen;
+ size_t dlen = destlen;
unsigned char lastp;
/* If all characters are ascii, fast path here. */
- while (srclen && (destlen >= 2)) {
+ while (slen && (dlen >= 2)) {
if ((lastp = *p) <= 0x7F) {
*q++ = *p++;
*q++ = '\0';
- if (srclen != (size_t)-1) {
- srclen--;
+ if (slen != (size_t)-1) {
+ slen--;
}
- destlen -= 2;
+ dlen -= 2;
retval += 2;
if (!lastp)
break;
} else {
- if (srclen == (size_t)-1) {
- srclen = strlen(p)+1;
- }
- return retval + convert_string_internal(from, to, p, srclen, q, destlen);
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+ goto general_case;
+#else
+ return retval + convert_string_internal(from, to, p, slen, q, dlen);
+#endif
}
}
return retval;
}
+
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+ general_case:
+#endif
return convert_string_internal(from, to, src, srclen, dest, destlen);
}
diff --git a/source/lib/util_str.c b/source/lib/util_str.c
index c065bfe9db6..15ac1639a9a 100644
--- a/source/lib/util_str.c
+++ b/source/lib/util_str.c
@@ -382,6 +382,10 @@ void string_replace(pstring s,char oldc,char newc)
return;
/* Slow (mb) path. */
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+ /* With compose characters we must restart from the beginning. JRA. */
+ p = s;
+#endif
push_ucs2(NULL, tmpbuf, p, sizeof(tmpbuf), STR_TERMINATE);
string_replace_w(tmpbuf, UCS2_CHAR(oldc), UCS2_CHAR(newc));
pull_ucs2(NULL, p, tmpbuf, -1, sizeof(tmpbuf), STR_TERMINATE);
@@ -1175,26 +1179,31 @@ char *string_truncate(char *s, unsigned int length)
We convert via ucs2 for now.
**/
-char *strchr_m(const char *s, char c)
+char *strchr_m(const char *src, char c)
{
wpstring ws;
pstring s2;
smb_ucs2_t *p;
+ const char *s;
/* this is quite a common operation, so we want it to be
fast. We optimise for the ascii case, knowing that all our
supported multi-byte character sets are ascii-compatible
(ie. they match for the first 128 chars) */
- while (*s && !(((unsigned char)s[0]) & 0x80)) {
+ for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
if (*s == c)
return s;
- s++;
}
if (!*s)
return NULL;
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+ /* With compose characters we must restart from the beginning. JRA. */
+ s = src;
+#endif
+
push_ucs2(NULL, ws, s, sizeof(ws), STR_TERMINATE);
p = strchr_w(ws, UCS2_CHAR(c));
if (!p)