Fixed bug found by John Blair where trim_string wasn't correctly trimming
trailing multibyte code page strings. Jeremy.
author: Jeremy Allison <jra@samba.org> 1998-10-20 20:08:35 +0000
committer: Jeremy Allison <jra@samba.org> 1998-10-20 20:08:35 +0000
commit: dbdbce29f56d03f6abf1ee3d96ca2032e688dcbc (patch)
tree: df14cd3ddfe3312ecade7c100e0000bea5204e7b /source/lib
parent: 05a297e3a98c14360782af4ad0d851638fb5da9a (diff)
download: samba-dbdbce29f56d03f6abf1ee3d96ca2032e688dcbc.tar.gz samba-dbdbce29f56d03f6abf1ee3d96ca2032e688dcbc.tar.xz samba-dbdbce29f56d03f6abf1ee3d96ca2032e688dcbc.zip
2 files changed, 303 insertions, 193 deletions
diff --git a/source/lib/kanji.c b/source/lib/kanji.c
index 04eecb54375..b85f0c93540 100644
--- a/source/lib/kanji.c
+++ b/source/lib/kanji.c
@@ -88,105 +88,110 @@ static char hex_tag = HEXTAG;
 /*******************************************************************
   SHIFT JIS functions
 ********************************************************************/
+
 /*******************************************************************
  search token from S1 separated any char of S2
  S1 contains SHIFT JIS chars.
 ********************************************************************/
+
 static char *sj_strtok(char *s1, char *s2)
 {
-    static char *s = NULL;
-    char *q;
-    if (!s1) {
-	if (!s) {
-	    return NULL;
-	}
-	s1 = s;
-    }
-    for (q = s1; *s1; ) {
-	if (is_shift_jis (*s1)) {
-	    s1 += 2;
-	} else if (is_kana (*s1)) {
-	    s1++;
-	} else {
-	    char *p = strchr (s2, *s1);
-	    if (p) {
-		if (s1 != q) {
-		    s = s1 + 1;
-		    *s1 = '\0';
-		    return q;
-		}
-		q = s1 + 1;
-	    }
-	    s1++;
-	}
+  static char *s = NULL;
+  char *q;
+  if (!s1) {
+    if (!s) {
+      return NULL;
     }
-    s = NULL;
-    if (*q) {
-	return q;
+    s1 = s;
+  }
+  for (q = s1; *s1; ) {
+    if (is_shift_jis (*s1)) {
+      s1 += 2;
+    } else if (is_kana (*s1)) {
+      s1++;
+    } else {
+      char *p = strchr (s2, *s1);
+      if (p) {
+        if (s1 != q) {
+          s = s1 + 1;
+          *s1 = '\0';
+          return q;
+        }
+        q = s1 + 1;
+      }
+      s1++;
     }
-    return NULL;
+  }
+  s = NULL;
+  if (*q) {
+    return q;
+  }
+  return NULL;
 }
 
 /*******************************************************************
  search string S2 from S1
  S1 contains SHIFT JIS chars.
 ********************************************************************/
+
 static char *sj_strstr(char *s1, char *s2)
 {
-    int len = strlen ((char *) s2);
-    if (!*s2) 
-	return (char *) s1;
-    for (;*s1;) {
-	if (*s1 == *s2) {
-	    if (strncmp (s1, s2, len) == 0)
-		return (char *) s1;
-	}
-	if (is_shift_jis (*s1)) {
-	    s1 += 2;
-	} else {
-	    s1++;
-	}
+  int len = strlen ((char *) s2);
+  if (!*s2) 
+    return (char *) s1;
+  for (;*s1;) {
+    if (*s1 == *s2) {
+      if (strncmp (s1, s2, len) == 0)
+        return (char *) s1;
+    }
+    if (is_shift_jis (*s1)) {
+      s1 += 2;
+    } else {
+      s1++;
     }
-    return 0;
+  }
+  return 0;
 }
 
 /*******************************************************************
  Search char C from beginning of S.
  S contains SHIFT JIS chars.
 ********************************************************************/
+
 static char *sj_strchr (char *s, int c)
 {
-    for (; *s; ) {
-	if (*s == c)
-	    return (char *) s;
-	if (is_shift_jis (*s)) {
-	    s += 2;
-	} else {
-	    s++;
-	}
+  for (; *s; ) {
+    if (*s == c)
+      return (char *) s;
+    if (is_shift_jis (*s)) {
+      s += 2;
+    } else {
+      s++;
     }
-    return 0;
+  }
+  return 0;
 }
 
 /*******************************************************************
  Search char C end of S.
  S contains SHIFT JIS chars.
 ********************************************************************/
+
 static char *sj_strrchr(char *s, int c)
 {
-    char *q;
+  char *q;
 
-    for (q = 0; *s; ) {
-	if (*s == c) {
-	    q = (char *) s;
-	}
-	if (is_shift_jis (*s)) {
-	    s += 2;
-	} else {
-	    s++;
-	}
+  for (q = 0; *s; ) {
+    if (*s == c) {
+      q = (char *) s;
     }
-    return q;
+    if (is_shift_jis (*s)) {
+      s += 2;
+    } else {
+      s++;
+    }
+  }
+  return q;
 }
 
 /*******************************************************************
@@ -259,35 +264,35 @@ static BOOL simpch_is_multibyte_char_1(char c)
 
 static char *generic_multibyte_strtok(char *s1, char *s2)
 {
-    static char *s = NULL;
-    char *q;
-    if (!s1) {
-        if (!s) {
-            return NULL;
-        }
-        s1 = s;
+  static char *s = NULL;
+  char *q;
+  if (!s1) {
+    if (!s) {
+      return NULL;
     }
-    for (q = s1; *s1; ) {
-        if ((*is_multibyte_char_1)(*s1)) {
-            s1 += 2;
-        } else {
-            char *p = strchr (s2, *s1);
-            if (p) {
-                if (s1 != q) {
-                    s = s1 + 1;
-                    *s1 = '\0';
-                    return q;
-                }
-                q = s1 + 1;
-            }
-            s1++;
+    s1 = s;
+  }
+  for (q = s1; *s1; ) {
+    if ((*is_multibyte_char_1)(*s1)) {
+        s1 += 2;
+    } else {
+      char *p = strchr (s2, *s1);
+      if (p) {
+        if (s1 != q) {
+          s = s1 + 1;
+          *s1 = '\0';
+          return q;
         }
+        q = s1 + 1;
+      }
+    s1++;
     }
-    s = NULL;
-    if (*q) {
-        return q;
-    }
-    return NULL;
+  }
+  s = NULL;
+  if (*q) {
+    return q;
+  }
+  return NULL;
 }
 
 /*******************************************************************
@@ -297,21 +302,21 @@ static char *generic_multibyte_strtok(char *s1, char *s2)
 
 static char *generic_multibyte_strstr(char *s1, char *s2)
 {
-    int len = strlen ((char *) s2);
-    if (!*s2)
+  int len = strlen ((char *) s2);
+  if (!*s2)
+    return (char *) s1;
+  for (;*s1;) {
+    if (*s1 == *s2) {
+      if (strncmp (s1, s2, len) == 0)
         return (char *) s1;
-    for (;*s1;) {
-        if (*s1 == *s2) {
-            if (strncmp (s1, s2, len) == 0)
-                return (char *) s1;
-        }
-        if ((*is_multibyte_char_1)(*s1)) {
-            s1 += 2;
-        } else {
-            s1++;
-        }
     }
-    return 0;
+    if ((*is_multibyte_char_1)(*s1)) {
+      s1 += 2;
+    } else {
+      s1++;
+    }
+  }
+  return 0;
 }
 
 /*******************************************************************
@@ -321,16 +326,16 @@ static char *generic_multibyte_strstr(char *s1, char *s2)
 
 static char *generic_multibyte_strchr(char *s, int c)
 {
-    for (; *s; ) {
-        if (*s == c)
-            return (char *) s;
-        if ((*is_multibyte_char_1)(*s)) {
-            s += 2;
-        } else {
-            s++;
-        }
+  for (; *s; ) {
+    if (*s == c)
+      return (char *) s;
+    if ((*is_multibyte_char_1)(*s)) {
+      s += 2;
+    } else {
+      s++;
     }
-    return 0;
+  }
+  return 0;
 }
 
 /*******************************************************************
@@ -340,19 +345,19 @@ static char *generic_multibyte_strchr(char *s, int c)
 
 static char *generic_multibyte_strrchr(char *s, int c)
 {
-    char *q;
+  char *q;
  
-    for (q = 0; *s; ) {
-        if (*s == c) {
-            q = (char *) s;
-        }
-        if ((*is_multibyte_char_1)(*s)) {
-            s += 2;
-        } else {
-            s++;
-        }
+  for (q = 0; *s; ) {
+    if (*s == c) {
+      q = (char *) s;
     }
-    return q;
+    if ((*is_multibyte_char_1)(*s)) {
+      s += 2;
+    } else {
+      s++;
+    }
+  }
+  return q;
 }
 
 /*******************************************************************
@@ -370,119 +375,125 @@ static int skip_generic_multibyte_char(char c)
 /*******************************************************************
   Code conversion
 ********************************************************************/
+
 /* convesion buffer */
 static char cvtbuf[1024];
 
 /*******************************************************************
   EUC <-> SJIS
 ********************************************************************/
+
 static int euc2sjis (int hi, int lo)
 {
-    if (hi & 1)
-	return ((hi / 2 + (hi < 0xdf ? 0x31 : 0x71)) << 8) |
-	    (lo - (lo >= 0xe0 ? 0x60 : 0x61));
-    else
-	return ((hi / 2 + (hi < 0xdf ? 0x30 : 0x70)) << 8) | (lo - 2);
+  if (hi & 1)
+    return ((hi / 2 + (hi < 0xdf ? 0x31 : 0x71)) << 8) |
+            (lo - (lo >= 0xe0 ? 0x60 : 0x61));
+  else
+    return ((hi / 2 + (hi < 0xdf ? 0x30 : 0x70)) << 8) | (lo - 2);
 }
 
 static int sjis2euc (int hi, int lo)
 {
-    if (lo >= 0x9f)
-	return ((hi * 2 - (hi >= 0xe0 ? 0xe0 : 0x60)) << 8) | (lo + 2);
-    else
-	return ((hi * 2 - (hi >= 0xe0 ? 0xe1 : 0x61)) << 8) |
-	    (lo + (lo >= 0x7f ? 0x60 : 0x61));
+  if (lo >= 0x9f)
+    return ((hi * 2 - (hi >= 0xe0 ? 0xe0 : 0x60)) << 8) | (lo + 2);
+  else
+    return ((hi * 2 - (hi >= 0xe0 ? 0xe1 : 0x61)) << 8) |
+            (lo + (lo >= 0x7f ? 0x60 : 0x61));
 }
 
 /*******************************************************************
  Convert FROM contain SHIFT JIS codes to EUC codes
  return converted buffer
 ********************************************************************/
+
 static char *sj_to_euc(char *from, BOOL overwrite)
 {
-    char *out;
-    char *save;
-
-    save = (char *) from;
-    for (out = cvtbuf; *from;) {
-	if (is_shift_jis (*from)) {
-	    int code = sjis2euc ((int) from[0] & 0xff, (int) from[1] & 0xff);
-	    *out++ = (code >> 8) & 0xff;
-	    *out++ = code;
-	    from += 2;
-	} else if (is_kana (*from)) {
-	    *out++ = (char)euc_kana;
-	    *out++ = *from++;
-	} else {
-	    *out++ = *from++;
-	}
-    }
-    *out = 0;
-    if (overwrite) {
-	pstrcpy((char *) save, (char *) cvtbuf);
-	return (char *) save;
+  char *out;
+  char *save;
+
+  save = (char *) from;
+  for (out = cvtbuf; *from;) {
+    if (is_shift_jis (*from)) {
+      int code = sjis2euc ((int) from[0] & 0xff, (int) from[1] & 0xff);
+      *out++ = (code >> 8) & 0xff;
+      *out++ = code;
+      from += 2;
+    } else if (is_kana (*from)) {
+      *out++ = (char)euc_kana;
+      *out++ = *from++;
     } else {
-	return cvtbuf;
+      *out++ = *from++;
     }
+  }
+  *out = 0;
+  if (overwrite) {
+    pstrcpy((char *) save, (char *) cvtbuf);
+    return (char *) save;
+  } else {
+    return cvtbuf;
+  }
 }
 
 /*******************************************************************
  Convert FROM contain EUC codes to SHIFT JIS codes
  return converted buffer
 ********************************************************************/
+
 static char *euc_to_sj(char *from, BOOL overwrite)
 {
-    char *out;
-    char *save;
-
-    save = (char *) from;
-    for (out = cvtbuf; *from; ) {
-	if (is_euc (*from)) {
-	    int code = euc2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
-	    *out++ = (code >> 8) & 0xff;
-	    *out++ = code;
-	    from += 2;
-	} else if (is_euc_kana (*from)) {
-	    *out++ = from[1];
-	    from += 2;
-	} else {
-	    *out++ = *from++;
-	}
-    }
-    *out = 0;
-    if (overwrite) {
-	pstrcpy(save, (char *) cvtbuf);
-	return save;
+  char *out;
+  char *save;
+
+  save = (char *) from;
+  for (out = cvtbuf; *from; ) {
+    if (is_euc (*from)) {
+      int code = euc2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
+      *out++ = (code >> 8) & 0xff;
+      *out++ = code;
+      from += 2;
+    } else if (is_euc_kana (*from)) {
+      *out++ = from[1];
+      from += 2;
     } else {
-	return cvtbuf;
+      *out++ = *from++;
     }
+  }
+  *out = 0;
+  if (overwrite) {
+    pstrcpy(save, (char *) cvtbuf);
+    return save;
+  } else {
+    return cvtbuf;
+  }
 }
 
 /*******************************************************************
   JIS7,JIS8,JUNET <-> SJIS
 ********************************************************************/
+
 static int sjis2jis(int hi, int lo)
 {
-    if (lo >= 0x9f)
-	return ((hi * 2 - (hi >= 0xe0 ? 0x160 : 0xe0)) << 8) | (lo - 0x7e);
-    else
-	return ((hi * 2 - (hi >= 0xe0 ? 0x161 : 0xe1)) << 8) |
-	    (lo - (lo >= 0x7f ? 0x20 : 0x1f));
+  if (lo >= 0x9f)
+    return ((hi * 2 - (hi >= 0xe0 ? 0x160 : 0xe0)) << 8) | (lo - 0x7e);
+  else
+    return ((hi * 2 - (hi >= 0xe0 ? 0x161 : 0xe1)) << 8) |
+            (lo - (lo >= 0x7f ? 0x20 : 0x1f));
 }
 
 static int jis2sjis(int hi, int lo)
 {
-    if (hi & 1)
-	return ((hi / 2 + (hi < 0x5f ? 0x71 : 0xb1)) << 8) |
-	    (lo + (lo >= 0x60 ? 0x20 : 0x1f));
-    else
-	return ((hi / 2 + (hi < 0x5f ? 0x70 : 0xb0)) << 8) | (lo + 0x7e);
+  if (hi & 1)
+    return ((hi / 2 + (hi < 0x5f ? 0x71 : 0xb1)) << 8) |
+            (lo + (lo >= 0x60 ? 0x20 : 0x1f));
+  else
+    return ((hi / 2 + (hi < 0x5f ? 0x70 : 0xb0)) << 8) | (lo + 0x7e);
 }
 
 /*******************************************************************
  Convert FROM contain JIS codes to SHIFT JIS codes
  return converted buffer
 ********************************************************************/
+
 static char *jis8_to_sj(char *from, BOOL overwrite)
 {
     char *out;
@@ -533,6 +544,7 @@ static char *jis8_to_sj(char *from, BOOL overwrite)
  Convert FROM contain SHIFT JIS codes to JIS codes
  return converted buffer
 ********************************************************************/
+
 static char *sj_to_jis8(char *from, BOOL overwrite)
 {
     char *out;
@@ -1146,6 +1158,17 @@ static BOOL not_multibyte_char_1(char c)
 }
 
 /*******************************************************************
+ Function to determine if we are in a multibyte code page.
+*******************************************************************/
+
+static BOOL is_multibyte_codepage_val = False;
+
+BOOL is_multibyte_codepage(void)
+{
+  return is_multibyte_codepage_val;
+}
+
+/*******************************************************************
  Setup the function pointers for the functions that are replaced
  when multi-byte codepages are used.
 
@@ -1165,6 +1188,7 @@ void initialize_multibyte_vectors( int client_codepage)
     multibyte_strtok = (char *(*)(char *, char *)) sj_strtok;
     _skip_multibyte_char = skip_kanji_multibyte_char;
     is_multibyte_char_1 = is_kanji_multibyte_char_1;
+    is_multibyte_codepage_val = True;
     break;
   case HANGUL_CODEPAGE:
     multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
@@ -1173,6 +1197,7 @@ void initialize_multibyte_vectors( int client_codepage)
     multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
     _skip_multibyte_char = skip_generic_multibyte_char;
     is_multibyte_char_1 = hangul_is_multibyte_char_1;
+    is_multibyte_codepage_val = True;
     break;
   case BIG5_CODEPAGE:
     multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
@@ -1181,6 +1206,7 @@ void initialize_multibyte_vectors( int client_codepage)
     multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
     _skip_multibyte_char = skip_generic_multibyte_char;
     is_multibyte_char_1 = big5_is_multibyte_char_1;
+    is_multibyte_codepage_val = True;
     break;
   case SIMPLIFIED_CHINESE_CODEPAGE:
     multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
@@ -1189,6 +1215,7 @@ void initialize_multibyte_vectors( int client_codepage)
     multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
     _skip_multibyte_char = skip_generic_multibyte_char;
     is_multibyte_char_1 = simpch_is_multibyte_char_1;
+    is_multibyte_codepage_val = True;
     break;
   /*
    * Single char size code page.
@@ -1200,6 +1227,7 @@ void initialize_multibyte_vectors( int client_codepage)
     multibyte_strtok = (char *(*)(char *, char *)) strtok;
     _skip_multibyte_char = skip_non_multibyte_char;
     is_multibyte_char_1 = not_multibyte_char_1;
+    is_multibyte_codepage_val = False;
     break; 
   }
 }
diff --git a/source/lib/util.c b/source/lib/util.c
index e5486e6159e..58106acd468 100644
--- a/source/lib/util.c
+++ b/source/lib/util.c
@@ -1117,8 +1117,28 @@ char *skip_string(char *buf,int n)
 }
 
 /*******************************************************************
+ Count the number of characters in a string. Normally this will
+ be the same as the number of bytes in a string for single byte strings,
+ but will be different for multibyte.
+ 16.oct.98, jdblair@cobaltnet.com.
+********************************************************************/
+
+size_t str_charnum(char *s)
+{
+  size_t len = 0;
+  
+  while (*s != '\0') {
+    int skip = skip_multibyte_char(*s);
+    s += (skip ? skip : 1);
+    len++;
+  }
+  return len;
+}
+
+/*******************************************************************
 trim the specified elements off the front and back of a string
 ********************************************************************/
+
 BOOL trim_string(char *s,char *front,char *back)
 {
   BOOL ret = False;
@@ -1138,14 +1158,76 @@ BOOL trim_string(char *s,char *front,char *back)
     }
   }
 
-  s_len = strlen(s);
-  while (back_len && s_len >= back_len && 
-        (strncmp(s + s_len - back_len, back, back_len)==0))  
+  /*
+   * We split out the multibyte code page
+   * case here for speed purposes. Under a
+   * multibyte code page we need to walk the
+   * string forwards only and multiple times.
+   * Thanks to John Blair for finding this
+   * one. JRA.
+   */
+
+  if(back_len)
   {
-    ret = True;
-    s[s_len - back_len] = 0;
-    s_len = strlen(s);
-  }
+    if(!is_multibyte_codepage())
+    {
+      s_len = strlen(s);
+      while ((s_len >= back_len) && 
+             (strncmp(s + s_len - back_len, back, back_len)==0))  
+      {
+        ret = True;
+        s[s_len - back_len] = '\0';
+        s_len = strlen(s);
+      }
+    }
+    else
+    {
+
+      /*
+       * Multibyte code page case.
+       * Keep going through the string, trying
+       * to match the 'back' string with the end
+       * of the string. If we get a match, truncate
+       * 'back' off the end of the string and
+       * go through the string again from the
+       * start. Keep doing this until we have
+       * gone through the string with no match
+       * at the string end.
+       */
+
+      size_t mb_back_len = str_charnum(back);
+      size_t mb_s_len = str_charnum(s);
+
+      while(mb_s_len >= mb_back_len)
+      {
+        size_t charcount = 0;
+        char *mbp = s;
+
+        while(charcount < (mb_s_len - mb_back_len))
+        {
+          size_t skip = skip_multibyte_char(*mbp);
+          mbp += (skip ? skip : 1);
+          charcount++;
+        }
+
+        /*
+         * mbp now points at mb_back_len multibyte
+         * characters from the end of s.
+         */
+
+        if(strcmp(mbp, back) == 0)
+        {
+          ret = True;
+          *mbp = '\0';
+          mb_s_len = str_charnum(s);
+          mbp = s;
+        }
+        else
+          break;
+      } /* end while mb_s_len... */
+    } /* end else .. */
+  } /* end if back_len .. */
+
   return(ret);
 }
author	Jeremy Allison <jra@samba.org>	1998-10-20 20:08:35 +0000
committer	Jeremy Allison <jra@samba.org>	1998-10-20 20:08:35 +0000
commit	dbdbce29f56d03f6abf1ee3d96ca2032e688dcbc (patch)
tree	df14cd3ddfe3312ecade7c100e0000bea5204e7b /source/lib
parent	05a297e3a98c14360782af4ad0d851638fb5da9a (diff)
download	samba-dbdbce29f56d03f6abf1ee3d96ca2032e688dcbc.tar.gz samba-dbdbce29f56d03f6abf1ee3d96ca2032e688dcbc.tar.xz samba-dbdbce29f56d03f6abf1ee3d96ca2032e688dcbc.zip