summaryrefslogtreecommitdiffstats
path: root/source3/lib/kanji.c
diff options
context:
space:
mode:
Diffstat (limited to 'source3/lib/kanji.c')
-rw-r--r--source3/lib/kanji.c224
1 files changed, 93 insertions, 131 deletions
diff --git a/source3/lib/kanji.c b/source3/lib/kanji.c
index 994cf6e1bd0..e430c1a986c 100644
--- a/source3/lib/kanji.c
+++ b/source3/lib/kanji.c
@@ -54,11 +54,13 @@ char *(*multibyte_strtok)(char *, char *) = (char *(*)(char *, char *)) strtok;
* charcnv.c.
*/
-static int not_multibyte_char(char);
+static int skip_non_multibyte_char(char);
+static BOOL not_multibyte_char_1(char);
char *(*_dos_to_unix)(char *, BOOL) = dos2unix_format;
char *(*_unix_to_dos)(char *, BOOL) = unix2dos_format;
-int (*is_multibyte_char)(char) = not_multibyte_char;
+int (*_skip_multibyte_char)(char) = skip_non_multibyte_char;
+BOOL (*is_multibyte_char_1)(char) = not_multibyte_char_1;
#else /* KANJI */
@@ -68,11 +70,13 @@ int (*is_multibyte_char)(char) = not_multibyte_char;
*/
static char *sj_to_sj(char *from, BOOL overwrite);
-static int kanji_multibyte_char(char);
+static int skip_kanji_multibyte_char(char);
+static BOOL is_kanji_multibyte_char_1(char); /* name must match the definition and the initialiser below */
char *(*_dos_to_unix)(char *, BOOL) = sj_to_sj;
char *(*_unix_to_dos)(char *, BOOL) = sj_to_sj;
-int (*is_multibyte_char)(char) = kanji_multibyte_char;
+int (*_skip_multibyte_char)(char) = skip_kanji_multibyte_char;
+BOOL (*is_multibyte_char_1)(char) = is_kanji_multibyte_char_1; /* same pointer type as the non-KANJI branch */
#endif /* KANJI */
@@ -186,10 +190,10 @@ static char *sj_strrchr(char *s, int c)
}
/*******************************************************************
- Kanji multibyte char function.
+ Kanji multibyte char skip function.
*******************************************************************/
-static int kanji_multibyte_char(char c)
+static int skip_kanji_multibyte_char(char c)
{
if(is_shift_jis(c)) {
return 2;
@@ -200,128 +204,60 @@ static int kanji_multibyte_char(char c)
}
/*******************************************************************
- Hangul (Korean - code page 949) functions
-********************************************************************/
-/*******************************************************************
- search token from S1 separated any char of S2
- S1 contains hangul chars.
-********************************************************************/
-static char *hangul_strtok(char *s1, char *s2)
+ Kanji multibyte char identification.
+*******************************************************************/
+
+static BOOL is_kanji_multibyte_char_1(char c)
{
- static char *s = NULL;
- char *q;
- if (!s1) {
- if (!s) {
- return NULL;
- }
- s1 = s;
- }
- for (q = s1; *s1; ) {
- if (is_hangul (*s1)) {
- s1 += 2;
- } else {
- char *p = strchr (s2, *s1);
- if (p) {
- if (s1 != q) {
- s = s1 + 1;
- *s1 = '\0';
- return q;
- }
- q = s1 + 1;
- }
- s1++;
- }
- }
- s = NULL;
- if (*q) {
- return q;
- }
- return NULL;
+ return is_shift_jis(c);
}
/*******************************************************************
- search string S2 from S1
- S1 contains hangul chars.
+ The following functions are the only ones needed to do multibyte
+ support for Hangul, Big5 and Simplified Chinese. Most of the
+ real work for these codepages is done in the generic multibyte
+ functions. The only reason these functions are needed at all
+ is that the is_xxx(c) calls are really preprocessor macros.
********************************************************************/
-static char *hangul_strstr(char *s1, char *s2)
-{
- int len = strlen ((char *) s2);
- if (!*s2)
- return (char *) s1;
- for (;*s1;) {
- if (*s1 == *s2) {
- if (strncmp (s1, s2, len) == 0)
- return (char *) s1;
- }
- if (is_hangul (*s1)) {
- s1 += 2;
- } else {
- s1++;
- }
- }
- return 0;
-}
/*******************************************************************
- Search char C from beginning of S.
- S contains hangul chars.
+ Hangul (Korean - code page 949) function.
********************************************************************/
-static char *hangul_strchr (char *s, int c)
+
+static BOOL hangul_is_multibyte_char_1(char c)
{
- for (; *s; ) {
- if (*s == c)
- return (char *) s;
- if (is_hangul (*s)) {
- s += 2;
- } else {
- s++;
- }
- }
- return 0;
+ return is_hangul(c);
}
/*******************************************************************
- Search char C end of S.
- S contains hangul chars.
+ Big5 Traditional Chinese (code page 950) function.
********************************************************************/
-static char *hangul_strrchr(char *s, int c)
+
+static BOOL big5_is_multibyte_char_1(char c)
{
- char *q;
-
- for (q = 0; *s; ) {
- if (*s == c) {
- q = (char *) s;
- }
- if (is_hangul (*s)) {
- s += 2;
- } else {
- s++;
- }
- }
- return q;
+ return is_big5_c1(c);
}
/*******************************************************************
- Hangul multibyte char function.
-*******************************************************************/
+ Simplified Chinese (code page 936) function.
+********************************************************************/
-static int hangul_multibyte_char(char c)
+static BOOL simpch_is_multibyte_char_1(char c)
{
- if( is_hangul(c)) {
- return 2;
- }
- return 0;
+ return is_simpch_c1(c);
}
/*******************************************************************
- Big5 Traditional Chinese (code page 950) functions
+ Generic multibyte functions - used by Hangul, Big5 and Simplified
+ Chinese codepages.
********************************************************************/
/*******************************************************************
search token from S1 separated any char of S2
- S1 contains big5 chars.
+ S1 contains generic multibyte chars.
********************************************************************/
-static char *big5_strtok(char *s1, char *s2)
+
+static char *generic_multibyte_strtok(char *s1, char *s2)
{
static char *s = NULL;
char *q;
@@ -332,7 +268,7 @@ static char *big5_strtok(char *s1, char *s2)
s1 = s;
}
for (q = s1; *s1; ) {
- if (is_big5_c1 (*s1)) {
+ if ((*is_multibyte_char_1)(*s1)) {
s1 += 2;
} else {
char *p = strchr (s2, *s1);
@@ -356,9 +292,10 @@ static char *big5_strtok(char *s1, char *s2)
/*******************************************************************
search string S2 from S1
- S1 contains big5 chars.
+ S1 contains generic multibyte chars.
********************************************************************/
-static char *big5_strstr(char *s1, char *s2)
+
+static char *generic_multibyte_strstr(char *s1, char *s2)
{
int len = strlen ((char *) s2);
if (!*s2)
@@ -368,7 +305,7 @@ static char *big5_strstr(char *s1, char *s2)
if (strncmp (s1, s2, len) == 0)
return (char *) s1;
}
- if (is_big5_c1 (*s1)) {
+ if ((*is_multibyte_char_1)(*s1)) {
s1 += 2;
} else {
s1++;
@@ -379,14 +316,15 @@ static char *big5_strstr(char *s1, char *s2)
/*******************************************************************
Search char C from beginning of S.
- S contains big5 chars.
+ S contains generic multibyte chars.
********************************************************************/
-static char *big5_strchr (char *s, int c)
+
+static char *generic_multibyte_strchr(char *s, int c)
{
for (; *s; ) {
if (*s == c)
return (char *) s;
- if (is_big5_c1 (*s)) {
+ if ((*is_multibyte_char_1)(*s)) {
s += 2;
} else {
s++;
@@ -397,9 +335,10 @@ static char *big5_strchr (char *s, int c)
/*******************************************************************
Search char C end of S.
- S contains big5 chars.
+ S contains generic multibyte chars.
********************************************************************/
-static char *big5_strrchr(char *s, int c)
+
+static char *generic_multibyte_strrchr(char *s, int c)
{
char *q;
@@ -407,7 +346,7 @@ static char *big5_strrchr(char *s, int c)
if (*s == c) {
q = (char *) s;
}
- if (is_big5_c1 (*s)) {
+ if ((*is_multibyte_char_1)(*s)) {
s += 2;
} else {
s++;
@@ -417,12 +356,12 @@ static char *big5_strrchr(char *s, int c)
}
/*******************************************************************
- Big5 multibyte char function.
+ Generic multibyte char skip function.
*******************************************************************/
-static int big5_multibyte_char(char c)
+static int skip_generic_multibyte_char(char c)
{
- if( is_big5_c1(c)) {
+ if( (*is_multibyte_char_1)(c)) {
return 2;
}
return 0;
@@ -1091,9 +1030,10 @@ static void setup_string_function(int codes)
}
}
-/*
- * Interpret coding system.
- */
+/************************************************************************
+ Interpret coding system.
+************************************************************************/
+
void interpret_coding_system(char *str)
{
int codes = UNKNOWN_CODE;
@@ -1191,12 +1131,21 @@ void interpret_coding_system(char *str)
Non multibyte char function.
*******************************************************************/
-static int not_multibyte_char(char c)
+static int skip_non_multibyte_char(char c)
{
return 0;
}
/*******************************************************************
+ Function that always says a character isn't multibyte.
+*******************************************************************/
+
+static BOOL not_multibyte_char_1(char c)
+{
+ return False;
+}
+
+/*******************************************************************
Setup the function pointers for the functions that are replaced
when multi-byte codepages are used.
@@ -1214,28 +1163,43 @@ void initialize_multibyte_vectors( int client_codepage)
multibyte_strrchr = (char *(*)(char *, int )) sj_strrchr;
multibyte_strstr = (char *(*)(char *, char *)) sj_strstr;
multibyte_strtok = (char *(*)(char *, char *)) sj_strtok;
- is_multibyte_char = kanji_multibyte_char;
+ _skip_multibyte_char = skip_kanji_multibyte_char;
+ is_multibyte_char_1 = is_kanji_multibyte_char_1;
break;
case HANGUL_CODEPAGE:
- multibyte_strchr = (char *(*)(char *, int )) hangul_strchr;
- multibyte_strrchr = (char *(*)(char *, int )) hangul_strrchr;
- multibyte_strstr = (char *(*)(char *, char *)) hangul_strstr;
- multibyte_strtok = (char *(*)(char *, char *)) hangul_strtok;
- is_multibyte_char = hangul_multibyte_char;
- break;
+ multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
+ multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr;
+ multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr;
+ multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
+ _skip_multibyte_char = skip_generic_multibyte_char;
+ is_multibyte_char_1 = hangul_is_multibyte_char_1;
+ break; /* must not fall through: the next case would overwrite is_multibyte_char_1 */
case BIG5_CODEPAGE:
- multibyte_strchr = (char *(*)(char *, int )) big5_strchr;
- multibyte_strrchr = (char *(*)(char *, int )) big5_strrchr;
- multibyte_strstr = (char *(*)(char *, char *)) big5_strstr;
- multibyte_strtok = (char *(*)(char *, char *)) big5_strtok;
- is_multibyte_char = big5_multibyte_char;
+ multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
+ multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr;
+ multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr;
+ multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
+ _skip_multibyte_char = skip_generic_multibyte_char;
+ is_multibyte_char_1 = big5_is_multibyte_char_1;
+ break; /* must not fall through: the next case would overwrite is_multibyte_char_1 */
+ case SIMPLIFIED_CHINESE_CODEPAGE:
+ multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
+ multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr;
+ multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr;
+ multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
+ _skip_multibyte_char = skip_generic_multibyte_char;
+ is_multibyte_char_1 = simpch_is_multibyte_char_1;
break;
+ /*
+ * Single char size code page.
+ */
default:
multibyte_strchr = (char *(*)(char *, int )) strchr;
multibyte_strrchr = (char *(*)(char *, int )) strrchr;
multibyte_strstr = (char *(*)(char *, char *)) strstr;
multibyte_strtok = (char *(*)(char *, char *)) strtok;
- is_multibyte_char = not_multibyte_char;
+ _skip_multibyte_char = skip_non_multibyte_char;
+ is_multibyte_char_1 = not_multibyte_char_1;
break;
}
}