/* -*- mode: C; mode: fold -*- */ /* slkanji.c --- Interface To use Japanese 2byte KANJI code * Copyright (c) 1995, 2000 Kazuhisa Yoshino(k-yosino@actweb.ne.jp) * This file is part of the Japanized S-Lang library. * * You may distribute under the terms of either the GNU General Public * License or the Perl Artistic License. */ #include #include #include "config.h" #include "slang.h" #include #include "slang.h" #include "_slang.h" #include "slkanji.h" static char *Kcode[] = { "Ascii", "Euc", "Jis", "Sjis", /* "Binary", */ /* "SLang", */ NULL }; #if 1 struct _kSLcode_data /* Extended EUC */ { unsigned char *name; char *pre_str; /* previous string(escape sequence). If this value unset, code data output... */ unsigned char *func_name; /* unsigned char *(*convert_func)(); */ /* argument is 1? (or 2?). */ int lenth; /* character byte length */ int width; /* character width */ int mode; /* 0: after here. 1: next word(1*lenth) only. */ /* int enable; */ /* enable/disable */ } *current_set=(struct _kSLcode_data *)NULL, kSLcode_data[0x20] = { /* 0x80 */ {"",NULL,NULL,1,1,1}, /* 0x81 */ {"jisx0201", "\x1b(B", NULL, 1, 1, 0}, /* 0x82 */ {0,0,0,0,0,0}, /* 0x83 */ {0,0,0,0,0,0}, /* 0x84 */ {0,0,0,0,0,0}, /* 0x85 */ {0,0,0,0,0,0}, /* 0x86 */ {0,0,0,0,0,0}, /* 0x87 */ {0,0,0,0,0,0}, /* 0x88 */ {0,0,0,0,0,0}, /* 0x89 */ {0,0,0,0,0,0}, /* 0x8a */ {0,0,0,0,0,0}, /* 0x8b */ {0,0,0,0,0,0}, /* 0x8c */ {0,0,0,0,0,0}, /* 0x8d */ {0,0,0,0,0,0}, /* 0x8e */ {"euc-jp-ss2", NULL, NULL, 1, 1, 1}, /* 0x8f */ {"euc-jp-ss3", NULL,NULL, 2,2,1}, /* 0x90 */ {"jisx0208-1983", "\x1b$B", NULL, 2, 2, 0}, /* 0x91 */ {"jisx0208-1978", "\x1b$@", NULL, 2, 2, 0}, /* 0x92 */ {0,0,0,0,0,0}, /* 0x93 */ {0,0,0,0,0,0}, /* 0x94 */ {0,0,0,0,0,0}, /* 0x95 */ {0,0,0,0,0,0}, /* 0x96 */ {0,0,0,0,0,0}, /* 0x97 */ {0,0,0,0,0,0}, /* 0x98 */ {0,0,0,0,0,0}, /* 0x99 */ {0,0,0,0,0,0}, /* 0x9a */ {0,0,0,0,0,0}, /* 0x9b */ {0,0,0,0,0,0}, /* 0x9c */ {0,0,0,0,0,0}, /* 0x9d */ {0,0,0,0,0,0}, /* 0x9e */ {"extended",NULL,NULL,-1,-1,-1}, /* 0x9f */ {"extended",NULL,NULL,-1,-1,-1} }; int kSLset_code_data(unsigned char *name, char *pre, unsigned char *func, int len, int mod) { int i, n; for (i=0 ; i<32 ; i++) { if (kSLcode_data[i].name == NULL && kSLcode_data[i].pre_str == NULL) break; } if(i == 32) return -1; /* kSLcode_data table is full */ kSLcode_data[i].name = (unsigned char*)SLmalloc(strlen(name)+1); strcpy(kSLcode_data[i].name, name); kSLcode_data[i].pre_str = (char*)SLmalloc(strlen(pre)+1); strcpy(kSLcode_data[i].pre_str, pre); kSLcode_data[i].func_name = (char*)SLmalloc(strlen(func)+1); strcpy(kSLcode_data[i].func_name, func); kSLcode_data[i].lenth = len; kSLcode_data[i].mode = mod; return i; } int kSLfind_code_data(unsigned char *name, char *pre) { int i, n; for (i=0 ; i<0x20 ; i++) { if((name && !strcmp(name, kSLcode_data[i].name)) || (pre && !strcmp(pre, kSLcode_data[i].pre_str))) return i; } return -1; } #if 0 void kSLget_code_data_member(int i) { SLang_push_string(kSLcode_data[i].name); SLang_push_string(kSLcode_data[i].pre_str); SLang_push_string(kSLcode_data[i].func_name); SLang_push_integer(kSLcode_data[i].lenth); SLang_push_integer(kSLcode_data[i].mode); } #endif /* int convert_function(void (unsigned char *buf, int bufsize, *get_func)(void)) { } */ int kSLstrlen(unsigned char *str) { register int len, n=0; register unsigned char *p = str; if (!p) return 0; while (*p) { if ((0x80 & *p) && (*p < 0xa0)) /* 0x80 <= *p < 0xa0 */ { len = kSLcode_data[*p & 0x7f].lenth; /* kSLcode_data[*p - 0x80] */ n += len; p += len; } else n++; p++; } return n; } #endif int kSLcode = SLANG_DEFAULT_KANJI_CODE; int kSLfile_code = SLANG_DEFAULT_KANJI_CODE, kSLinput_code = SLANG_DEFAULT_KANJI_CODE, kSLdisplay_code = SLANG_DEFAULT_KANJI_CODE, kSLsystem_code = SLANG_DEFAULT_KANJI_CODE; #ifdef IBMPC_SYSTEM int kSLfiAuto = FALSE, SKanaToDKana = FALSE; #else int kSLfiAuto = TRUE, SKanaToDKana = TRUE; #endif int jp_nokanji = NOKANJI; int ascii = ASCII; int jp_euc = EUC; int jp_jis = JIS; int jp_sjis = SJIS; int val_true = TRUE; int val_false = FALSE; int IsKanji(int c, int code) /*{{{*/ { /* if(!code) return FALSE; */ c = (c & 0xff); if(code == SJIS) { if((0x80 < c && c < 0xa0) || (0xe0 <= c && c <= 0xfc)) return TRUE; } else if(code == EUC) { if(0xa0 < c && c < 0xff) return TRUE; if(c == 0x8e) return TRUE; /* fake */ } else if(code == JIS) { if(0x20 < c && c < 0x7f) return TRUE; } return FALSE; } /*}}}*/ int kSLiskanji(int *n) /*{{{*/ { return (IsKanji(*n, kSLcode)); } /*}}}*/ /* * distinguish KANJI code of pointed position in string * argment: * beg: begin of string * pos: position of string * return: * 0: ASCII * 1: KANJI 1st byte * 2: KANJI 2nd byte */ int kanji_pos(unsigned char *beg, unsigned char *pos) /*{{{*/ { int ret = 0; unsigned char *p = beg; if((beg == pos) || !iskanji(*(pos-1))) { if (iskanji(*pos)) return 1; /* KNAJI 1st byte */ else return ASCII; /* ASCII: 0 */ } while(p < pos) { if (iskanji(*p)) p++; p++; } if(p != pos) return (p - pos +1); if(iskanji(*p)) return 1; return ASCII; } /*}}}*/ #define CHAR_MASK 0x000000FF int short_kanji_pos(unsigned short *beg, unsigned short *pos) /*{{{*/ { int ret = 0; unsigned short *p = beg; if((beg == pos) || !iskanji(*(pos-1) & CHAR_MASK)) { if (iskanji(*pos & CHAR_MASK)) return 1; /* KNAJI 1st byte */ else return ASCII; /* ASCII: 0 */ } while(p < pos) { if (iskanji(*p & CHAR_MASK)) p++; p++; } if (p != pos) return ((p - pos) +1); if (iskanji(*p & CHAR_MASK)) return 1; return ASCII; } /*}}}*/ int iskanji2nd(char *str, int col) { int j; if(!col || !iskanji(str[col-1])) return FALSE; for( j=0 ; j < col ; j++ ) { if (iskanji(str[j])) j++; } if( j == col ) return FALSE; else return TRUE; } char *kcode_to_str(int n) { int i=0; while(Kcode[i]) { if(i == n) return Kcode[n]; i++; } return Kcode[ASCII]; } #ifdef REAL_UNIX_SYSTEM int Stricmp(char *src, char *dst) { while(*src) { if(toupper(*src) != toupper(*dst)) return (toupper(*src) - toupper(*dst)); src++; dst++; } return 0; } #endif int str_to_kcode(char *s) { int i; for(i=0 ; Kcode[i] ; i++) { if(!Stricmp(Kcode[i], s)) return i; } return (int)NULL; } void sjistojis(char *src, char *dst) { #if 1 sjistoeuc(src, dst); *dst++ &= 0x7f; *dst &= 0x7f; #else unsigned int high; unsigned int low; high = *src & 0xff; low = *(src+1) & 0xff; if (high <= 0x9f) high -= 0x71; else high -= 0xb1; high = high * 2 + 1; if (low > 0x7f) low--; if (low >= 0x9e) { low -= 0x7d; high++; } else { low -= 0x1f; } *dst = (char)(high & 0x7f); *(dst+1) = (char)(low & 0x7f); #endif } void jistosjis(char *src, char *dst) { int high; int low; high = *src & 0x7f; low = *(src+1) & 0x7f; if (high & 1) low += 0x1f; else low += 0x7d; if (low >= 0x7f) low++; high = ((high - 0x21) >> 1) + 0x81; if (high > 0x9f) high += 0x40; *dst = (char)high; *(dst+1) = (char)low; } void euctosjis(char *src, char *dst) { #if 1 euctojis(src, dst); jistosjis(dst, dst); #else int high; int low; high = (*src & 0x7f); low = (*(src+1) & 0x7f); if (high & 1) low += 0x1f; else low += 0x7d; if (low >= 0x7f) low++; high = ((high - 0x21) >> 1) + 0x81; if (high > 0x9f) high += 0x40; *dst = (char)high; *(dst+1) = (char)low; #endif } void sjistoeuc(char *src, char *dst) { unsigned int high; unsigned int low; high = *src & 0xff; low = *(src+1) & 0xff; if (high <= 0x9f) high -= 0x71; else high -= 0xb1; high = high * 2 + 1; if (low > 0x7f) low--; if (low >= 0x9e) { low -= 0x7d; high++; } else { low -= 0x1f; } *dst = (char)(high | 0x80); *(dst+1) = (char)(low | 0x80); } void euctojis(char *src, char *dst) { *dst = *src & 0x7f; *(dst+1) = *(src+1) & 0x7f; } void jistoeuc(char *src, char *dst) { *dst = (*src | 0x80); *(dst+1) = (*(src+1) | 0x80); } void notconv(char *src, char *dst) { *dst = *src; *(dst+1) = *(src+1); } void (*kSLcodeconv[NCODE][NCODE])() = {{notconv, notconv, notconv, notconv}, {notconv, notconv, euctojis, euctosjis}, {notconv, jistoeuc, notconv, jistosjis}, {notconv, sjistoeuc, sjistojis, notconv}}; void displaycode_to_SLang(char *src, char *dst) { int in = kSLdisplay_code, out = kSLcode; if (in < 0 || NCODE <= in) in = ASCII; if (out < 0 || NCODE <= out) out = ASCII; kSLcodeconv[in][out](src, dst); } #define ISMARU(c) (0xca <= (c & 0xff) && (c & 0xff) <= 0xce) #define ISNIGORI(c) ((0xb6 <= (c & 0xff) && (c & 0xff) <= 0xc4)\ || (0xca <= (c & 0xff) && (c & 0xff) <= 0xce)\ || (0xb3 == (c & 0xff))) void han2zen(in, out, lin, lout, code) /*{{{*/ unsigned char *in, *out; int *lin, *lout, code; { int maru = FALSE, nigori = FALSE; unsigned char ch1, ch2 = '\0'; int mtable[][2] = { {129,66},{129,117},{129,118},{129,65},{129,69},{131,146},{131,64},{131,66}, {131,68},{131,70},{131,72},{131,131},{131,133},{131,135},{131,98},{129,91}, {131,65},{131,67},{131,69},{131,71},{131,73},{131,74},{131,76},{131,78}, {131,80},{131,82},{131,84},{131,86},{131,88},{131,90},{131,92},{131,94}, {131,96},{131,99},{131,101},{131,103},{131,105},{131,106},{131,107},{131,108}, {131,109},{131,110},{131,113},{131,116},{131,119},{131,122},{131,125},{131,126}, {131,128},{131,129},{131,130},{131,132},{131,134},{131,136},{131,137},{131,138}, {131,139},{131,140},{131,141},{131,143},{131,147},{129,74},{129,75} }; if(code == EUC) { ch1 = in[1]; if (SKanaToDKana <= 0) if (in[2] == SS2) ch2 = in[3]; } else if(code == JIS) { ch1 = (in[0] | 0x80); ch2 = (in[1] | 0x80); } else { ch1 = in[0]; ch2 = in[1]; } if( ch1 == 0xa0 ) { out[0] = ' '; out[1] = '\0'; *lin = *lout = 1; if(code == EUC) *lin = 2; } else { if (SKanaToDKana <= 0) { if(ch2 == 0xde && ISNIGORI(ch1)) nigori = TRUE; else if(ch2 == 0xdf && ISMARU(ch1)) maru = TRUE; } out[0] = mtable[ch1 - 0xa1][0]; out[1] = mtable[ch1 - 0xa1][1]; if(nigori) { if((0x4a <= out[1] && out[1] <= 0x67) || (0x6e <= out[1] && out[1] <= 0x7a)) out[1]++; else if(out[0] == 0x83 && out[1] == 0x45) out[1] = 0x94; } else if(maru && 0x6e <= out[1] && out[1] <= 0x7a) out[1] += 2; if(nigori || maru) *lin = 2; else *lin = 1; if(code == EUC) *lin *= 2; *lout = 2; } } /*}}}*/ /* * Not check, if src[n-1] is KANJI first byte, or if src[n-1] is in JIS ESC sequence, * it return more bigger number. understand? * * if you want change "src" string from Kanji *incode to Kanji *outcode, * this function return to need byte for Code Convert. * * htoz: hankaku to zenkaku flag (TRUE or FALSE) */ int kSLCheckLineNum(unsigned char *src, int n, int incode, int outcode, int htoz) { int i, siz=0; int kflg = FALSE, hflg = FALSE; int okflg = FALSE, ohflg = FALSE; for (i=0 ; i