blob: 398e8a7627b7aab9ed39499b61e66858dc80a7de (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
|
#include "subuniutil.h"
// Copied: 2-8-2005
// From: secuniutil.c
/*
 * Decode one character from the UTF-8 byte sequence at *src and advance
 * *src past the bytes that were consumed.
 *
 * The UTF8len table (declared outside this file) is indexed by the top six
 * bits of the lead byte.  Its value selects how many payload bits the lead
 * byte contributes and how many continuation bytes (10xxxxxx) may follow:
 *
 *   0   stray continuation byte: keep its low 6 bits, accept up to 5 more
 *   1   single byte: returned as is
 *   2-6 multi-byte lead: up to (value - 1) continuation bytes
 *
 * Decoding stops early at the first byte that is not a continuation byte;
 * that byte is left unconsumed, so a truncated sequence yields a partial
 * value instead of reading past a terminating 0.
 *
 * Returns the accumulated code value.  UTF8len is expected to hold only
 * 0..6; for any other value nothing is consumed and the returned value is
 * indeterminate.
 */
unsigned long
utf8getcc( const char** src )
{
    const unsigned char* cursor = (const unsigned char*)*src;
    unsigned long code;
    unsigned lead_mask;   /* payload bits of the lead byte */
    int tail;             /* continuation bytes still acceptable */

    switch (UTF8len [(*cursor >> 2) & 0x3F]) {
    case 0:  lead_mask = 0x3F; tail = 5; break;  /* erroneous: middle of a character */
    case 1:  lead_mask = 0xFF; tail = 0; break;
    case 2:  lead_mask = 0x1F; tail = 1; break;
    case 3:  lead_mask = 0x0F; tail = 2; break;
    case 4:  lead_mask = 0x07; tail = 3; break;
    case 5:  lead_mask = 0x03; tail = 4; break;
    case 6:  lead_mask = 0x01; tail = 5; break;
    default: goto done;   /* unknown table value: consume nothing */
    }

    code = *cursor++ & lead_mask;

    /* Fold in 6 bits per continuation byte until the budget is used up or
       a non-continuation byte is met. */
    for (; tail > 0; --tail) {
        if ((*cursor & 0xC0) != 0x80) {
            break;
        }
        code = (code << 6) | (*cursor++ & 0x3F);
    }

done:
    *src = (const char*)cursor;
    return code;
}
//
/*
 * Convert the 0-terminated UTF-8 string 'buf' to a 0-terminated wide string.
 *
 *   buf       UTF-8 input; decoded one character at a time with utf8getcc().
 *   uni       destination array of at least 'inUnilen' wchar_t elements.
 *   inUnilen  capacity of 'uni' in ELEMENTS (not bytes), terminator included.
 *
 * The result is truncated if necessary to fit in uni[0..inUnilen-1] and is
 * always 0-terminated when anything is written.  Decoded values of 0xFFFE
 * and above are stored as U+FFFD REPLACEMENT CHARACTER.
 *
 * If 'buf' or 'uni' is NULL, or 'inUnilen' is zero or negative, nothing is
 * written.  Returns 'uni' in every case.
 *
 * XXX This function should be named UTF8ToUnicode
 * XXX inUnilen should be size_t, not int
 */
wchar_t *
ASCIIToUnicode( const char *buf, wchar_t *uni, int inUnilen )
{
    size_t unilen;
    size_t i;

    /* Reject the capacity while it is still signed: a negative int converted
       to size_t becomes a huge value and would defeat the bounds check in
       the loop below. */
    if (inUnilen <= 0 || !buf || !uni) {
        return uni;
    }
    unilen = (size_t)inUnilen;

    for (i = 0; i < unilen; ++i) {
        unsigned long c = utf8getcc( &buf );
        if (c >= 0xfffeUL) {
            c = 0xfffdUL; /* REPLACEMENT CHARACTER */
        }
        uni[i] = (wchar_t)c;
        if (uni[i] == 0) {
            return uni;   /* terminator copied: conversion complete */
        }
    }

    /* Ran out of room before reaching the end of the input: truncate. */
    uni[unilen - 1] = 0;
    return uni;
}
/*
 * Convert the 0-terminated UTF-8 string 'buf' to a newly allocated,
 * 0-terminated wide string.
 *
 * The conversion goes through a fixed 1024-element scratch buffer, so the
 * result holds at most 1023 characters plus the terminator.  A NULL 'buf'
 * yields an empty string.
 *
 * Returns whatever _wcsdup() returns for the converted text; per the
 * _wcsdup documentation that is malloc'ed storage the caller releases with
 * free(), or NULL if the allocation fails.
 */
wchar_t *
StrToUnicode( const char *buf )
{
    wchar_t unibuf[1024];

    /* ASCIIToUnicode() writes nothing when 'buf' is NULL; pre-terminate so
       _wcsdup() never reads an uninitialized array. */
    unibuf[0] = 0;

    /* The third argument is a capacity in elements, not bytes. */
    ASCIIToUnicode( buf, unibuf, (int)(sizeof(unibuf) / sizeof(unibuf[0])) );
    return _wcsdup( unibuf );
}
// End Copy
|