diff options
Diffstat (limited to 'lib/util/charset')
-rw-r--r-- | lib/util/charset/CP437.c | 135 | ||||
-rw-r--r-- | lib/util/charset/CP850.c | 121 | ||||
-rw-r--r-- | lib/util/charset/charcnv.c | 135 | ||||
-rw-r--r-- | lib/util/charset/charset.h | 16 | ||||
-rw-r--r-- | lib/util/charset/charset_macosxfs.c | 605 | ||||
-rw-r--r-- | lib/util/charset/codepoints.c | 50 | ||||
-rw-r--r-- | lib/util/charset/convert_string.c | 54 | ||||
-rw-r--r-- | lib/util/charset/pull_push.c | 150 | ||||
-rw-r--r-- | lib/util/charset/tests/convert_string.c | 547 | ||||
-rw-r--r-- | lib/util/charset/util_str.c | 88 | ||||
-rw-r--r-- | lib/util/charset/util_unistr.c | 248 | ||||
-rw-r--r-- | lib/util/charset/util_unistr_w.c | 42 | ||||
-rw-r--r-- | lib/util/charset/weird.c | 134 | ||||
-rw-r--r-- | lib/util/charset/wscript_build | 50 |
14 files changed, 1821 insertions, 554 deletions
diff --git a/lib/util/charset/CP437.c b/lib/util/charset/CP437.c new file mode 100644 index 0000000000..1e478d678f --- /dev/null +++ b/lib/util/charset/CP437.c @@ -0,0 +1,135 @@ +/* + * Conversion table for CP437 charset also known as IBM437 + * + * Copyright (C) Alexander Bokovoy 2003 + * + * Conversion tables are generated using GNU libc 2.2.5's + * localedata/charmaps/IBM437 table and source/script/gen-8bit-gap.sh script + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "includes.h" + +static const uint16_t to_ucs2[256] = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, + 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, + 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, + 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, + 0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x20A7, 0x0192, + 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, + 0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, + 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510, + 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, + 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567, + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, + 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580, + 0x03B1, 0x00DF, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4, + 0x03A6, 0x0398, 0x03A9, 0x03B4, 0x221E, 0x03C6, 0x03B5, 0x2229, + 0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248, + 0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0, +}; + +static const struct charset_gap_table from_idx[] = { + { 0x0000, 0x007f, 0 }, + { 0x00a0, 0x00c9, -32 }, + { 0x00d1, 0x00ff, -39 }, + { 0x0192, 0x0192, -185 }, + { 0x0393, 0x0398, -697 }, + { 0x03a3, 0x03a9, -707 }, + { 0x03b1, 0x03b5, -714 }, + { 0x03c0, 0x03c6, -724 }, + { 0x207f, 0x207f, -8076 }, + { 0x20a7, 0x20a7, -8115 }, + { 0x2219, 0x221e, -8484 }, + { 0x2229, 0x2229, -8494 }, + { 0x2248, 0x2248, -8524 }, + { 0x2261, 0x2265, -8548 }, + { 0x2310, 0x2310, -8718 }, + { 0x2320, 0x2321, -8733 }, + { 0x2500, 0x2502, -9211 }, + { 0x250c, 0x251c, -9220 }, + { 0x2524, 0x2524, -9227 }, + { 0x252c, 0x252c, -9234 }, + { 0x2534, 0x2534, -9241 }, + { 0x253c, 0x253c, -9248 }, + { 0x2550, 0x256c, -9267 }, + { 0x2580, 0x2593, -9286 }, + { 0x25a0, 0x25a0, -9298 }, + { 0xffff, 0xffff, 0 } +}; + +static const unsigned char from_ucs2[] = { + + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0xff, 0xad, 0x9b, 0x9c, 0x00, 0x9d, 0x00, 0x00, + 0x00, 0x00, 0xa6, 0xae, 0xaa, 0x00, 0x00, 0x00, + 0xf8, 0xf1, 0xfd, 0x00, 0x00, 0xe6, 0x00, 0xfa, + 0x00, 0x00, 0xa7, 0xaf, 0xac, 0xab, 0x00, 0xa8, + 0x00, 0x00, 0x00, 0x00, 0x8e, 0x8f, 0x92, 0x80, + 0x00, 0x90, 0xa5, 0x00, 0x00, 0x00, 0x00, 0x99, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0x00, 0x00, + 0xe1, 0x85, 0xa0, 0x83, 0x00, 0x84, 0x86, 0x91, + 0x87, 0x8a, 0x82, 0x88, 0x89, 0x8d, 0xa1, 0x8c, + 0x8b, 0x00, 0xa4, 0x95, 0xa2, 0x93, 0x00, 0x94, + 0xf6, 0x00, 0x97, 0xa3, 0x96, 0x81, 0x00, 0x00, + 0x98, 0x9f, 0xe2, 0x00, 0x00, 0x00, 0x00, 0xe9, + 0xe4, 0x00, 0x00, 0xe8, 0x00, 0x00, 0xea, 0xe0, + 0x00, 0x00, 0xeb, 0xee, 0xe3, 0x00, 0x00, 0xe5, + 0xe7, 0x00, 0xed, 0xfc, 0x9e, 0xf9, 0xfb, 0x00, + 0x00, 0x00, 0xec, 0xef, 0xf7, 0xf0, 0x00, 0x00, + 0xf3, 0xf2, 0xa9, 0xf4, 0xf5, 0xc4, 0x00, 0xb3, + 0xda, 0x00, 0x00, 0x00, 0xbf, 0x00, 0x00, 0x00, + 0xc0, 0x00, 0x00, 0x00, 0xd9, 0x00, 0x00, 0x00, + 0xc3, 0xb4, 0xc2, 0xc1, 0xc5, 0xcd, 0xba, 0xd5, + 0xd6, 0xc9, 0xb8, 0xb7, 0xbb, 0xd4, 0xd3, 0xc8, + 0xbe, 0xbd, 0xbc, 0xc6, 0xc7, 0xcc, 0xb5, 0xb6, + 0xb9, 0xd1, 0xd2, 0xcb, 0xcf, 0xd0, 0xca, 0xd8, + 0xd7, 0xce, 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00, + 0x00, 0x00, 0xdb, 0x00, 0x00, 0x00, 0xdd, 0x00, + 0x00, 0x00, 0xde, 0xb0, 0xb1, 0xb2, 0xfe, +}; + +SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP(CP437) diff --git a/lib/util/charset/CP850.c b/lib/util/charset/CP850.c new file mode 100644 index 0000000000..87a76f4cdf --- /dev/null +++ b/lib/util/charset/CP850.c @@ -0,0 +1,121 @@ +/* + * Conversion table for CP850 charset also known as IBM850. + * + * Copyright (C) Alexander Bokovoy 2003 + * + * Conversion tables are generated using GNU libc 2.2.5's + * localedata/charmaps/IBM850 table and source/script/gen-8bit-gap.sh script + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "includes.h" + +static const uint16_t to_ucs2[256] = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, + 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, + 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, + 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, + 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192, + 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, + 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0, + 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510, + 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3, + 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4, + 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE, + 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580, + 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE, + 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4, + 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8, + 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0, +}; + +static const struct charset_gap_table from_idx[] = { + /* start, end, idx */ + { 0x0000, 0x007f, 0 }, + { 0x00a0, 0x00ff, -32 }, + { 0x0131, 0x0131, -81 }, + { 0x0192, 0x0192, -177 }, + { 0x2017, 0x2017, -7989 }, + { 0x2500, 0x2502, -9245 }, + { 0x250c, 0x251c, -9254 }, + { 0x2524, 0x2524, -9261 }, + { 0x252c, 0x252c, -9268 }, + { 0x2534, 0x2534, -9275 }, + { 0x253c, 0x253c, -9282 }, + { 0x2550, 0x256c, -9301 }, + { 0x2580, 0x2588, -9320 }, + { 0x2591, 0x2593, -9328 }, + { 0x25a0, 0x25a0, -9340 }, + { 0xffff, 0xffff, 0 } +}; +static const unsigned char from_ucs2[] = { + + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0xff, 0xad, 0xbd, 0x9c, 0xcf, 0xbe, 0xdd, 0xf5, + 0xf9, 0xb8, 0xa6, 0xae, 0xaa, 0xf0, 0xa9, 0xee, + 0xf8, 0xf1, 0xfd, 0xfc, 0xef, 0xe6, 0xf4, 0xfa, + 0xf7, 0xfb, 0xa7, 0xaf, 0xac, 0xab, 0xf3, 0xa8, + 0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80, + 0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8, + 0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0x9e, + 0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0xe1, + 0x85, 0xa0, 0x83, 0xc6, 0x84, 0x86, 0x91, 0x87, + 0x8a, 0x82, 0x88, 0x89, 0x8d, 0xa1, 0x8c, 0x8b, + 0xd0, 0xa4, 0x95, 0xa2, 0x93, 0xe4, 0x94, 0xf6, + 0x9b, 0x97, 0xa3, 0x96, 0x81, 0xec, 0xe7, 0x98, + 0xd5, 0x9f, 0xf2, 0xc4, 0x00, 0xb3, 0xda, 0x00, + 0x00, 0x00, 0xbf, 0x00, 0x00, 0x00, 0xc0, 0x00, + 0x00, 0x00, 0xd9, 0x00, 0x00, 0x00, 0xc3, 0xb4, + 0xc2, 0xc1, 0xc5, 0xcd, 0xba, 0x00, 0x00, 0xc9, + 0x00, 0x00, 0xbb, 0x00, 0x00, 0xc8, 0x00, 0x00, + 0xbc, 0x00, 0x00, 0xcc, 0x00, 0x00, 0xb9, 0x00, + 0x00, 0xcb, 0x00, 0x00, 0xca, 0x00, 0x00, 0xce, + 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, + 0xdb, 0xb0, 0xb1, 0xb2, 0xfe, +}; + +SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP(CP850) + diff --git a/lib/util/charset/charcnv.c b/lib/util/charset/charcnv.c index 998bb08fd7..076795a0b2 100644 --- a/lib/util/charset/charcnv.c +++ b/lib/util/charset/charcnv.c @@ -113,138 +113,3 @@ convert: return destlen; } - -/** - * Convert string from one encoding to another, making error checking etc - * - * @param src pointer to source string (multibyte or singlebyte) - * @param srclen length of the source string in bytes - * @param dest pointer to destination string (multibyte or singlebyte) - * @param destlen maximal length allowed for string - * @returns the number of bytes occupied in the destination - * on error, returns -1, and sets errno - **/ -_PUBLIC_ bool convert_string_error_handle(struct smb_iconv_handle *ic, - charset_t from, charset_t to, - void const *src, size_t srclen, - void *dest, size_t destlen, - size_t *converted_size) -{ - size_t i_len, o_len; - ssize_t retval; - const char* inbuf = (const char*)src; - char* outbuf = (char*)dest; - smb_iconv_t descriptor; - - if (srclen == (size_t)-1) - srclen = strlen(inbuf)+1; - - descriptor = get_conv_handle(ic, from, to); - if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { - if (converted_size) { - *converted_size = 0; - } - errno = EINVAL; - return -1; - } - - i_len=srclen; - o_len=destlen; - - retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len); - - if (converted_size != NULL) - *converted_size = destlen-o_len; - return (retval != (ssize_t)-1); -} - - -/** - * Convert string from one encoding to another, making error checking etc - * - * @param src pointer to source string (multibyte or singlebyte) - * @param srclen length of the source string in bytes - * @param dest pointer to destination string (multibyte or singlebyte) - * @param destlen maximal length allowed for string - * @returns the number of bytes occupied in the destination - **/ -_PUBLIC_ bool convert_string_handle(struct smb_iconv_handle *ic, - charset_t from, charset_t to, - void const *src, size_t srclen, - void *dest, size_t destlen, size_t *converted_size) -{ - bool retval; - - retval = convert_string_error_handle(ic, from, to, src, srclen, dest, destlen, converted_size); - if(retval==false) { - const char *reason; - switch(errno) { - case EINVAL: - reason="Incomplete multibyte sequence"; - return false; - case E2BIG: - reason="No more room"; - if (from == CH_UNIX) { - DEBUG(0,("E2BIG: convert_string_handle(%s,%s): srclen=%d destlen=%d - '%s'\n", - charset_name(ic, from), charset_name(ic, to), - (int)srclen, (int)destlen, - (const char *)src)); - } else { - DEBUG(0,("E2BIG: convert_string_handle(%s,%s): srclen=%d destlen=%d\n", - charset_name(ic, from), charset_name(ic, to), - (int)srclen, (int)destlen)); - } - return false; - case EILSEQ: - reason="Illegal multibyte sequence"; - return false; - default: - return false; - } - } - return true; -} - -/** - * Convert between character sets, allocating a new buffer using talloc for the result. - * - * @param srclen length of source buffer. - * @param dest always set at least to NULL - * @note -1 is not accepted for srclen. - * - * @returns Size in bytes of the converted string; or -1 in case of error. - **/ - -_PUBLIC_ bool convert_string_talloc_handle(TALLOC_CTX *ctx, - struct smb_iconv_handle *ic, - charset_t from, charset_t to, - void const *src, size_t srclen, - void *dst, size_t *converted_size) -{ - void **dest = (void **)dst; - smb_iconv_t descriptor; - ssize_t ret; - - *dest = NULL; - - if (src == NULL || srclen == (size_t)-1 || srclen == 0) - return false; - - descriptor = get_conv_handle(ic, from, to); - - if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { - /* conversion not supported, return -1*/ - DEBUG(3, ("convert_string_talloc_handle: conversion from %s to %s not supported!\n", - charset_name(ic, from), - charset_name(ic, to))); - return false; - } - - ret = iconv_talloc(ctx, descriptor, src, srclen, dest); - if (ret == -1) - return false; - if (converted_size != NULL) - *converted_size = ret; - return true; -} - diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h index 1078035592..b36c461003 100644 --- a/lib/util/charset/charset.h +++ b/lib/util/charset/charset.h @@ -28,7 +28,7 @@ #include <talloc.h> /* this defines the charset types used in samba */ -typedef enum {CH_UTF16LE=0, CH_UTF16=0, CH_UNIX, CH_DISPLAY, CH_DOS, CH_UTF8, CH_UTF16BE, CH_UTF16MUNGED} charset_t; +typedef enum {CH_UTF16LE=0, CH_UTF16=0, CH_UNIX, CH_DOS, CH_UTF8, CH_UTF16BE, CH_UTF16MUNGED} charset_t; #define NUM_CHARSETS 7 @@ -105,11 +105,6 @@ typedef struct smb_iconv_s { struct loadparm_context; struct smb_iconv_handle; -/* replace some string functions with multi-byte - versions */ -#define strlower(s) strlower_m(s) -#define strupper(s) strupper_m(s) - char *strchr_m(const char *s, char c); /** * Calculate the number of units (8 or 16-bit, depending on the @@ -137,8 +132,6 @@ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle, const char *s1, const char *s2); int strcasecmp_m(const char *s1, const char *s2); size_t count_chars_m(const char *s, char c); -void strupper_m(char *s); -void strlower_m(char *s); char *strupper_talloc(TALLOC_CTX *ctx, const char *src); char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src); char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle, @@ -155,6 +148,7 @@ bool strhasupper_handle(struct smb_iconv_handle *ic, const char *string); char *strrchr_m(const char *s, char c); char *strchr_m(const char *s, char c); +char *strstr_m(const char *src, const char *findstr); bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size); bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size); @@ -188,8 +182,7 @@ extern struct smb_iconv_handle *global_iconv_handle; struct smb_iconv_handle *get_iconv_handle(void); struct smb_iconv_handle *get_iconv_testing_handle(TALLOC_CTX *mem_ctx, const char *dos_charset, - const char *unix_charset, - const char *display_charset); + const char *unix_charset); smb_iconv_t get_conv_handle(struct smb_iconv_handle *ic, charset_t from, charset_t to); const char *charset_name(struct smb_iconv_handle *ic, charset_t ch); @@ -218,7 +211,6 @@ int codepoint_cmpi(codepoint_t c1, codepoint_t c2); struct smb_iconv_handle *smb_iconv_handle_reinit(TALLOC_CTX *mem_ctx, const char *dos_charset, const char *unix_charset, - const char *display_charset, bool native_iconv, struct smb_iconv_handle *old_ic); @@ -285,7 +277,7 @@ static size_t CHARSETNAME ## _push(void *cd, const char **inbuf, size_t *inbytes int i; \ int done = 0; \ \ - uint16 ch = SVAL(*inbuf,0); \ + uint16_t ch = SVAL(*inbuf,0); \ \ for (i=0; from_idx[i].start != 0xffff; i++) { \ if ((from_idx[i].start <= ch) && (from_idx[i].end >= ch)) { \ diff --git a/lib/util/charset/charset_macosxfs.c b/lib/util/charset/charset_macosxfs.c new file mode 100644 index 0000000000..4d2ba5b6ff --- /dev/null +++ b/lib/util/charset/charset_macosxfs.c @@ -0,0 +1,605 @@ +/* + Unix SMB/CIFS implementation. + Samba charset module for Mac OS X/Darwin + Copyright (C) Benjamin Riefenstahl 2003 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* + * modules/charset_macosxfs.c + * + * A Samba charset module to use on Mac OS X/Darwin as the filesystem + * and display encoding. + * + * Actually two implementations are provided here. The default + * implementation is based on the official CFString API. The other is + * based on internal CFString APIs as defined in the OpenDarwin + * source. + */ + +#include "includes.h" +#undef realloc + +/* + * Include OS frameworks. These are only needed in this module. + */ +#include <CoreFoundation/CFString.h> + +/* + * See if autoconf has found us the internal headers in some form. + */ +#if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H +# include <CoreFoundation/CFStringEncodingConverter.h> +# include <CoreFoundation/CFUnicodePrecomposition.h> +# define USE_INTERNAL_API 1 +#elif HAVE_CFSTRINGENCODINGCONVERTER_H +# include <CFStringEncodingConverter.h> +# include <CFUnicodePrecomposition.h> +# define USE_INTERNAL_API 1 +#endif + +/* + * Compile time configuration: Do we want debug output? + */ +/* #define DEBUG_STRINGS 1 */ + +/* + * A simple, but efficient memory provider for our buffers. + */ +static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize) +{ + if (newsize > *size) { + *size = newsize + 128; + buffer = realloc(buffer, *size); + } + return buffer; +} + +/* + * While there is a version of OpenDarwin for intel, the usual case is + * big-endian PPC. So we need byte swapping to handle the + * little-endian byte order of the network protocol. We also need an + * additional dynamic buffer to do this work for incoming data blocks, + * because we have to consider the original data as constant. + * + * We abstract the differences away by providing a simple facade with + * these functions/macros: + * + * le_to_native(dst,src,len) + * native_to_le(cp,len) + * set_ucbuffer_with_le(buffer,bufsize,data,size) + * set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve) + */ +#ifdef WORDS_BIGENDIAN + +static inline void swap_bytes (char * dst, const char * src, size_t len) +{ + const char *srcend = src + len; + while (src < srcend) { + dst[0] = src[1]; + dst[1] = src[0]; + dst += 2; + src += 2; + } +} +static inline void swap_bytes_inplace (char * cp, size_t len) +{ + char temp; + char *end = cp + len; + while (cp < end) { + temp = cp[1]; + cp[1] = cp[0]; + cp[0] = temp; + cp += 2; + } +} + +#define le_to_native(dst,src,len) swap_bytes(dst,src,len) +#define native_to_le(cp,len) swap_bytes_inplace(cp,len) +#define set_ucbuffer_with_le(buffer,bufsize,data,size) \ + set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0) + +#else /* ! WORDS_BIGENDIAN */ + +#define le_to_native(dst,src,len) memcpy(dst,src,len) +#define native_to_le(cp,len) /* nothing */ +#define set_ucbuffer_with_le(buffer,bufsize,data,size) \ + (((void)(bufsize)),(UniChar*)(data)) + +#endif + +static inline UniChar *set_ucbuffer_with_le_copy ( + UniChar *buffer, size_t *bufsize, + const void *data, size_t size, size_t reserve) +{ + buffer = resize_buffer(buffer, bufsize, size+reserve); + le_to_native((char*)buffer,data,size); + return buffer; +} + + +/* + * A simple hexdump function for debugging error conditions. + */ +#define debug_out(s) DEBUG(0,(s)) + +#ifdef DEBUG_STRINGS + +static void hexdump( const char * label, const char * s, size_t len ) +{ + size_t restlen = len; + debug_out("<<<<<<<\n"); + debug_out(label); + debug_out("\n"); + while (restlen > 0) { + char line[100]; + size_t i, j; + char * d = line; +#undef sprintf + d += sprintf(d, "%04X ", (unsigned)(len-restlen)); + *d++ = ' '; + for( i = 0; i<restlen && i<8; ++i ) { + d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF); + } + for( j = i; j<8; ++j ) { + d += sprintf(d, " "); + } + *d++ = ' '; + for( i = 8; i<restlen && i<16; ++i ) { + d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF); + } + for( j = i; j<16; ++j ) { + d += sprintf(d, " "); + } + *d++ = ' '; + for( i = 0; i<restlen && i<16; ++i ) { + if(s[i] < ' ' || s[i] >= 0x7F || !isprint(s[i])) + *d++ = '.'; + else + *d++ = s[i]; + } + *d++ = '\n'; + *d = 0; + restlen -= i; + s += i; + debug_out(line); + } + debug_out(">>>>>>>\n"); +} + +#else /* !DEBUG_STRINGS */ + +#define hexdump(label,s,len) /* nothing */ + +#endif + + +#if !USE_INTERNAL_API + +/* + * An implementation based on documented Mac OS X APIs. + * + * This does a certain amount of memory management, creating and + * manipulating CFString objects. We try to minimize the impact by + * keeping those objects around and re-using them. We also use + * external backing store for the CFStrings where this is possible and + * benficial. + * + * The Unicode normalizations forms available at this level are + * generic, not specifically for the file system. So they may not be + * perfect fits. + */ +static size_t macosxfs_encoding_pull( + void *cd, /* Encoder handle */ + char **inbuf, size_t *inbytesleft, /* Script string */ + char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */ +{ + static const int script_code = kCFStringEncodingUTF8; + static CFMutableStringRef cfstring = NULL; + size_t outsize; + CFRange range; + + (void) cd; /* UNUSED */ + + if (0 == *inbytesleft) { + return 0; + } + + if (NULL == cfstring) { + /* + * A version with an external backing store as in the + * push function should have been more efficient, but + * testing shows, that it is actually slower (!). + * Maybe kCFAllocatorDefault gets shortcut evaluation + * internally, while kCFAllocatorNull doesn't. + */ + cfstring = CFStringCreateMutable(kCFAllocatorDefault,0); + } + + /* + * Three methods of appending to a CFString, choose the most + * efficient. + */ + if (0 == (*inbuf)[*inbytesleft-1]) { + CFStringAppendCString(cfstring, *inbuf, script_code); + } else if (*inbytesleft <= 255) { + Str255 buffer; + buffer[0] = *inbytesleft; + memcpy(buffer+1, *inbuf, buffer[0]); + CFStringAppendPascalString(cfstring, buffer, script_code); + } else { + /* + * We would like to use a fixed buffer and a loop + * here, but than we can't garantee that the input is + * well-formed UTF-8, as we are supposed to do. + */ + static char *buffer = NULL; + static size_t buflen = 0; + buffer = resize_buffer(buffer, &buflen, *inbytesleft+1); + memcpy(buffer, *inbuf, *inbytesleft); + buffer[*inbytesleft] = 0; + CFStringAppendCString(cfstring, *inbuf, script_code); + } + + /* + * Compose characters, using the non-canonical composition + * form. + */ + CFStringNormalize(cfstring, kCFStringNormalizationFormC); + + outsize = CFStringGetLength(cfstring); + range = CFRangeMake(0,outsize); + + if (outsize == 0) { + /* + * HACK: smbd/mangle_hash2.c:is_legal_name() expects + * errors here. That function will always pass 2 + * characters. smbd/open.c:check_for_pipe() cuts a + * patchname to 10 characters blindly. Suppress the + * debug output in those cases. + */ + if(2 != *inbytesleft && 10 != *inbytesleft) { + debug_out("String conversion: " + "An unknown error occurred\n"); + hexdump("UTF8->UTF16LE (old) input", + *inbuf, *inbytesleft); + } + errno = EILSEQ; /* Not sure, but this is what we have + * actually seen. */ + return -1; + } + if (outsize*2 > *outbytesleft) { + CFStringDelete(cfstring, range); + debug_out("String conversion: " + "Output buffer too small\n"); + hexdump("UTF8->UTF16LE (old) input", + *inbuf, *inbytesleft); + errno = E2BIG; + return -1; + } + + CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf); + CFStringDelete(cfstring, range); + + native_to_le(*outbuf, outsize*2); + + /* + * Add a converted null byte, if the CFString conversions + * prevented that until now. + */ + if (0 == (*inbuf)[*inbytesleft-1] && + (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) { + + if ((outsize*2+2) > *outbytesleft) { + debug_out("String conversion: " + "Output buffer too small\n"); + hexdump("UTF8->UTF16LE (old) input", + *inbuf, *inbytesleft); + errno = E2BIG; + return -1; + } + + (*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0; + outsize += 2; + } + + *inbuf += *inbytesleft; + *inbytesleft = 0; + *outbuf += outsize*2; + *outbytesleft -= outsize*2; + + return 0; +} + +static size_t macosxfs_encoding_push( + void *cd, /* Encoder handle */ + char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */ + char **outbuf, size_t *outbytesleft) /* Script string */ +{ + static const int script_code = kCFStringEncodingUTF8; + static CFMutableStringRef cfstring = NULL; + static UniChar *buffer = NULL; + static size_t buflen = 0; + CFIndex outsize, cfsize, charsconverted; + + (void) cd; /* UNUSED */ + + if (0 == *inbytesleft) { + return 0; + } + + /* + * We need a buffer that can hold 4 times the original data, + * because that is the theoretical maximum that decomposition + * can create currently (in Unicode 4.0). + */ + buffer = set_ucbuffer_with_le_copy( + buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft); + + if (NULL == cfstring) { + cfstring = CFStringCreateMutableWithExternalCharactersNoCopy( + kCFAllocatorDefault, + buffer, *inbytesleft/2, buflen/2, + kCFAllocatorNull); + } else { + CFStringSetExternalCharactersNoCopy( + cfstring, + buffer, *inbytesleft/2, buflen/2); + } + + /* + * Decompose characters, using the non-canonical decomposition + * form. + * + * NB: This isn't exactly what HFS+ wants (see note on + * kCFStringEncodingUseHFSPlusCanonical in + * CFStringEncodingConverter.h), but AFAIK it's the best that + * the official API can do. + */ + CFStringNormalize(cfstring, kCFStringNormalizationFormD); + + cfsize = CFStringGetLength(cfstring); + charsconverted = CFStringGetBytes( + cfstring, CFRangeMake(0,cfsize), + script_code, 0, false, + *outbuf, *outbytesleft, &outsize); + + if (0 == charsconverted) { + debug_out("String conversion: " + "Buffer too small or not convertable\n"); + hexdump("UTF16LE->UTF8 (old) input", + *inbuf, *inbytesleft); + errno = EILSEQ; /* Probably more likely. */ + return -1; + } + + /* + * Add a converted null byte, if the CFString conversions + * prevented that until now. + */ + if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] && + (0 != (*outbuf)[outsize-1])) { + + if (((size_t)outsize+1) > *outbytesleft) { + debug_out("String conversion: " + "Output buffer too small\n"); + hexdump("UTF16LE->UTF8 (old) input", + *inbuf, *inbytesleft); + errno = E2BIG; + return -1; + } + + (*outbuf)[outsize] = 0; + ++outsize; + } + + *inbuf += *inbytesleft; + *inbytesleft = 0; + *outbuf += outsize; + *outbytesleft -= outsize; + + return 0; +} + +#else /* USE_INTERNAL_API */ + +/* + * An implementation based on internal code as known from the + * OpenDarwin CVS. + * + * This code doesn't need much memory management because it uses + * functions that operate on the raw memory directly. + * + * The push routine here is faster and more compatible with HFS+ than + * the other implementation above. The pull routine is only faster + * for some strings, slightly slower for others. The pull routine + * looses because it has to iterate over the data twice, once to + * decode UTF-8 and than to do the character composition required by + * Windows. + */ +static size_t macosxfs_encoding_pull( + void *cd, /* Encoder handle */ + char **inbuf, size_t *inbytesleft, /* Script string */ + char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */ +{ + static const int script_code = kCFStringEncodingUTF8; + UInt32 srcCharsUsed = 0; + UInt32 dstCharsUsed = 0; + UInt32 result; + uint32_t dstDecomposedUsed = 0; + uint32_t dstPrecomposedUsed = 0; + + (void) cd; /* UNUSED */ + + if (0 == *inbytesleft) { + return 0; + } + + result = CFStringEncodingBytesToUnicode( + script_code, kCFStringEncodingComposeCombinings, + *inbuf, *inbytesleft, &srcCharsUsed, + (UniChar*)*outbuf, *outbytesleft, &dstCharsUsed); + + switch(result) { + case kCFStringEncodingConversionSuccess: + if (*inbytesleft == srcCharsUsed) + break; + else + ; /*fall through*/ + case kCFStringEncodingInsufficientOutputBufferLength: + debug_out("String conversion: " + "Output buffer too small\n"); + hexdump("UTF8->UTF16LE (new) input", + *inbuf, *inbytesleft); + errno = E2BIG; + return -1; + case kCFStringEncodingInvalidInputStream: + /* + * HACK: smbd/mangle_hash2.c:is_legal_name() expects + * errors here. That function will always pass 2 + * characters. smbd/open.c:check_for_pipe() cuts a + * patchname to 10 characters blindly. Suppress the + * debug output in those cases. + */ + if(2 != *inbytesleft && 10 != *inbytesleft) { + debug_out("String conversion: " + "Invalid input sequence\n"); + hexdump("UTF8->UTF16LE (new) input", + *inbuf, *inbytesleft); + } + errno = EILSEQ; + return -1; + case kCFStringEncodingConverterUnavailable: + debug_out("String conversion: " + "Unknown encoding\n"); + hexdump("UTF8->UTF16LE (new) input", + *inbuf, *inbytesleft); + errno = EINVAL; + return -1; + } + + /* + * It doesn't look like CFStringEncodingBytesToUnicode() can + * produce precomposed characters (flags=ComposeCombinings + * doesn't do it), so we need another pass over the data here. + * We can do this in-place, as the string can only get + * shorter. + * + * (Actually in theory there should be an internal + * decomposition and reordering before the actual composition + * step. But we should be able to rely on that we always get + * fully decomposed strings for input, so this can't create + * problems in reality.) + */ + CFUniCharPrecompose( + (const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed, + (UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed); + + native_to_le(*outbuf, dstPrecomposedUsed*2); + + *inbuf += srcCharsUsed; + *inbytesleft -= srcCharsUsed; + *outbuf += dstPrecomposedUsed*2; + *outbytesleft -= dstPrecomposedUsed*2; + + return 0; +} + +static size_t macosxfs_encoding_push( + void *cd, /* Encoder handle */ + char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */ + char **outbuf, size_t *outbytesleft) /* Script string */ +{ + static const int script_code = kCFStringEncodingUTF8; + static UniChar *buffer = NULL; + static size_t buflen = 0; + UInt32 srcCharsUsed=0, dstCharsUsed=0, result; + + (void) cd; /* UNUSED */ + + if (0 == *inbytesleft) { + return 0; + } + + buffer = set_ucbuffer_with_le( + buffer, &buflen, *inbuf, *inbytesleft); + + result = CFStringEncodingUnicodeToBytes( + script_code, kCFStringEncodingUseHFSPlusCanonical, + buffer, *inbytesleft/2, &srcCharsUsed, + *outbuf, *outbytesleft, &dstCharsUsed); + + switch(result) { + case kCFStringEncodingConversionSuccess: + if (*inbytesleft/2 == srcCharsUsed) + break; + else + ; /*fall through*/ + case kCFStringEncodingInsufficientOutputBufferLength: + debug_out("String conversion: " + "Output buffer too small\n"); + hexdump("UTF16LE->UTF8 (new) input", + *inbuf, *inbytesleft); + errno = E2BIG; + return -1; + case kCFStringEncodingInvalidInputStream: + /* + * HACK: smbd/open.c:check_for_pipe():is_legal_name() + * cuts a pathname to 10 characters blindly. Suppress + * the debug output in those cases. + */ + if(10 != *inbytesleft) { + debug_out("String conversion: " + "Invalid input sequence\n"); + hexdump("UTF16LE->UTF8 (new) input", + *inbuf, *inbytesleft); + } + errno = EILSEQ; + return -1; + case kCFStringEncodingConverterUnavailable: + debug_out("String conversion: " + "Unknown encoding\n"); + hexdump("UTF16LE->UTF8 (new) input", + *inbuf, *inbytesleft); + errno = EINVAL; + return -1; + } + + *inbuf += srcCharsUsed*2; + *inbytesleft -= srcCharsUsed*2; + *outbuf += dstCharsUsed; + *outbytesleft -= dstCharsUsed; + + return 0; +} + +#endif /* USE_INTERNAL_API */ + +/* + * For initialization, actually install the encoding as "macosxfs". + */ +static struct charset_functions macosxfs_encoding_functions = { + "MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push +}; + +NTSTATUS charset_macosxfs_init(void) +{ + if (!smb_register_charset(&macosxfs_encoding_functions)) { + return NT_STATUS_INTERNAL_ERROR; + } + return NT_STATUS_OK; +} + +/* eof */ diff --git a/lib/util/charset/codepoints.c b/lib/util/charset/codepoints.c index cd54420e8e..8cc33a9782 100644 --- a/lib/util/charset/codepoints.c +++ b/lib/util/charset/codepoints.c @@ -23,7 +23,7 @@ #include "includes.h" #include "lib/util/charset/charset.h" #include "system/locale.h" -#include "dynconfig.h" +#include "dynconfig/dynconfig.h" #ifdef strcasecmp #undef strcasecmp @@ -168,17 +168,16 @@ struct smb_iconv_handle *get_iconv_handle(void) { if (global_iconv_handle == NULL) global_iconv_handle = smb_iconv_handle_reinit(talloc_autofree_context(), - "ASCII", "UTF-8", "ASCII", true, NULL); + "ASCII", "UTF-8", true, NULL); return global_iconv_handle; } struct smb_iconv_handle *get_iconv_testing_handle(TALLOC_CTX *mem_ctx, const char *dos_charset, - const char *unix_charset, - const char *display_charset) + const char *unix_charset) { return smb_iconv_handle_reinit(mem_ctx, - dos_charset, unix_charset, display_charset, true, NULL); + dos_charset, unix_charset, true, NULL); } /** @@ -190,7 +189,6 @@ const char *charset_name(struct smb_iconv_handle *ic, charset_t ch) case CH_UTF16: return "UTF-16LE"; case CH_UNIX: return ic->unix_charset; case CH_DOS: return ic->dos_charset; - case CH_DISPLAY: return ic->display_charset; case CH_UTF8: return "UTF8"; case CH_UTF16BE: return "UTF-16BE"; case CH_UTF16MUNGED: return "UTF16_MUNGED"; @@ -219,37 +217,6 @@ static int close_iconv_handle(struct smb_iconv_handle *data) return 0; } -static const char *map_locale(const char *charset) -{ - if (strcmp(charset, "LOCALE") != 0) { - return charset; - } -#if defined(HAVE_NL_LANGINFO) && defined(CODESET) - { - const char *ln; - smb_iconv_t handle; - - ln = nl_langinfo(CODESET); - if (ln == NULL) { - DEBUG(1,("Unable to determine charset for LOCALE - using ASCII\n")); - return "ASCII"; - } - /* Check whether the charset name is supported - by iconv */ - handle = smb_iconv_open(ln, "UCS-2LE"); - if (handle == (smb_iconv_t) -1) { - DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln)); - return "ASCII"; - } else { - DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln)); - smb_iconv_close(handle); - } - return ln; - } -#endif - return "ASCII"; -} - /* the old_ic is passed in here as the smb_iconv_handle structure is used as a global pointer in some places (eg. python modules). We @@ -261,14 +228,11 @@ static const char *map_locale(const char *charset) _PUBLIC_ struct smb_iconv_handle *smb_iconv_handle_reinit(TALLOC_CTX *mem_ctx, const char *dos_charset, const char *unix_charset, - const char *display_charset, bool native_iconv, struct smb_iconv_handle *old_ic) { struct smb_iconv_handle *ret; - display_charset = map_locale(display_charset); - if (old_ic != NULL) { ret = old_ic; close_iconv_handle(ret); @@ -290,9 +254,13 @@ _PUBLIC_ struct smb_iconv_handle *smb_iconv_handle_reinit(TALLOC_CTX *mem_ctx, talloc_set_destructor(ret, close_iconv_handle); + if (strcasecmp(dos_charset, "UTF8") == 0 || strcasecmp(dos_charset, "UTF-8") == 0) { + DEBUG(0,("ERROR: invalid DOS charset: 'dos charset' must not be UTF8, using (default value) CP850 instead\n")); + dos_charset = "CP850"; + } + ret->dos_charset = talloc_strdup(ret->child_ctx, dos_charset); ret->unix_charset = talloc_strdup(ret->child_ctx, unix_charset); - ret->display_charset = talloc_strdup(ret->child_ctx, display_charset); ret->native_iconv = native_iconv; return ret; diff --git a/lib/util/charset/convert_string.c b/lib/util/charset/convert_string.c index e51add2aaf..51f9fec137 100644 --- a/lib/util/charset/convert_string.c +++ b/lib/util/charset/convert_string.c @@ -2,7 +2,8 @@ Unix SMB/CIFS implementation. Character set conversion Extensions Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001 - Copyright (C) Andrew Tridgell 2001 + Copyright (C) Andrew Tridgell 2001-2011 + Copyright (C) Andrew Bartlett 2011 Copyright (C) Simo Sorce 2001 Copyright (C) Martin Pool 2003 @@ -21,6 +22,7 @@ */ #include "includes.h" +#include "system/iconv.h" /** * @file @@ -177,28 +179,29 @@ bool convert_string_error_handle(struct smb_iconv_handle *ic, size_t slen = srclen; size_t dlen = destlen; unsigned char lastp = '\0'; + bool ret; - /* If all characters are ascii, fast path here. */ - while (((slen == (size_t)-1) || (slen >= 2)) && dlen) { - if (((lastp = *p) <= 0x7f) && (p[1] == 0)) { + if (slen == (size_t)-1) { + while (dlen && + ((lastp = *p) <= 0x7f) && (p[1] == 0)) { *q++ = *p; - if (slen != (size_t)-1) { - slen -= 2; - } p += 2; dlen--; retval++; if (!lastp) break; - } else { -#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS - goto general_case; -#else - bool ret = convert_string_internal(ic, from, to, p, slen, q, dlen, converted_size); - *converted_size += retval; - return ret; -#endif } + if (lastp != 0) goto slow_path; + } else { + while (slen >= 2 && dlen && + (*p <= 0x7f) && (p[1] == 0)) { + *q++ = *p; + slen -= 2; + p += 2; + dlen--; + retval++; + } + if (slen != 0) goto slow_path; } *converted_size = retval; @@ -212,6 +215,19 @@ bool convert_string_error_handle(struct smb_iconv_handle *ic, } } return true; + + slow_path: + /* come here when we hit a character we can't deal + * with in the fast path + */ +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + goto general_case; +#else + ret = convert_string_internal(ic, from, to, p, slen, q, dlen, converted_size); + *converted_size += retval; + return ret; +#endif + } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) { const unsigned char *p = (const unsigned char *)src; unsigned char *q = (unsigned char *)dest; @@ -221,8 +237,8 @@ bool convert_string_error_handle(struct smb_iconv_handle *ic, unsigned char lastp = '\0'; /* If all characters are ascii, fast path here. */ - while (slen && (dlen >= 2)) { - if ((lastp = *p) <= 0x7F) { + while (slen && (dlen >= 1)) { + if (dlen >=2 && (lastp = *p) <= 0x7F) { *q++ = *p++; *q++ = '\0'; if (slen != (size_t)-1) { @@ -387,7 +403,7 @@ bool convert_string_talloc_handle(TALLOC_CTX *ctx, struct smb_iconv_handle *ic, } /* +2 is for ucs2 null termination. */ - ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2); + ob = talloc_realloc(ctx, ob, char, destlen + 2); if (!ob) { DEBUG(0, ("convert_string_talloc: realloc failed!\n")); @@ -428,7 +444,7 @@ bool convert_string_talloc_handle(TALLOC_CTX *ctx, struct smb_iconv_handle *ic, */ if (o_len > 1024) { /* We're shrinking here so we know the +2 is safe from wrap. */ - ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2); + ob = talloc_realloc(ctx,ob, char, destlen + 2); } if (destlen && !ob) { diff --git a/lib/util/charset/pull_push.c b/lib/util/charset/pull_push.c new file mode 100644 index 0000000000..b7a5bcdc65 --- /dev/null +++ b/lib/util/charset/pull_push.c @@ -0,0 +1,150 @@ +/* + Unix SMB/CIFS implementation. + Character set conversion Extensions + Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001 + Copyright (C) Andrew Tridgell 2001 + Copyright (C) Simo Sorce 2001 + Copyright (C) Martin Pool 2003 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +*/ + +#include "includes.h" +#include "system/locale.h" + +/** + * Copy a string from a unix char* src to a UCS2 destination, + * allocating a buffer using talloc(). + * + * @param dest always set at least to NULL + * @parm converted_size set to the number of bytes occupied by the string in + * the destination on success. + * + * @return true if new buffer was correctly allocated, and string was + * converted. + **/ +bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, + size_t *converted_size) +{ + size_t src_len = strlen(src)+1; + + *dest = NULL; + return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, + (void **)dest, converted_size); +} + +/** + * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc + * + * @param dest always set at least to NULL + * @parm converted_size set to the number of bytes occupied by the string in + * the destination on success. + * + * @return true if new buffer was correctly allocated, and string was + * converted. + **/ + +bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, + size_t *converted_size) +{ + size_t src_len = strlen(src)+1; + + *dest = NULL; + return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, + (void**)dest, converted_size); +} + +/** + * Copy a string from a unix char* src to an ASCII destination, + * allocating a buffer using talloc(). + * + * @param dest always set at least to NULL + * + * @param converted_size The number of bytes occupied by the string in the destination + * @returns boolean indicating if the conversion was successful + **/ +bool push_ascii_talloc(TALLOC_CTX *mem_ctx, char **dest, const char *src, size_t *converted_size) +{ + size_t src_len = strlen(src)+1; + + *dest = NULL; + return convert_string_talloc(mem_ctx, CH_UNIX, CH_DOS, src, src_len, + (void **)dest, converted_size); +} + +/** + * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc + * + * @param dest always set at least to NULL + * @parm converted_size set to the number of bytes occupied by the string in + * the destination on success. + * + * @return true if new buffer was correctly allocated, and string was + * converted. + **/ + +bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, + size_t *converted_size) +{ + size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t); + + *dest = NULL; + return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, + (void **)dest, converted_size); +} + + +/** + * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc + * + * @param dest always set at least to NULL + * @parm converted_size set to the number of bytes occupied by the string in + * the destination on success. + * + * @return true if new buffer was correctly allocated, and string was + * converted. + **/ + +bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, + size_t *converted_size) +{ + size_t src_len = strlen(src)+1; + + *dest = NULL; + return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, + (void **)dest, converted_size); +} + + +/** + * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc + * + * @param dest always set at least to NULL + * @parm converted_size set to the number of bytes occupied by the string in + * the destination on success. + * + * @return true if new buffer was correctly allocated, and string was + * converted. + **/ + +bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, + size_t *converted_size) +{ + size_t src_len = strlen(src)+1; + + *dest = NULL; + return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, + (void **)dest, converted_size); +} diff --git a/lib/util/charset/tests/convert_string.c b/lib/util/charset/tests/convert_string.c index 32fc11f527..9a5d974fe3 100644 --- a/lib/util/charset/tests/convert_string.c +++ b/lib/util/charset/tests/convert_string.c @@ -105,7 +105,7 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx) talloc_steal(tctx, gd_iso8859_1.data); talloc_steal(tctx, gd_utf16le.data); - iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850", "UTF8"); + iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850"); torture_assert(tctx, iconv_handle, "getting iconv handle"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, @@ -199,11 +199,11 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx) torture_assert_data_blob_equal(tctx, gd_output, gd_cp850, "conversion from UTF8 to (unix charset) CP850 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF8, CH_DISPLAY, + CH_UTF8, CH_UTF8, gd_utf8.data, gd_utf8.length, (void *)&gd_output.data, &gd_output.length), - "conversion from UTF8 to (display charset) UTF8"); - torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF8 to (display charset) UTF8 incorrect"); + "conversion from UTF8 to UTF8"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF8 to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_UTF16LE, CH_DOS, @@ -227,11 +227,11 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx) torture_assert_data_blob_equal(tctx, gd_output, gd_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF16LE, CH_DISPLAY, + CH_UTF16LE, CH_UTF8, gd_utf16le.data, gd_utf16le.length, (void *)&gd_output.data, &gd_output.length), - "conversion from UTF16LE to (display charset) UTF8"); - torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect"); + "conversion from UTF16LE to UTF8"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_DOS, CH_DOS, @@ -248,11 +248,11 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx) torture_assert_data_blob_equal(tctx, gd_output, gd_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_DOS, CH_DISPLAY, + CH_DOS, CH_UTF8, gd_iso8859_1.data, gd_iso8859_1.length, (void *)&gd_output.data, &gd_output.length), - "conversion from (dos charset) ISO8859-1 to (display charset) UTF8"); - torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect"); + "conversion from (dos charset) ISO8859-1 to UTF8"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_DOS, CH_UTF16LE, @@ -265,7 +265,7 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx) (const char *)gd_iso8859_1.data, CH_DOS, CH_UTF16LE), gd_output.length / 2, - "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again"); + "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to UTF8 and back again"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_DOS, CH_UTF8, @@ -282,6 +282,191 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx) return true; } +static bool test_gd_minus_1_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB gd_utf8 = base64_decode_data_blob(gd_utf8_base64); + DATA_BLOB gd_cp850 = base64_decode_data_blob(gd_cp850_base64); + DATA_BLOB gd_utf16le = base64_decode_data_blob(gd_utf16le_base64); + DATA_BLOB gd_output; + DATA_BLOB gd_utf8_terminated; + DATA_BLOB gd_cp850_terminated; + DATA_BLOB gd_utf16le_terminated; + + talloc_steal(tctx, gd_utf8.data); + talloc_steal(tctx, gd_cp850.data); + talloc_steal(tctx, gd_utf16le.data); + + iconv_handle = get_iconv_testing_handle(tctx, "CP850", "CP850"); + torture_assert(tctx, iconv_handle, "getting iconv handle"); + + gd_utf8_terminated = data_blob_talloc(tctx, NULL, gd_utf8.length + 1); + memcpy(gd_utf8_terminated.data, gd_utf8.data, gd_utf8.length); + gd_utf8_terminated.data[gd_utf8.length] = '\0'; + + gd_cp850_terminated = data_blob_talloc(tctx, NULL, gd_cp850.length + 1); + memcpy(gd_cp850_terminated.data, gd_cp850.data, gd_cp850.length); + gd_cp850_terminated.data[gd_cp850.length] = '\0'; + + gd_utf16le_terminated = data_blob_talloc(tctx, NULL, gd_utf16le.length + 2); + memcpy(gd_utf16le_terminated.data, gd_utf16le.data, gd_utf16le.length); + gd_utf16le_terminated.data[gd_utf16le.length] = '\0'; + gd_utf16le_terminated.data[gd_utf16le.length + 1] = '\0'; + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_utf16le.length, &gd_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le, "conversion from UTF8 to UTF16LE null terminated"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_utf16le.length - 1, &gd_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_utf16le.length - 2, &gd_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_utf8.length, &gd_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to UTF8 null terminated"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_utf8.length - 1, &gd_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_utf8.length - 2, &gd_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + gd_output = data_blob_talloc(tctx, NULL, gd_cp850.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_DOS, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF16LE to CP850 (dos) null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_cp850_terminated, "conversion from UTF16LE to CP850 (dos) null terminated"); + + /* Now null terminate the string early, the confirm we don't skip the NULL and convert any further */ + gd_utf8_terminated.data[3] = '\0'; + gd_utf8_terminated.length = 4; /* used for the comparison only */ + + gd_cp850_terminated.data[2] = '\0'; + gd_cp850_terminated.length = 3; /* used for the comparison only */ + + gd_utf16le_terminated.data[4] = '\0'; + gd_utf16le_terminated.data[5] = '\0'; + gd_utf16le_terminated.length = 6; /* used for the comparison only */ + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated early"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated early"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_DOS, CH_UTF16LE, + gd_cp850_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from CP850 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated early"); + + gd_output = data_blob_talloc(tctx, NULL, gd_cp850.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_DOS, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_cp850_terminated, "conversion from UTF16LE to UTF8 null terminated early"); + + /* Now null terminate the string particularly early, the confirm we don't skip the NULL and convert any further */ + gd_utf8_terminated.data[1] = '\0'; + gd_utf8_terminated.length = 2; /* used for the comparison only */ + + gd_utf16le_terminated.data[2] = '\0'; + gd_utf16le_terminated.data[3] = '\0'; + gd_utf16le_terminated.length = 4; /* used for the comparison only */ + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated very early"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated very early"); + + return true; +} + static bool test_gd_ascii_handle(struct torture_context *tctx) { struct smb_iconv_handle *iconv_handle; @@ -296,7 +481,7 @@ static bool test_gd_ascii_handle(struct torture_context *tctx) talloc_steal(tctx, gd_iso8859_1.data); talloc_steal(tctx, gd_utf16le.data); - iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8", "UTF8"); + iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8"); torture_assert(tctx, iconv_handle, "getting iconv handle"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, @@ -365,7 +550,7 @@ static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx talloc_steal(tctx, plato_english_utf16le.data); - iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850", "UTF8"); + iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850"); torture_assert(tctx, iconv_handle, "getting iconv handle"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, @@ -383,11 +568,11 @@ static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_cp850, "conversion from UTF8 to (unix charset) CP850 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF8, CH_DISPLAY, + CH_UTF8, CH_UTF8, plato_english_utf8.data, plato_english_utf8.length, (void *)&plato_english_output.data, &plato_english_output.length), - "conversion from UTF8 to (display charset) UTF8"); - torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF8 to (display charset) UTF8 incorrect"); + "conversion from UTF8 to UTF8"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF8 to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_UTF16LE, CH_DOS, @@ -436,11 +621,11 @@ static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF16LE, CH_DISPLAY, + CH_UTF16LE, CH_UTF8, plato_english_utf16le.data, plato_english_utf16le.length, (void *)&plato_english_output.data, &plato_english_output.length), - "conversion from UTF16LE to (display charset) UTF8"); - torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect"); + "conversion from UTF16LE to UTF8"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_DOS, CH_DOS, @@ -457,11 +642,11 @@ static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_DOS, CH_DISPLAY, + CH_DOS, CH_UTF8, plato_english_iso8859_1.data, plato_english_iso8859_1.length, (void *)&plato_english_output.data, &plato_english_output.length), - "conversion from (dos charset) ISO8859-1 to (display charset) UTF8"); - torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect"); + "conversion from (dos charset) ISO8859-1 to UTF8"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_DOS, CH_UTF16LE, @@ -472,6 +657,261 @@ static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx return true; } +static bool test_plato_english_minus_1_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB plato_english_utf8 = data_blob_string_const(plato_english_ascii); + DATA_BLOB plato_english_utf16le = base64_decode_data_blob(plato_english_utf16le_base64); + DATA_BLOB plato_english_output; + DATA_BLOB plato_english_utf8_terminated; + DATA_BLOB plato_english_utf16le_terminated; + + talloc_steal(tctx, plato_english_utf16le.data); + + iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850"); + torture_assert(tctx, iconv_handle, "getting iconv handle"); + + plato_english_utf8_terminated = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 1); + memcpy(plato_english_utf8_terminated.data, plato_english_utf8.data, plato_english_utf8.length); + plato_english_utf8_terminated.data[plato_english_utf8.length] = '\0'; + + plato_english_utf16le_terminated = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 2); + memcpy(plato_english_utf16le_terminated.data, plato_english_utf16le.data, plato_english_utf16le.length); + plato_english_utf16le_terminated.data[plato_english_utf16le.length] = '\0'; + plato_english_utf16le_terminated.data[plato_english_utf16le.length + 1] = '\0'; + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf16le.length, &plato_english_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le, "conversion from UTF8 to UTF16LE null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf16le.length - 1, &plato_english_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf16le.length - 2, &plato_english_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf8.length, &plato_english_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to UTF8 null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf8.length - 1, &plato_english_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf8.length - 2, &plato_english_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + /* Now null terminate the string early, the confirm we don't skip the NULL and convert any further */ + plato_english_utf8_terminated.data[3] = '\0'; + plato_english_utf8_terminated.length = 4; /* used for the comparison only */ + + plato_english_utf16le_terminated.data[6] = '\0'; + plato_english_utf16le_terminated.data[7] = '\0'; + plato_english_utf16le_terminated.length = 8; /* used for the comparison only */ + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated early"); + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated early"); + + + /* Now null terminate the string particularly early, the confirm we don't skip the NULL and convert any further */ + plato_english_utf8_terminated.data[1] = '\0'; + plato_english_utf8_terminated.length = 2; /* used for the comparison only */ + + plato_english_utf16le_terminated.data[2] = '\0'; + plato_english_utf16le_terminated.data[3] = '\0'; + plato_english_utf16le_terminated.length = 4; /* used for the comparison only */ + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated very early"); + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated very early"); + + return true; +} + +static bool test_plato_minus_1_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB plato_utf8 = base64_decode_data_blob(plato_utf8_base64); + DATA_BLOB plato_utf16le = base64_decode_data_blob(plato_utf16le_base64); + DATA_BLOB plato_output; + DATA_BLOB plato_utf8_terminated; + DATA_BLOB plato_utf16le_terminated; + + talloc_steal(tctx, plato_utf8.data); + talloc_steal(tctx, plato_utf16le.data); + + iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850"); + torture_assert(tctx, iconv_handle, "getting iconv handle"); + + plato_utf8_terminated = data_blob_talloc(tctx, NULL, plato_utf8.length + 1); + memcpy(plato_utf8_terminated.data, plato_utf8.data, plato_utf8.length); + plato_utf8_terminated.data[plato_utf8.length] = '\0'; + + plato_utf16le_terminated = data_blob_talloc(tctx, NULL, plato_utf16le.length + 2); + memcpy(plato_utf16le_terminated.data, plato_utf16le.data, plato_utf16le.length); + plato_utf16le_terminated.data[plato_utf16le.length] = '\0'; + plato_utf16le_terminated.data[plato_utf16le.length + 1] = '\0'; + + plato_output = data_blob_talloc(tctx, NULL, plato_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8_terminated.data, -1, + (void *)plato_output.data, plato_output.length, &plato_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8_terminated.data, -1, + (void *)plato_output.data, plato_utf16le.length, &plato_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le, "conversion from UTF8 to UTF16LE null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8_terminated.data, -1, + (void *)plato_output.data, plato_utf16le.length - 1, &plato_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8_terminated.data, -1, + (void *)plato_output.data, plato_utf16le.length - 2, &plato_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + + plato_output = data_blob_talloc(tctx, NULL, plato_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le_terminated.data, -1, + (void *)plato_output.data, plato_output.length, &plato_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le_terminated.data, -1, + (void *)plato_output.data, plato_utf8.length, &plato_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to UTF8 null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le_terminated.data, -1, + (void *)plato_output.data, plato_utf8.length - 1, &plato_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le_terminated.data, -1, + (void *)plato_output.data, plato_utf8.length - 2, &plato_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + /* Now null terminate the string early, the confirm we don't skip the NULL and convert any further */ + plato_utf8_terminated.data[5] = '\0'; + plato_utf8_terminated.length = 6; /* used for the comparison only */ + + plato_utf16le_terminated.data[4] = '\0'; + plato_utf16le_terminated.data[5] = '\0'; + plato_utf16le_terminated.length = 6; /* used for the comparison only */ + + plato_output = data_blob_talloc(tctx, NULL, plato_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8_terminated.data, -1, + (void *)plato_output.data, plato_output.length, &plato_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated early"); + + plato_output = data_blob_talloc(tctx, NULL, plato_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le_terminated.data, -1, + (void *)plato_output.data, plato_output.length, &plato_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated early"); + + return true; +} + static bool test_plato_cp850_utf8_handle(struct torture_context *tctx) { struct smb_iconv_handle *iconv_handle; @@ -483,7 +923,7 @@ static bool test_plato_cp850_utf8_handle(struct torture_context *tctx) talloc_steal(tctx, plato_utf8.data); talloc_steal(tctx, plato_utf16le.data); - iconv_handle = get_iconv_testing_handle(tctx, "CP850", "UTF8", "UTF8"); + iconv_handle = get_iconv_testing_handle(tctx, "CP850", "UTF8"); torture_assert(tctx, iconv_handle, "creating iconv handle"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, @@ -568,11 +1008,11 @@ static bool test_plato_cp850_utf8_handle(struct torture_context *tctx) torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF8 to (unix charset) UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF8, CH_DISPLAY, + CH_UTF8, CH_UTF8, plato_utf8.data, plato_utf8.length, (void *)&plato_output.data, &plato_output.length), "conversion of UTF16 ancient greek to unix charset UTF8 failed"); - torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF8 to (display charset) UTF8 incorrect"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF8 to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_UTF16LE, CH_DOS, @@ -627,39 +1067,39 @@ static bool test_plato_cp850_utf8_handle(struct torture_context *tctx) "conversion of UTF16 ancient greek to UTF8 failed"); torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF16LE, CH_DISPLAY, + CH_UTF16LE, CH_UTF8, plato_utf16le.data, plato_utf16le.length, (void *)&plato_output.data, &plato_output.length), - "conversion of UTF16 ancient greek to display charset UTF8 failed"); - torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect"); + "conversion of UTF16 ancient greek to UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_DISPLAY, CH_UTF16LE, + CH_UTF8, CH_UTF16LE, plato_output.data, plato_output.length, (void *)&plato_output2.data, &plato_output2.length), - "round trip conversion of UTF16 ancient greek to display charset UTF8 and back again failed"); + "round trip conversion of UTF16 ancient greek to UTF8 and back again failed"); torture_assert_data_blob_equal(tctx, plato_output2, plato_utf16le, - "round trip conversion of UTF16 ancient greek to display charset UTF8 and back again failed"); + "round trip conversion of UTF16 ancient greek to UTF8 and back again failed"); torture_assert_int_equal(tctx, strlen_m_ext_handle(iconv_handle, (const char *)plato_output.data, - CH_DISPLAY, CH_UTF16LE), + CH_UTF8, CH_UTF16LE), plato_output2.length / 2, - "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again"); + "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to UTF8 and back again"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_DISPLAY, CH_UTF8, + CH_UTF8, CH_UTF8, plato_output.data, plato_output.length, (void *)&plato_output2.data, &plato_output2.length), - "conversion of display charset UTF8 to UTF8"); + "conversion of UTF8 to UTF8"); torture_assert_data_blob_equal(tctx, plato_output2, plato_utf8, - "conversion of display charset UTF8 to UTF8"); + "conversion of UTF8 to UTF8"); torture_assert_int_equal(tctx, strlen_m_ext_handle(iconv_handle, (const char *)plato_output.data, - CH_DISPLAY, CH_UTF8), + CH_UTF8, CH_UTF8), plato_output2.length, - "checking strlen_m_ext of conversion of display charset UTF8 to UTF8"); + "checking strlen_m_ext of conversion of UTF8 to UTF8"); return true; } @@ -674,7 +1114,7 @@ static bool test_plato_latin_cp850_utf8_handle(struct torture_context *tctx) talloc_steal(tctx, plato_latin_utf8.data); talloc_steal(tctx, plato_latin_utf16le.data); - iconv_handle = get_iconv_testing_handle(tctx, "CP850", "UTF8", "UTF8"); + iconv_handle = get_iconv_testing_handle(tctx, "CP850", "UTF8"); torture_assert(tctx, iconv_handle, "creating iconv handle"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, @@ -691,11 +1131,11 @@ static bool test_plato_latin_cp850_utf8_handle(struct torture_context *tctx) torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF8 to (unix charset) UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF8, CH_DISPLAY, + CH_UTF8, CH_UTF8, plato_latin_utf8.data, plato_latin_utf8.length, (void *)&plato_latin_output.data, &plato_latin_output.length), "conversion of UTF16 latin charset greek to unix charset UTF8 failed"); - torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF8 to (display charset) UTF8 incorrect"); + torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF8 to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_UTF16LE, CH_DOS, @@ -711,25 +1151,25 @@ static bool test_plato_latin_cp850_utf8_handle(struct torture_context *tctx) torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF16LE to (unix charset) CP850 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF16LE, CH_DISPLAY, + CH_UTF16LE, CH_UTF8, plato_latin_utf16le.data, plato_latin_utf16le.length, (void *)&plato_latin_output.data, &plato_latin_output.length), - "conversion of UTF16 latin charset greek to display charset UTF8 failed"); - torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect"); + "conversion of UTF16 latin charset greek to UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF16LE to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_DISPLAY, CH_UTF16LE, + CH_UTF8, CH_UTF16LE, plato_latin_output.data, plato_latin_output.length, (void *)&plato_latin_output2.data, &plato_latin_output2.length), - "round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again failed"); + "round trip conversion of UTF16 latin charset greek to UTF8 and back again failed"); torture_assert_data_blob_equal(tctx, plato_latin_output2, plato_latin_utf16le, - "round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again failed"); + "round trip conversion of UTF16 latin charset greek to UTF8 and back again failed"); torture_assert_int_equal(tctx, strlen_m_ext_handle(iconv_handle, (const char *)plato_latin_output.data, - CH_DISPLAY, CH_UTF16LE), + CH_UTF8, CH_UTF16LE), plato_latin_output2.length / 2, - "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again"); + "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to UTF8 and back again"); return true; } @@ -742,7 +1182,7 @@ static bool test_gd_case_utf8_handle(struct torture_context *tctx) char *gd_lower, *gd_upper; talloc_steal(tctx, gd_utf8.data); - iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8", "UTF8"); + iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8"); torture_assert(tctx, iconv_handle, "getting utf8 iconv handle"); torture_assert(tctx, @@ -805,7 +1245,7 @@ static bool test_gd_case_cp850_handle(struct torture_context *tctx) char *gd_lower, *gd_upper; talloc_steal(tctx, gd_cp850.data); - iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "CP850", "CP850"); + iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "CP850"); torture_assert(tctx, iconv_handle, "getting cp850 iconv handle"); torture_assert(tctx, @@ -866,7 +1306,7 @@ static bool test_plato_case_utf8_handle(struct torture_context *tctx) char *plato_lower, *plato_upper; talloc_steal(tctx, plato_utf8.data); - iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8", "UTF8"); + iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8"); torture_assert(tctx, iconv_handle, "getting utf8 iconv handle"); torture_assert(tctx, @@ -1248,9 +1688,12 @@ struct torture_suite *torture_local_convert_string_handle(TALLOC_CTX *mem_ctx) struct torture_suite *suite = torture_suite_create(mem_ctx, "convert_string_handle"); torture_suite_add_simple_test(suite, "gd_ascii", test_gd_ascii_handle); + torture_suite_add_simple_test(suite, "gd_minus_1", test_gd_minus_1_handle); torture_suite_add_simple_test(suite, "gd_iso8859_cp850", test_gd_iso8859_cp850_handle); torture_suite_add_simple_test(suite, "plato_english_iso8859_cp850", test_plato_english_iso8859_cp850_handle); + torture_suite_add_simple_test(suite, "plato_english_minus_1", test_plato_english_minus_1_handle); torture_suite_add_simple_test(suite, "plato_cp850_utf8", test_plato_cp850_utf8_handle); + torture_suite_add_simple_test(suite, "plato_minus_1", test_plato_minus_1_handle); torture_suite_add_simple_test(suite, "plato_latin_cp850_utf8", test_plato_latin_cp850_utf8_handle); return suite; } diff --git a/lib/util/charset/util_str.c b/lib/util/charset/util_str.c index e8f0b788b1..688ab5a0a1 100644 --- a/lib/util/charset/util_str.c +++ b/lib/util/charset/util_str.c @@ -5,6 +5,8 @@ Copyright (C) Simo Sorce 2001 Copyright (C) Andrew Bartlett 2011 Copyright (C) Jeremy Allison 1992-2007 + Copyright (C) Martin Pool 2003 + Copyright (C) James Peach 2006 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -167,7 +169,6 @@ _PUBLIC_ size_t strlen_m_ext_handle(struct smb_iconv_handle *ic, switch (dst_charset) { case CH_DOS: case CH_UNIX: - case CH_DISPLAY: smb_panic("cannot call strlen_m_ext() with a variable dest charset (must be UTF16* or UTF8)"); default: break; @@ -327,7 +328,7 @@ _PUBLIC_ char *strchr_m(const char *src, char c) for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) { if (*s == c) - return (char *)s; + return discard_const_p(char, s); } if (!*s) @@ -395,7 +396,7 @@ _PUBLIC_ char *strrchr_m(const char *s, char c) break; } /* No - we have a match ! */ - return (char *)cp; + return discard_const_p(char , cp); } } while (cp-- != s); if (!got_mb) @@ -473,3 +474,84 @@ _PUBLIC_ bool strhasupper(const char *string) struct smb_iconv_handle *ic = get_iconv_handle(); return strhasupper_handle(ic, string); } + +/*********************************************************************** + strstr_m - We convert via ucs2 for now. +***********************************************************************/ + +char *strstr_m(const char *src, const char *findstr) +{ + smb_ucs2_t *p; + smb_ucs2_t *src_w, *find_w; + const char *s; + char *s2; + char *retp; + size_t converted_size, findstr_len = 0; + + TALLOC_CTX *frame; /* Only set up in the iconv case */ + + /* for correctness */ + if (!findstr[0]) { + return discard_const_p(char, src); + } + + /* Samba does single character findstr calls a *lot*. */ + if (findstr[1] == '\0') + return strchr_m(src, *findstr); + + /* We optimise for the ascii case, knowing that all our + supported multi-byte character sets are ascii-compatible + (ie. they match for the first 128 chars) */ + + for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) { + if (*s == *findstr) { + if (!findstr_len) + findstr_len = strlen(findstr); + + if (strncmp(s, findstr, findstr_len) == 0) { + return discard_const_p(char, s); + } + } + } + + if (!*s) + return NULL; + +#if 1 /* def BROKEN_UNICODE_COMPOSE_CHARACTERS */ + /* 'make check' fails unless we do this */ + + /* With compose characters we must restart from the beginning. JRA. */ + s = src; +#endif + + frame = talloc_stackframe(); + + if (!push_ucs2_talloc(frame, &src_w, src, &converted_size)) { + DEBUG(0,("strstr_m: src malloc fail\n")); + TALLOC_FREE(frame); + return NULL; + } + + if (!push_ucs2_talloc(frame, &find_w, findstr, &converted_size)) { + DEBUG(0,("strstr_m: find malloc fail\n")); + TALLOC_FREE(frame); + return NULL; + } + + p = strstr_w(src_w, find_w); + + if (!p) { + TALLOC_FREE(frame); + return NULL; + } + + *p = 0; + if (!pull_ucs2_talloc(frame, &s2, src_w, &converted_size)) { + TALLOC_FREE(frame); + DEBUG(0,("strstr_m: dest malloc fail\n")); + return NULL; + } + retp = discard_const_p(char, (s+strlen(s2))); + TALLOC_FREE(frame); + return retp; +} diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c index a1be501c7c..e4ae65053c 100644 --- a/lib/util/charset/util_unistr.c +++ b/lib/util/charset/util_unistr.c @@ -161,85 +161,6 @@ _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src) } /** - Convert a string to lower case. -**/ -_PUBLIC_ void strlower_m(char *s) -{ - char *d; - struct smb_iconv_handle *iconv_handle; - - /* this is quite a common operation, so we want it to be - fast. We optimise for the ascii case, knowing that all our - supported multi-byte character sets are ascii-compatible - (ie. they match for the first 128 chars) */ - while (*s && !(((uint8_t)*s) & 0x80)) { - *s = tolower((uint8_t)*s); - s++; - } - - if (!*s) - return; - - iconv_handle = get_iconv_handle(); - - d = s; - - while (*s) { - size_t c_size, c_size2; - codepoint_t c = next_codepoint_handle(iconv_handle, s, &c_size); - c_size2 = push_codepoint_handle(iconv_handle, d, tolower_m(c)); - if (c_size2 > c_size) { - DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n", - c, tolower_m(c), (int)c_size, (int)c_size2)); - smb_panic("codepoint expansion in strlower_m\n"); - } - s += c_size; - d += c_size2; - } - *d = 0; -} - -/** - Convert a string to UPPER case. -**/ -_PUBLIC_ void strupper_m(char *s) -{ - char *d; - struct smb_iconv_handle *iconv_handle; - - /* this is quite a common operation, so we want it to be - fast. We optimise for the ascii case, knowing that all our - supported multi-byte character sets are ascii-compatible - (ie. they match for the first 128 chars) */ - while (*s && !(((uint8_t)*s) & 0x80)) { - *s = toupper((uint8_t)*s); - s++; - } - - if (!*s) - return; - - iconv_handle = get_iconv_handle(); - - d = s; - - while (*s) { - size_t c_size, c_size2; - codepoint_t c = next_codepoint_handle(iconv_handle, s, &c_size); - c_size2 = push_codepoint_handle(iconv_handle, d, toupper_m(c)); - if (c_size2 > c_size) { - DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n", - c, toupper_m(c), (int)c_size, (int)c_size2)); - smb_panic("codepoint expansion in strupper_m\n"); - } - s += c_size; - d += c_size2; - } - *d = 0; -} - - -/** Find the number of 'c' chars in a string **/ _PUBLIC_ size_t count_chars_m(const char *s, char c) @@ -273,7 +194,7 @@ _PUBLIC_ size_t count_chars_m(const char *s, char c) * @param dest_len the maximum length in bytes allowed in the * destination. If @p dest_len is -1 then no maximum is used. **/ -static bool push_ascii(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size) +static bool push_ascii_string(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size) { size_t src_len; bool ret; @@ -283,7 +204,7 @@ static bool push_ascii(void *dest, const char *src, size_t dest_len, int flags, if (tmpbuf == NULL) { return false; } - ret = push_ascii(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size); + ret = push_ascii_string(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size); talloc_free(tmpbuf); return ret; } @@ -297,23 +218,6 @@ static bool push_ascii(void *dest, const char *src, size_t dest_len, int flags, } /** - * Copy a string from a unix char* src to an ASCII destination, - * allocating a buffer using talloc(). - * - * @param dest always set at least to NULL - * - * @returns The number of bytes occupied by the string in the destination - * or -1 in case of error. - **/ -_PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size) -{ - size_t src_len = strlen(src)+1; - *dest = NULL; - return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, converted_size); -} - - -/** * Copy a string from a dos codepage source to a unix char* destination. * * The resulting string in "dest" is always null terminated. @@ -328,7 +232,7 @@ _PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, s * @param src_len is the length of the source area in bytes. * @returns the number of bytes occupied by the string in @p src. **/ -static ssize_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags) +static ssize_t pull_ascii_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags) { size_t size = 0; @@ -411,38 +315,6 @@ static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags /** - * Copy a string from a unix char* src to a UCS2 destination, - * allocating a buffer using talloc(). - * - * @param dest always set at least to NULL - * - * @returns The number of bytes occupied by the string in the destination - * or -1 in case of error. - **/ -_PUBLIC_ bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size) -{ - size_t src_len = strlen(src)+1; - *dest = NULL; - return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest, converted_size); -} - - -/** - * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc - * - * @param dest always set at least to NULL - * - * @returns The number of bytes occupied by the string in the destination - **/ - -_PUBLIC_ bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size) -{ - size_t src_len = strlen(src)+1; - *dest = NULL; - return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, converted_size); -} - -/** Copy a string from a ucs2 source to a unix char* destination. Flags can have: STR_TERMINATE means the string in src is null terminated. @@ -484,51 +356,6 @@ static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src } /** - * Copy a string from a ASCII src to a unix char * destination, allocating a buffer using talloc - * - * @param dest always set at least to NULL - * - * @returns The number of bytes occupied by the string in the destination - **/ - -_PUBLIC_ bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size) -{ - size_t src_len = strlen(src)+1; - *dest = NULL; - return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, converted_size); -} - -/** - * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc - * - * @param dest always set at least to NULL - * - * @returns The number of bytes occupied by the string in the destination - **/ - -_PUBLIC_ bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size) -{ - size_t src_len = utf16_len(src); - *dest = NULL; - return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, converted_size); -} - -/** - * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc - * - * @param dest always set at least to NULL - * - * @returns The number of bytes occupied by the string in the destination - **/ - -_PUBLIC_ bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size) -{ - size_t src_len = strlen(src)+1; - *dest = NULL; - return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, converted_size); -} - -/** Copy a string from a char* src to a unicode or ascii dos codepage destination choosing unicode or ascii based on the flags in the SMB buffer starting at base_ptr. @@ -546,7 +373,7 @@ _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int f { if (flags & STR_ASCII) { size_t size = 0; - if (push_ascii(dest, src, dest_len, flags, &size)) { + if (push_ascii_string(dest, src, dest_len, flags, &size)) { return (ssize_t)size; } else { return (ssize_t)-1; @@ -577,7 +404,7 @@ _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int f _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags) { if (flags & STR_ASCII) { - return pull_ascii(dest, src, dest_len, src_len, flags); + return pull_ascii_string(dest, src, dest_len, src_len, flags); } else if (flags & STR_UNICODE) { return pull_ucs2(dest, src, dest_len, src_len, flags); } else { @@ -585,68 +412,3 @@ _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_ return -1; } } - - -/** - * Convert string from one encoding to another, making error checking etc - * - * @param src pointer to source string (multibyte or singlebyte) - * @param srclen length of the source string in bytes - * @param dest pointer to destination string (multibyte or singlebyte) - * @param destlen maximal length allowed for string - * @param converted_size the number of bytes occupied in the destination - * - * @returns true on success, false on fail. - **/ -_PUBLIC_ bool convert_string(charset_t from, charset_t to, - void const *src, size_t srclen, - void *dest, size_t destlen, - size_t *converted_size) -{ - return convert_string_handle(get_iconv_handle(), from, to, - src, srclen, - dest, destlen, converted_size); -} - -/** - * Convert string from one encoding to another, making error checking etc - * - * @param src pointer to source string (multibyte or singlebyte) - * @param srclen length of the source string in bytes - * @param dest pointer to destination string (multibyte or singlebyte) - * @param destlen maximal length allowed for string - * @param converted_size the number of bytes occupied in the destination - * - * @returns true on success, false on fail. - **/ -_PUBLIC_ bool convert_string_error(charset_t from, charset_t to, - void const *src, size_t srclen, - void *dest, size_t destlen, - size_t *converted_size) -{ - return convert_string_error_handle(get_iconv_handle(), from, to, - src, srclen, - dest, destlen, converted_size); -} - -/** - * Convert between character sets, allocating a new buffer using talloc for the result. - * - * @param srclen length of source buffer. - * @param dest always set at least to NULL - * @param converted_size Size in bytes of the converted string - * @note -1 is not accepted for srclen. - * - * @returns boolean indication whether the conversion succeeded - **/ - -_PUBLIC_ bool convert_string_talloc(TALLOC_CTX *ctx, - charset_t from, charset_t to, - void const *src, size_t srclen, - void *dest, size_t *converted_size) -{ - return convert_string_talloc_handle(ctx, get_iconv_handle(), - from, to, src, srclen, dest, - converted_size); -} - diff --git a/lib/util/charset/util_unistr_w.c b/lib/util/charset/util_unistr_w.c index a550e52776..3fbed7f67c 100644 --- a/lib/util/charset/util_unistr_w.c +++ b/lib/util/charset/util_unistr_w.c @@ -22,8 +22,8 @@ #include "includes.h" /* Copy into a smb_ucs2_t from a possibly unaligned buffer. Return the copied smb_ucs2_t */ -#define COPY_UCS2_CHAR(dest,src) (((unsigned char *)(dest))[0] = ((unsigned char *)(src))[0],\ - ((unsigned char *)(dest))[1] = ((unsigned char *)(src))[1], (dest)) +#define COPY_UCS2_CHAR(dest,src) (((unsigned char *)(dest))[0] = ((const unsigned char *)(src))[0],\ + ((unsigned char *)(dest))[1] = ((const unsigned char *)(src))[1], (dest)) /* return an ascii version of a ucs2 character */ @@ -72,12 +72,12 @@ smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c) smb_ucs2_t cp; while (*(COPY_UCS2_CHAR(&cp,s))) { if (c == cp) { - return (smb_ucs2_t *)s; + return discard_const_p(smb_ucs2_t, s); } s++; } if (c == cp) { - return (smb_ucs2_t *)s; + return discard_const_p(smb_ucs2_t, s); } return NULL; @@ -104,7 +104,7 @@ smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c) p += (len - 1); do { if (c == *(COPY_UCS2_CHAR(&cp,p))) { - return (smb_ucs2_t *)p; + return discard_const_p(smb_ucs2_t, p); } } while (p-- != s); return NULL; @@ -234,38 +234,6 @@ static int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len) return (len - n)?(*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b))):0; } -/******************************************************************* - Case insensitive string comparison. -********************************************************************/ - -int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b) -{ - smb_ucs2_t cpa, cpb; - - while ((*COPY_UCS2_CHAR(&cpb,b)) && toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb)) { - a++; - b++; - } - return (tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))); -} - -/******************************************************************* - Case insensitive string comparison, length limited. -********************************************************************/ - -int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len) -{ - smb_ucs2_t cpa, cpb; - size_t n = 0; - - while ((n < len) && *COPY_UCS2_CHAR(&cpb,b) && (toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb))) { - a++; - b++; - n++; - } - return (len - n)?(tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))):0; -} - /* The *_wa() functions take a combination of 7 bit ascii and wide characters They are used so that you can use string diff --git a/lib/util/charset/weird.c b/lib/util/charset/weird.c new file mode 100644 index 0000000000..5db8cdcecd --- /dev/null +++ b/lib/util/charset/weird.c @@ -0,0 +1,134 @@ +/* + Unix SMB/CIFS implementation. + Samba module with developer tools + Copyright (C) Andrew Tridgell 2001 + Copyright (C) Jelmer Vernooij 2002 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "includes.h" + +static struct { + char from; + const char *to; + int len; +} weird_table[] = { + {'q', "^q^", 3}, + {'Q', "^Q^", 3}, + {0, NULL} +}; + +static size_t weird_pull(void *cd, const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + while (*inbytesleft >= 1 && *outbytesleft >= 2) { + int i; + int done = 0; + for (i=0;weird_table[i].from;i++) { + if (strncmp((*inbuf), + weird_table[i].to, + weird_table[i].len) == 0) { + if (*inbytesleft < weird_table[i].len) { + DEBUG(0,("ERROR: truncated weird string\n")); + /* smb_panic("weird_pull"); */ + + } else { + (*outbuf)[0] = weird_table[i].from; + (*outbuf)[1] = 0; + (*inbytesleft) -= weird_table[i].len; + (*outbytesleft) -= 2; + (*inbuf) += weird_table[i].len; + (*outbuf) += 2; + done = 1; + break; + } + } + } + if (done) continue; + (*outbuf)[0] = (*inbuf)[0]; + (*outbuf)[1] = 0; + (*inbytesleft) -= 1; + (*outbytesleft) -= 2; + (*inbuf) += 1; + (*outbuf) += 2; + } + + if (*inbytesleft > 0) { + errno = E2BIG; + return -1; + } + + return 0; +} + +static size_t weird_push(void *cd, const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + int ir_count=0; + + while (*inbytesleft >= 2 && *outbytesleft >= 1) { + int i; + int done=0; + for (i=0;weird_table[i].from;i++) { + if ((*inbuf)[0] == weird_table[i].from && + (*inbuf)[1] == 0) { + if (*outbytesleft < weird_table[i].len) { + DEBUG(0,("No room for weird character\n")); + /* smb_panic("weird_push"); */ + } else { + memcpy(*outbuf, weird_table[i].to, + weird_table[i].len); + (*inbytesleft) -= 2; + (*outbytesleft) -= weird_table[i].len; + (*inbuf) += 2; + (*outbuf) += weird_table[i].len; + done = 1; + break; + } + } + } + if (done) continue; + + (*outbuf)[0] = (*inbuf)[0]; + if ((*inbuf)[1]) ir_count++; + (*inbytesleft) -= 2; + (*outbytesleft) -= 1; + (*inbuf) += 2; + (*outbuf) += 1; + } + + if (*inbytesleft == 1) { + errno = EINVAL; + return -1; + } + + if (*inbytesleft > 1) { + errno = E2BIG; + return -1; + } + + return ir_count; +} + +struct charset_functions weird_functions = {"WEIRD", weird_pull, weird_push}; + +NTSTATUS charset_weird_init(void); +NTSTATUS charset_weird_init(void) +{ + if (!smb_register_charset(&weird_functions)) { + return NT_STATUS_INTERNAL_ERROR; + } + return NT_STATUS_OK; +} diff --git a/lib/util/charset/wscript_build b/lib/util/charset/wscript_build index 29e168dce1..1f2c8dfa7a 100644 --- a/lib/util/charset/wscript_build +++ b/lib/util/charset/wscript_build @@ -1,18 +1,44 @@ #!/usr/bin/env python - -if bld.env._SAMBA_BUILD_ == 4: - bld.SAMBA_SUBSYSTEM('CHARSET', - source='charcnv.c util_unistr.c', - public_deps='CODEPOINTS', - public_headers='charset.h', - ) - bld.SAMBA_SUBSYSTEM('ICONV_WRAPPER', source='iconv.c', public_deps='iconv replace talloc') -bld.SAMBA_SUBSYSTEM('CODEPOINTS', - source='codepoints.c util_str.c util_unistr_w.c', - deps='DYNCONFIG ICONV_WRAPPER' - ) +bld.SAMBA_SUBSYSTEM('CHARSET', + public_headers='charset.h', + source='codepoints.c convert_string.c util_str.c util_unistr_w.c charcnv.c pull_push.c util_unistr.c', + deps='DYNCONFIG ICONV_WRAPPER', + public_deps='talloc') + +bld.SAMBA_MODULE('charset_weird', + subsystem='CHARSET', + source='weird.c', + init_function='', + deps='samba-util', + internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_weird'), + enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_weird')) + +bld.SAMBA_MODULE('charset_CP850', + subsystem='CHARSET', + source='CP850.c', + init_function='', + deps='samba-util', + internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_CP850'), + enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_CP850')) + +bld.SAMBA_MODULE('charset_CP437', + subsystem='CHARSET', + source='CP437.c', + init_function='', + deps='samba-util', + internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_CP437'), + enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_CP437')) + +bld.SAMBA_MODULE('charset_macosxfs', + subsystem='CHARSET', + source='charset_macosxfs.c', + init_function='', + internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_macosxfs'), + enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_macosxfs')) + + |