diff options
| author | Peng Wu <alexepico@gmail.com> | 2024-09-26 14:13:42 +0800 |
|---|---|---|
| committer | Peng Wu <alexepico@gmail.com> | 2024-09-27 09:54:21 +0800 |
| commit | 43c867fb6ed0f29cb3b75aea485f9952f6cdb7f6 (patch) | |
| tree | a6a255542d4fba695cac756f317b33bdedb6cc8c /src/storage/punct_table.cpp | |
| parent | 17b616cbc44dcdf662edb665e0f3ddee9d065070 (diff) | |
| download | libpinyin-43c867fb6ed0f29cb3b75aea485f9952f6cdb7f6.tar.gz libpinyin-43c867fb6ed0f29cb3b75aea485f9952f6cdb7f6.tar.xz libpinyin-43c867fb6ed0f29cb3b75aea485f9952f6cdb7f6.zip | |
Update class PunctTable
Diffstat (limited to 'src/storage/punct_table.cpp')
| -rw-r--r-- | src/storage/punct_table.cpp | 32 |
1 files changed, 13 insertions, 19 deletions
```diff
diff --git a/src/storage/punct_table.cpp b/src/storage/punct_table.cpp
index 184be92..461d4bc 100644
--- a/src/storage/punct_table.cpp
+++ b/src/storage/punct_table.cpp
@@ -23,6 +23,8 @@
 
 using namespace pinyin;
 
+static const ucs4_t null_char = 0;
+
 PunctTableEntry::PunctTableEntry() {
     m_ucs4_cache = g_array_new(TRUE, TRUE, sizeof(ucs4_t));
     m_utf8_cache = g_string_new(NULL);
@@ -44,11 +46,8 @@ bool PunctTableEntry::escape(const gchar * punct, gint maxlen) {
     glong ucs4_len = 0;
     gunichar * ucs4_str = g_utf8_to_ucs4(punct, maxlen, NULL, &ucs4_len, NULL);
 
-    for(int i = 0; i < ucs4_len; ++i) {
-        g_array_append_val(m_ucs4_cache, ucs4_str[i]);
-        if (i < ucs4_len - 1)
-            g_array_append_val(m_ucs4_cache, ucs4_str[i]);
-    }
+    g_array_append_vals(m_ucs4_cache, ucs4_str, ucs4_len);
+    g_array_append_val(m_ucs4_cache, null_char);
 
     g_free(ucs4_str);
     return true;
@@ -64,15 +63,11 @@ int PunctTableEntry::unescape(const ucs4_t * punct, gint maxlen) {
     while (index < maxlen) {
         g_string_append_unichar(m_utf8_cache, punct[index]);
         index++;
-        if (index >= maxlen)
-            break;
-        if (punct[index - 1] == punct[index])
-            index++;
-        else
+        if (punct[index] == null_char)
             break;
     }
 
-    return index;
+    return index + 1;
 }
 
 bool PunctTableEntry::get_all_punctuations(gchar ** & puncts) {
@@ -102,8 +97,11 @@ bool PunctTableEntry::append_punctuation(const gchar * punct) {
     gchar ** puncts = NULL;
     get_all_punctuations(puncts);
 
-    if (puncts && g_strv_contains(puncts, punct))
-        abort();
+    if (puncts && g_strv_contains(puncts, punct)) {
+        fprintf(stderr, "duplicated punctuations: %s\n", punct);
+        g_strfreev(puncts);
+        return false;
+    }
     g_strfreev(puncts);
 
     if (!escape(punct))
@@ -138,13 +136,9 @@ bool PunctTableEntry::remove_punctuation(const gchar * punct) {
         }
 
         /* check the next punctuation index */
+        while (null_char != *(begin + index))
+            index++;
         index++;
-        while (begin + index < end) {
-            if (begin[index - 1] == begin[index])
-                index += 2;
-            else
-                break;
-        }
     }
 
     return false;
```
