diff options
author | Peng Wu <alexepico@gmail.com> | 2016-05-13 14:50:55 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2016-05-13 14:50:55 +0800 |
commit | ed191f413f2739982fc737022713cb5226f721de (patch) | |
tree | b79f3c6b08280520fb5e6ff818996755080f3e18 | |
parent | 5b487bd6d66f9a8f43260a9bec978ea8cf692995 (diff) | |
download | libpinyin-ed191f413f2739982fc737022713cb5226f721de.tar.gz libpinyin-ed191f413f2739982fc737022713cb5226f721de.tar.xz libpinyin-ed191f413f2739982fc737022713cb5226f721de.zip |
write fuzzy_syllable_step function
-rw-r--r-- | src/storage/phonetic_key_matrix.cpp | 103 | ||||
-rw-r--r-- | src/storage/phonetic_key_matrix.h | 14 |
2 files changed, 110 insertions, 7 deletions
diff --git a/src/storage/phonetic_key_matrix.cpp b/src/storage/phonetic_key_matrix.cpp index b0b69f9..693753e 100644 --- a/src/storage/phonetic_key_matrix.cpp +++ b/src/storage/phonetic_key_matrix.cpp @@ -20,6 +20,7 @@ */ #include "phonetic_key_matrix.h" +#include "pinyin_custom2.h" #include <assert.h> #include <stdio.h> @@ -30,8 +31,8 @@ bool fill_phonetic_key_matrix_from_chewing_keys(PhoneticKeyMatrix * matrix, ChewingKeyRestVector key_rests) { assert(keys->len == key_rests->len); - ChewingKey * key = NULL; - ChewingKeyRest * key_rest = NULL; + const ChewingKey * key = NULL; + const ChewingKeyRest * key_rest = NULL; /* last key rest. */ key_rest = &g_array_index(key_rests, ChewingKeyRest, key_rests->len - 1); @@ -71,6 +72,92 @@ bool fill_phonetic_key_matrix_from_chewing_keys(PhoneticKeyMatrix * matrix, return true; } +bool fuzzy_syllable_step(pinyin_option_t options, + PhoneticKeyMatrix * matrix) { + size_t length = matrix->size(); + + GArray * keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey)); + GArray * key_rests = g_array_new(TRUE, TRUE, sizeof(ChewingKeyRest)); + + for (size_t index = 0; index < length; ++index) { + /* for pinyin initials. */ + matrix->get_items(index, keys, key_rests); + assert(keys->len == key_rests->len); + if (0 == keys->len) + continue; + + size_t i = 0; + for (i = 0; i < keys->len; ++i) { + ChewingKey key = g_array_index(keys, ChewingKey, i); + ChewingKeyRest key_rest = g_array_index(key_rests, + ChewingKeyRest, i); + +#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) do { \ + if (options & AMBIGUITY) { \ + if (ORIGIN == key.m_initial) { \ + key.m_initial = ANOTHER; \ + matrix->append(index, key, key_rest); \ + } \ + } \ + } while (0) + + + MATCH(PINYIN_AMB_C_CH, CHEWING_C, CHEWING_CH); + MATCH(PINYIN_AMB_C_CH, CHEWING_CH, CHEWING_C); + MATCH(PINYIN_AMB_Z_ZH, CHEWING_Z, CHEWING_ZH); + MATCH(PINYIN_AMB_Z_ZH, CHEWING_ZH, CHEWING_Z); + MATCH(PINYIN_AMB_S_SH, CHEWING_S, CHEWING_SH); + MATCH(PINYIN_AMB_S_SH, CHEWING_SH, CHEWING_S); + MATCH(PINYIN_AMB_L_R, CHEWING_L, CHEWING_R); + MATCH(PINYIN_AMB_L_R, CHEWING_R, CHEWING_L); + MATCH(PINYIN_AMB_L_N, CHEWING_L, CHEWING_N); + MATCH(PINYIN_AMB_L_N, CHEWING_N, CHEWING_L); + MATCH(PINYIN_AMB_F_H, CHEWING_F, CHEWING_H); + MATCH(PINYIN_AMB_F_H, CHEWING_H, CHEWING_F); + MATCH(PINYIN_AMB_G_K, CHEWING_G, CHEWING_K); + MATCH(PINYIN_AMB_G_K, CHEWING_K, CHEWING_G); + +#undef MATCH + + } + + /* for pinyin finals. */ + matrix->get_items(index, keys, key_rests); + assert(keys->len == key_rests->len); + assert(0 != keys->len); + + for (i = 0; i < keys->len; ++i) { + ChewingKey key = g_array_index(keys, ChewingKey, i); + ChewingKeyRest key_rest = g_array_index(key_rests, + ChewingKeyRest, i); + +#define MATCH(AMBIGUITY, ORIGIN, ANOTHER) do { \ + if (options & AMBIGUITY) { \ + if (ORIGIN == key.m_final) { \ + key.m_final = ANOTHER; \ + matrix->append(index, key, key_rest); \ + } \ + } \ + } while (0) + + + MATCH(PINYIN_AMB_AN_ANG, CHEWING_AN, CHEWING_ANG); + MATCH(PINYIN_AMB_AN_ANG, CHEWING_ANG, CHEWING_AN); + MATCH(PINYIN_AMB_EN_ENG, CHEWING_EN, CHEWING_ENG); + MATCH(PINYIN_AMB_EN_ENG, CHEWING_ENG, CHEWING_EN); + MATCH(PINYIN_AMB_IN_ING, PINYIN_IN, PINYIN_ING); + MATCH(PINYIN_AMB_IN_ING, PINYIN_ING, PINYIN_IN); + +#undef MATCH + + } + } + + g_array_free(keys, TRUE); + g_array_free(key_rests, TRUE); + return true; +} + bool dump_phonetic_key_matrix(PhoneticKeyMatrix * matrix) { size_t length = matrix->size(); @@ -80,18 +167,20 @@ bool dump_phonetic_key_matrix(PhoneticKeyMatrix * matrix) { for (size_t index = 0; index < length; ++index) { matrix->get_items(index, keys, key_rests); assert(keys->len == key_rests->len); + if (0 == keys->len) + continue; printf("Column:%ld:\n", index); for (size_t i = 0; i < keys->len; ++i) { - ChewingKey * key = &g_array_index(keys, ChewingKey, i); - ChewingKeyRest * key_rest = &g_array_index(key_rests, - ChewingKeyRest, i); + ChewingKey key = g_array_index(keys, ChewingKey, i); + ChewingKeyRest key_rest = g_array_index(key_rests, + ChewingKeyRest, i); - gchar * pinyin = key->get_pinyin_string(); + gchar * pinyin = key.get_pinyin_string(); printf("ChewingKey:%s\n", pinyin); printf("ChewingKeyRest:%hd\t%hd\n", - key_rest->m_raw_begin, key_rest->m_raw_end); + key_rest.m_raw_begin, key_rest.m_raw_end); g_free(pinyin); } } diff --git a/src/storage/phonetic_key_matrix.h b/src/storage/phonetic_key_matrix.h index 99aeb11..8d6b49a 100644 --- a/src/storage/phonetic_key_matrix.h +++ b/src/storage/phonetic_key_matrix.h @@ -134,10 +134,24 @@ public: }; +/** + * fill_phonetic_key_matrix_from_chewing_keys: + * Convert ChewingKeyVector and ChewingKeyRestVector + * to PhoneticKeyMatrix. + */ bool fill_phonetic_key_matrix_from_chewing_keys(PhoneticKeyMatrix * matrix, ChewingKeyVector keys, ChewingKeyRestVector key_rests); +/** + * fuzzy_syllable_step: + * For "an" <=> "ang", fill the fuzzy pinyins into the matrix. + * Supported nearly in all pinyin parsers. + * At most 3 * 2 entries will be added. + */ +bool fuzzy_syllable_step(pinyin_option_t options, + PhoneticKeyMatrix * matrix); + bool dump_phonetic_key_matrix(PhoneticKeyMatrix * matrix); }; |