summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2016-05-17 17:25:49 +0800
committerPeng Wu <alexepico@gmail.com>2016-05-17 17:25:49 +0800
commit166f65c916579eec9ed3004879a71146ecad6443 (patch)
treeb68cc3339ae7ecfef7f16f42eef77c90838cc68d
parent84e1ba97704b4e4cf0185af105c768ae3a693206 (diff)
downloadlibpinyin-166f65c916579eec9ed3004879a71146ecad6443.tar.gz
libpinyin-166f65c916579eec9ed3004879a71146ecad6443.tar.xz
libpinyin-166f65c916579eec9ed3004879a71146ecad6443.zip
write inner_split_step function
-rw-r--r--src/storage/phonetic_key_matrix.cpp2
-rw-r--r--src/storage/phonetic_key_matrix.h9
-rw-r--r--src/storage/pinyin_parser2.cpp56
3 files changed, 67 insertions, 0 deletions
diff --git a/src/storage/phonetic_key_matrix.cpp b/src/storage/phonetic_key_matrix.cpp
index af5b1a0..6f92e45 100644
--- a/src/storage/phonetic_key_matrix.cpp
+++ b/src/storage/phonetic_key_matrix.cpp
@@ -72,6 +72,7 @@ bool fill_phonetic_key_matrix_from_chewing_keys(PhoneticKeyMatrix * matrix,
return true;
}
+
bool fuzzy_syllable_step(pinyin_option_t options,
PhoneticKeyMatrix * matrix) {
size_t length = matrix->size();
@@ -160,6 +161,7 @@ bool fuzzy_syllable_step(pinyin_option_t options,
return true;
}
+
bool dump_phonetic_key_matrix(PhoneticKeyMatrix * matrix) {
size_t length = matrix->size();
diff --git a/src/storage/phonetic_key_matrix.h b/src/storage/phonetic_key_matrix.h
index 8d6b49a..f5054bd 100644
--- a/src/storage/phonetic_key_matrix.h
+++ b/src/storage/phonetic_key_matrix.h
@@ -144,6 +144,15 @@ bool fill_phonetic_key_matrix_from_chewing_keys(PhoneticKeyMatrix * matrix,
ChewingKeyRestVector key_rests);
/**
+ * inner_split_step:
+ * For "xian" => "xi'an", add "xi'an" to the matrix for matched "xian".
+ * Currently this feature is only supported for Hanyu Pinyin.
+ */
+bool inner_split_step(pinyin_option_t options,
+ PhoneticKeyMatrix * matrix);
+
+
+/**
* fuzzy_syllable_step:
* For "an" <=> "ang", fill the fuzzy pinyins into the matrix.
* Supported in nearly all pinyin parsers.
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
index 66ee0df..a4f5cae 100644
--- a/src/storage/pinyin_parser2.cpp
+++ b/src/storage/pinyin_parser2.cpp
@@ -32,6 +32,7 @@
#include "chewing_key.h"
#include "pinyin_parser_table.h"
#include "double_pinyin_table.h"
+#include "phonetic_key_matrix.h"
using namespace pinyin;
@@ -899,3 +900,58 @@ int PinyinDirectParser2::parse(pinyin_option_t options,
return parsed_len;
}
+
+
+/**
+ * inner_split_step:
+ * @options: the pinyin options; not consulted by this implementation.
+ * @matrix: the phonetic key matrix to scan and extend.
+ *
+ * Walks every column of the matrix and, for each key that equals the
+ * m_orig_struct of a divided_table entry, appends the two replacement
+ * keys of that entry: the first at the same column, the second at the
+ * column where the first replacement's spelling ends.
+ *
+ * Lives in this file because it needs the pinyin_parser_table header.
+ *
+ * Returns: always true.
+ */
+bool inner_split_step(pinyin_option_t options,
+                      PhoneticKeyMatrix * matrix) {
+    /* the size is read once, before anything is appended. */
+    const size_t num_columns = matrix->size();
+
+    GArray * keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey));
+    GArray * key_rests = g_array_new(TRUE, TRUE, sizeof(ChewingKeyRest));
+
+    const divided_table_item_t * const table_end =
+        divided_table + G_N_ELEMENTS(divided_table);
+
+    for (size_t column = 0; column < num_columns; ++column) {
+        matrix->get_items(column, keys, key_rests);
+        assert(keys->len == key_rests->len);
+
+        /* an empty column simply skips the loop below. */
+        for (size_t pos = 0; pos < keys->len; ++pos) {
+            const ChewingKey key = g_array_index(keys, ChewingKey, pos);
+            const ChewingKeyRest key_rest = g_array_index(key_rests,
+                                                          ChewingKeyRest, pos);
+
+            /* linear search for the first table entry matching this key. */
+            const divided_table_item_t * match = NULL;
+            for (const divided_table_item_t * cur = divided_table;
+                 cur != table_end; ++cur) {
+                if (key == cur->m_orig_struct) {
+                    match = cur;
+                    break;
+                }
+            }
+
+            if (NULL == match)
+                continue;
+
+            /* the split point is the column right after the first part. */
+            const size_t split = column + strlen(match->m_new_keys[0]);
+
+            /* first part: same start as the original, ends at the split. */
+            ChewingKey head_key = match->m_new_structs[0];
+            ChewingKeyRest head_rest = key_rest;
+            head_rest.m_raw_end = split;
+            matrix->append(column, head_key, head_rest);
+
+            /* second part: starts at the split, same end as the original. */
+            ChewingKey tail_key = match->m_new_structs[1];
+            ChewingKeyRest tail_rest = key_rest;
+            tail_rest.m_raw_begin = split;
+            matrix->append(split, tail_key, tail_rest);
+        }
+    }
+
+    g_array_free(keys, TRUE);
+    g_array_free(key_rests, TRUE);
+    return true;
+}