summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2015-01-22 12:28:12 +0800
committerPeng Wu <alexepico@gmail.com>2015-01-22 12:28:12 +0800
commit71f0161885f333ee13d81215b201bef9fd28c928 (patch)
treeb05e1687fc3eefd31b28ac2df791ada847ee3ae8
parent99266561a5c5ec8e43944e807ef795c72fa46170 (diff)
downloadlibpinyin-71f0161885f333ee13d81215b201bef9fd28c928.tar.gz
libpinyin-71f0161885f333ee13d81215b201bef9fd28c928.tar.xz
libpinyin-71f0161885f333ee13d81215b201bef9fd28c928.zip
support to export user phrases
-rw-r--r--src/pinyin.cpp119
-rw-r--r--src/pinyin.h13
2 files changed, 132 insertions, 0 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 10ce149..a990730 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -90,6 +90,13 @@ struct _import_iterator_t{
guint8 m_phrase_index;
};
+struct _export_iterator_t{
+ pinyin_context_t * m_context;
+ guint8 m_phrase_index;
+ /* null token means no next item. */
+ phrase_token_t m_next_token;
+ guint8 m_next_pronunciation;
+};
static bool check_format(pinyin_context_t * context){
const char * userdir = context->m_user_dir;
@@ -484,6 +491,118 @@ void pinyin_end_add_phrases(import_iterator_t * iter){
delete iter;
}
+export_iterator_t * pinyin_begin_get_phrases(pinyin_context_t * context,
+ guint index){
+ export_iterator_t * iter = new export_iterator_t;
+ iter->m_context = context;
+ iter->m_phrase_index = index;
+ iter->m_next_token = null_token;
+ iter->m_next_pronunciation = 0;
+
+ /* probe next token. */
+ PhraseIndexRange range;
+ int retval = iter->m_context->m_phrase_index->get_range
+ (iter->m_phrase_index, range);
+ if (retval != ERROR_OK)
+ return iter;
+
+ PhraseItem item;
+ phrase_token_t token = range.m_range_begin;
+ for (; token < range.m_range_end; ++token) {
+ retval = iter->m_context->m_phrase_index->get_phrase_item
+ (token, item);
+ if (ERROR_OK == retval && item.get_n_pronunciation() >= 1) {
+ iter->m_next_token = token;
+ break;
+ }
+ }
+ return iter;
+}
+
+bool pinyin_iterator_has_next_phrase(export_iterator_t * iter){
+ /* no next token. */
+ if (null_token == iter->m_next_token)
+ return false;
+ return true;
+}
+
+/* phrase, pinyin should be freed by g_free(). */
+bool pinyin_iterator_get_next_phrase(export_iterator_t * iter,
+ gchar ** phrase,
+ gchar ** pinyin,
+ gint * count){
+ /* count "-1" means default count. */
+ *phrase = NULL; *pinyin = NULL; *count = -1;
+
+ PhraseItem item;
+ int retval = iter->m_context->m_phrase_index->get_phrase_item
+ (iter->m_next_token, item);
+ /* assume valid next token from previous call. */
+ assert(ERROR_OK == retval);
+
+ /* fill phrase and pronunciation pair. */
+ ucs4_t phrase_ucs4[MAX_PHRASE_LENGTH];
+ guint8 len = item.get_phrase_length();
+ assert(item.get_phrase_string(phrase_ucs4));
+ gchar * phrase_utf8 = g_ucs4_to_utf8
+ (phrase_ucs4, len, NULL, NULL, NULL);
+
+ guint8 nth_pronun = iter->m_next_pronunciation;
+ guint8 n_pronuns = item.get_n_pronunciation();
+ /* assume valid pronunciation from previous call. */
+ assert(nth_pronun < n_pronuns);
+ ChewingKey keys[MAX_PHRASE_LENGTH];
+ guint32 freq = 0;
+ assert(item.get_nth_pronunciation(nth_pronun, keys, freq));
+
+ GPtrArray * array = g_ptr_array_new();
+ for(size_t i = 0; i < len; ++i) {
+ g_ptr_array_add(array, keys[i].get_pinyin_string());
+ }
+ g_ptr_array_add(array, NULL);
+
+ gchar ** strings = (gchar **)g_ptr_array_free(array, FALSE);
+ gchar * pinyins = g_strjoinv("'", strings);
+ g_strfreev(strings);
+
+ /* use default value. */
+ *phrase = phrase_utf8; *pinyin = pinyins;
+ if (freq > 0)
+ *count = freq;
+
+ /* probe next pronunciation. */
+ nth_pronun ++;
+ if (nth_pronun < n_pronuns) {
+ iter->m_next_pronunciation = nth_pronun;
+ return true;
+ }
+
+ iter->m_next_pronunciation = 0;
+ /* probe next token. */
+ PhraseIndexRange range;
+ retval = iter->m_context->m_phrase_index->get_range
+ (iter->m_phrase_index, range);
+ if (retval != ERROR_OK) {
+ iter->m_next_token = null_token;
+ return true;
+ }
+
+ phrase_token_t token = iter->m_next_token + 1;
+ for (; token < range.m_range_end; ++token) {
+ retval = iter->m_context->m_phrase_index->get_phrase_item
+ (token, item);
+ if (ERROR_OK == retval && item.get_n_pronunciation() >= 1) {
+ iter->m_next_token = token;
+ break;
+ }
+ }
+ return true;
+}
+
+void pinyin_end_get_phrases(export_iterator_t * iter){
+ delete iter;
+}
+
bool pinyin_save(pinyin_context_t * context){
if (!context->m_user_dir)
return false;
diff --git a/src/pinyin.h b/src/pinyin.h
index a421841..c0ec076 100644
--- a/src/pinyin.h
+++ b/src/pinyin.h
@@ -38,6 +38,7 @@ typedef struct _pinyin_instance_t pinyin_instance_t;
typedef struct _lookup_candidate_t lookup_candidate_t;
typedef struct _import_iterator_t import_iterator_t;
+typedef struct _export_iterator_t export_iterator_t;
typedef enum _lookup_candidate_type_t{
BEST_MATCH_CANDIDATE = 1,
@@ -120,6 +121,18 @@ bool pinyin_iterator_add_phrase(import_iterator_t * iter,
*/
void pinyin_end_add_phrases(import_iterator_t * iter);
+export_iterator_t * pinyin_begin_get_phrases(pinyin_context_t * context,
+ guint index);
+
+bool pinyin_iterator_has_next_phrase(export_iterator_t * iter);
+
+bool pinyin_iterator_get_next_phrase(export_iterator_t * iter,
+ gchar ** phrase,
+ gchar ** pinyin,
+ gint * count);
+
+void pinyin_end_get_phrases(export_iterator_t * iter);
+
/**
* pinyin_save:
* @context: the pinyin context to be saved into user directory.