summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2011-12-08 17:19:47 +0800
committer: Peng Wu <alexepico@gmail.com> 2011-12-09 13:34:47 +0800
commit: c98f91e572ddcb4caf953e1d5032ac8442d7d2f4 (patch)
tree: 98881ca7a92015ada15e01870bf2b77ee6df878a
parent: 669d5b63478a83df3938ae128f52dd3ef4fd7d2f (diff)
download: libpinyin-c98f91e572ddcb4caf953e1d5032ac8442d7d2f4.tar.gz
libpinyin-c98f91e572ddcb4caf953e1d5032ac8442d7d2f4.tar.xz
libpinyin-c98f91e572ddcb4caf953e1d5032ac8442d7d2f4.zip
port pinyin.cpp
-rw-r--r-- src/pinyin.cpp 85
-rw-r--r-- src/pinyin.h 24
-rw-r--r-- src/pinyin_internal.h 1
3 files changed, 58 insertions, 52 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 8832321..a5ada67 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -26,14 +26,13 @@
/* a glue layer for input method integration. */
struct _pinyin_context_t{
- PinyinCustomSettings m_custom;
+ pinyin_option_t m_options;
- BitmapPinyinValidator m_validator;
- PinyinDefaultParser * m_default_parser;
- PinyinShuangPinParser * m_shuang_pin_parser;
- PinyinZhuYinParser * m_chewing_parser;
+ FullPinyinParser2 * m_default_parser;
+ DoublePinyinParser2 * m_shuang_pin_parser;
+ ChewingParser2 * m_chewing_parser;
- PinyinLargeTable * m_pinyin_table;
+ ChewingLargeTable * m_pinyin_table;
PhraseLargeTable * m_phrase_table;
FacadePhraseIndex * m_phrase_index;
Bigram * m_system_bigram;
@@ -55,7 +54,7 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
context->m_user_dir = g_strdup(userdir);
context->m_modified = false;
- context->m_pinyin_table = new PinyinLargeTable(&(context->m_custom));
+ context->m_pinyin_table = new ChewingLargeTable(context->m_options);
MemoryChunk * chunk = new MemoryChunk;
gchar * filename = g_build_filename
(context->m_system_dir, "pinyin_index.bin", NULL);
@@ -66,10 +65,9 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
g_free(filename);
context->m_pinyin_table->load(chunk);
- context->m_validator.initialize(context->m_pinyin_table);
- context->m_default_parser = new PinyinDefaultParser;
- context->m_shuang_pin_parser = new PinyinShuangPinParser;
- context->m_chewing_parser = new PinyinZhuYinParser;
+ context->m_default_parser = new FullPinyinParser2;
+ context->m_shuang_pin_parser = new DoublePinyinParser2;
+ context->m_chewing_parser = new ChewingParser2;
context->m_phrase_table = new PhraseLargeTable;
chunk = new MemoryChunk;
@@ -119,7 +117,7 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
g_free(filename);
context->m_pinyin_lookup = new PinyinLookup
- ( &(context->m_custom), context->m_pinyin_table,
+ ( context->m_options, context->m_pinyin_table,
context->m_phrase_index, context->m_system_bigram,
context->m_user_bigram);
@@ -173,13 +171,13 @@ bool pinyin_save(pinyin_context_t * context){
}
bool pinyin_set_double_pinyin_scheme(pinyin_context_t * context,
- PinyinShuangPinScheme scheme){
+ DoublePinyinScheme scheme){
context->m_shuang_pin_parser->set_scheme(scheme);
return true;
}
bool pinyin_set_chewing_scheme(pinyin_context_t * context,
- PinyinZhuYinScheme scheme){
+ ChewingScheme scheme){
context->m_chewing_parser->set_scheme(scheme);
return true;
}
@@ -206,9 +204,8 @@ void pinyin_fini(pinyin_context_t * context){
/* copy from custom to context->m_custom. */
bool pinyin_set_options(pinyin_context_t * context,
- PinyinCustomSettings * custom){
- guint32 option = custom->to_value();
- context->m_custom.from_value(option);
+ pinyin_option_t options){
+ context->m_options = options;
return true;
}
@@ -217,8 +214,9 @@ pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context){
pinyin_instance_t * instance = new pinyin_instance_t;
instance->m_context = context;
- instance->m_pinyin_keys = g_array_new(FALSE, FALSE, sizeof(PinyinKey));
- instance->m_pinyin_poses = g_array_new(FALSE, FALSE, sizeof(PinyinKeyPos));
+ instance->m_pinyin_keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
+ instance->m_pinyin_key_rests =
+ g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));
instance->m_constraints = g_array_new
(FALSE, FALSE, sizeof(lookup_constraint_t));
instance->m_match_results =
@@ -229,7 +227,7 @@ pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context){
void pinyin_free_instance(pinyin_instance_t * instance){
g_array_free(instance->m_pinyin_keys, TRUE);
- g_array_free(instance->m_pinyin_poses, TRUE);
+ g_array_free(instance->m_pinyin_key_rests, TRUE);
g_array_free(instance->m_constraints, TRUE);
g_array_free(instance->m_match_results, TRUE);
@@ -239,7 +237,7 @@ void pinyin_free_instance(pinyin_instance_t * instance){
static bool pinyin_update_constraints(pinyin_instance_t * instance){
pinyin_context_t * & context = instance->m_context;
- PinyinKeyVector & pinyin_keys = instance->m_pinyin_keys;
+ ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
CandidateConstraints & constraints = instance->m_constraints;
size_t key_len = constraints->len;
@@ -300,12 +298,13 @@ bool pinyin_get_sentence(pinyin_instance_t * instance,
bool pinyin_parse_full_pinyin(pinyin_instance_t * instance,
const char * onepinyin,
- PinyinKey * onekey){
+ ChewingKey * onekey,
+ ChewingKeyRest * onekeyrest){
pinyin_context_t * & context = instance->m_context;
int pinyin_len = strlen(onepinyin);
int parse_len = context->m_default_parser->parse_one_key
- ( context->m_validator, *onekey, onepinyin, pinyin_len);
+ ( context->m_options, *onekey, *onekeyrest, onepinyin, pinyin_len);
return pinyin_len == parse_len;
}
@@ -315,20 +314,21 @@ size_t pinyin_parse_more_full_pinyins(pinyin_instance_t * instance,
int pinyin_len = strlen(pinyins);
int parse_len = context->m_default_parser->parse
- ( context->m_validator, instance->m_pinyin_keys,
- instance->m_pinyin_poses, pinyins, pinyin_len);
+ ( context->m_options, instance->m_pinyin_keys,
+ instance->m_pinyin_key_rests, pinyins, pinyin_len);
return parse_len;
}
bool pinyin_parse_double_pinyin(pinyin_instance_t * instance,
const char * onepinyin,
- PinyinKey * onekey){
+ ChewingKey * onekey,
+ ChewingKeyRest * onekeyrest){
pinyin_context_t * & context = instance->m_context;
int pinyin_len = strlen(onepinyin);
int parse_len = context->m_shuang_pin_parser->parse_one_key
- ( context->m_validator, *onekey, onepinyin, pinyin_len);
+ ( context->m_options, *onekey, *onekeyrest, onepinyin, pinyin_len);
return pinyin_len == parse_len;
}
@@ -338,20 +338,21 @@ size_t pinyin_parse_more_double_pinyins(pinyin_instance_t * instance,
int pinyin_len = strlen(pinyins);
int parse_len = context->m_shuang_pin_parser->parse
- ( context->m_validator, instance->m_pinyin_keys,
- instance->m_pinyin_poses, pinyins, pinyin_len);
+ ( context->m_options, instance->m_pinyin_keys,
+ instance->m_pinyin_key_rests, pinyins, pinyin_len);
return parse_len;
}
bool pinyin_parse_chewing(pinyin_instance_t * instance,
const char * onechewing,
- PinyinKey * onekey){
+ ChewingKey * onekey,
+ ChewingKeyRest * onekeyrest){
pinyin_context_t * & context = instance->m_context;
int chewing_len = strlen(onechewing);
int parse_len = context->m_chewing_parser->parse_one_key
- ( context->m_validator, *onekey, onechewing, chewing_len );
+ ( context->m_options, *onekey, *onekeyrest, onechewing, chewing_len );
return chewing_len == parse_len;
}
@@ -361,8 +362,8 @@ size_t pinyin_parse_more_chewings(pinyin_instance_t * instance,
int chewing_len = strlen(chewings);
int parse_len = context->m_chewing_parser->parse
- ( context->m_validator, instance->m_pinyin_keys,
- instance->m_pinyin_poses, chewings, chewing_len);
+ ( context->m_options, instance->m_pinyin_keys,
+ instance->m_pinyin_key_rests, chewings, chewing_len);
return parse_len;
}
@@ -370,7 +371,7 @@ size_t pinyin_parse_more_chewings(pinyin_instance_t * instance,
/* internal definition */
typedef struct {
pinyin_context_t * m_context;
- PinyinKey * m_pinyin_keys;
+ ChewingKey * m_pinyin_keys;
} compare_context;
static gint compare_token( gconstpointer lhs, gconstpointer rhs){
@@ -386,16 +387,16 @@ static gint compare_token_with_unigram_freq(gconstpointer lhs,
phrase_token_t token_rhs = *((phrase_token_t *)rhs);
compare_context * context = (compare_context *)user_data;
FacadePhraseIndex * phrase_index = context->m_context->m_phrase_index;
- PinyinCustomSettings & custom = context->m_context->m_custom;
- PinyinKey * pinyin_keys = context->m_pinyin_keys;
+ pinyin_option_t options = context->m_context->m_options;
+ ChewingKey * pinyin_keys = context->m_pinyin_keys;
PhraseItem item;
phrase_index->get_phrase_item(token_lhs, item);
guint32 freq_lhs = item.get_unigram_frequency() *
- item.get_pinyin_possibility(custom, pinyin_keys) * 256;
+ item.get_pinyin_possibility(options, pinyin_keys) * 256;
phrase_index->get_phrase_item(token_rhs, item);
guint32 freq_rhs = item.get_unigram_frequency() *
- item.get_pinyin_possibility(custom, pinyin_keys) * 256;
+ item.get_pinyin_possibility(options, pinyin_keys) * 256;
return -(freq_lhs - freq_rhs); /* in descendant order */
}
@@ -404,11 +405,11 @@ bool pinyin_get_candidates(pinyin_instance_t * instance,
size_t offset,
TokenVector candidates){
pinyin_context_t * & context = instance->m_context;
- PinyinKeyVector & pinyin_keys = instance->m_pinyin_keys;
+ ChewingKeyVector & pinyin_keys = instance->m_pinyin_keys;
g_array_set_size(candidates, 0);
- PinyinKey * keys = &g_array_index
- (pinyin_keys, PinyinKey, offset);
+ ChewingKey * keys = &g_array_index
+ (pinyin_keys, ChewingKey, offset);
size_t pinyin_len = pinyin_keys->len - offset;
compare_context comp_context;
@@ -552,7 +553,7 @@ bool pinyin_train(pinyin_instance_t * instance){
bool pinyin_reset(pinyin_instance_t * instance){
g_array_set_size(instance->m_pinyin_keys, 0);
- g_array_set_size(instance->m_pinyin_poses, 0);
+ g_array_set_size(instance->m_pinyin_key_rests, 0);
g_array_set_size(instance->m_constraints, 0);
g_array_set_size(instance->m_match_results, 0);
diff --git a/src/pinyin.h b/src/pinyin.h
index 5820532..390e396 100644
--- a/src/pinyin.h
+++ b/src/pinyin.h
@@ -25,8 +25,9 @@
#include <stdio.h>
#include "novel_types.h"
-#include "pinyin_custom.h"
-#include "pinyin_base.h"
+#include "pinyin_custom2.h"
+#include "chewing_key.h"
+#include "pinyin_parser2.h"
using namespace pinyin;
@@ -36,8 +37,8 @@ typedef struct _pinyin_context_t pinyin_context_t;
typedef struct {
pinyin_context_t * m_context;
- PinyinKeyVector m_pinyin_keys;
- PinyinKeyPosVector m_pinyin_poses;
+ ChewingKeyVector m_pinyin_keys;
+ ChewingKeyRestVector m_pinyin_key_rests;
CandidateConstraints m_constraints;
MatchResults m_match_results;
} pinyin_instance_t;
@@ -45,13 +46,13 @@ typedef struct {
pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir);
bool pinyin_save(pinyin_context_t * context);
bool pinyin_set_double_pinyin_scheme(pinyin_context_t * context,
- PinyinShuangPinScheme scheme);
+ DoublePinyinScheme scheme);
bool pinyin_set_chewing_scheme(pinyin_context_t * context,
- PinyinZhuYinScheme scheme);
+ ChewingScheme scheme);
void pinyin_fini(pinyin_context_t * context);
bool pinyin_set_options(pinyin_context_t * context,
- PinyinCustomSettings * custom);
+ pinyin_option_t options);
pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context);
void pinyin_free_instance(pinyin_instance_t * instance);
@@ -66,19 +67,22 @@ bool pinyin_get_sentence(pinyin_instance_t * instance,
bool pinyin_parse_full_pinyin(pinyin_instance_t * instance,
const char * onepinyin,
- PinyinKey * onekey);
+ ChewingKey * onekey,
+ ChewingKeyRest * onekeyrest);
size_t pinyin_parse_more_full_pinyins(pinyin_instance_t * instance,
const char * pinyins);
bool pinyin_parse_double_pinyin(pinyin_instance_t * instance,
const char * onepinyin,
- PinyinKey * onekey);
+ ChewingKey * onekey,
+ ChewingKeyRest * onekeyrest);
size_t pinyin_parse_more_double_pinyins(pinyin_instance_t * instance,
const char * pinyins);
bool pinyin_parse_chewing(pinyin_instance_t * instance,
const char * onechewing,
- PinyinKey * onekey);
+ ChewingKey * onekey,
+ ChewingKeyRest * onekeyrest);
size_t pinyin_parse_more_chewings(pinyin_instance_t * instance,
const char * chewings);
diff --git a/src/pinyin_internal.h b/src/pinyin_internal.h
index 252c733..d4ef233 100644
--- a/src/pinyin_internal.h
+++ b/src/pinyin_internal.h
@@ -26,6 +26,7 @@
#include "pinyin.h"
#include "memory_chunk.h"
+#include "pinyin_base.h"
#include "pinyin_phrase.h"
#include "pinyin_large_table.h"
#include "phrase_large_table.h"