summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2013-08-15 13:32:28 +0800
committer: Peng Wu <alexepico@gmail.com> 2013-08-15 14:00:26 +0800
commit: 85bcb959f4500bacbbe43bafd857b66a0abc99e7 (patch)
tree: 775c6f8366d9b22c1c251bb7f218eff8200634d4
parent: cc24a45b4034d3bc4bee45439c6205bf4c70d437 (diff)
download: libzhuyin-85bcb959f4500bacbbe43bafd857b66a0abc99e7.tar.gz
libzhuyin-85bcb959f4500bacbbe43bafd857b66a0abc99e7.tar.xz
libzhuyin-85bcb959f4500bacbbe43bafd857b66a0abc99e7.zip
support more pinyins
-rw-r--r-- src/libpinyin.ver 2
-rw-r--r-- src/pinyin.cpp 8
-rw-r--r-- src/pinyin.h 8
-rw-r--r-- src/storage/chewing_key.h 5
-rw-r--r-- src/storage/pinyin_custom2.h 14
-rw-r--r-- src/storage/pinyin_parser2.cpp 70
-rw-r--r-- src/storage/pinyin_parser2.h 27
7 files changed, 93 insertions, 41 deletions
diff --git a/src/libpinyin.ver b/src/libpinyin.ver
index aa5027c..a05b81c 100644
--- a/src/libpinyin.ver
+++ b/src/libpinyin.ver
@@ -28,7 +28,7 @@ LIBPINYIN {
pinyin_lookup_tokens;
pinyin_train;
pinyin_reset;
- pinyin_get_chewing_string;
+ pinyin_get_bopomofo_string;
pinyin_get_pinyin_string;
pinyin_token_get_phrase;
pinyin_token_get_n_pronunciation;
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 885eceb..886d14b 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -1419,14 +1419,14 @@ bool pinyin_reset(pinyin_instance_t * instance){
return true;
}
-bool pinyin_get_chewing_string(pinyin_instance_t * instance,
- ChewingKey * key,
- gchar ** utf8_str) {
+bool pinyin_get_bopomofo_string(pinyin_instance_t * instance,
+ ChewingKey * key,
+ gchar ** utf8_str) {
*utf8_str = NULL;
if (0 == key->get_table_index())
return false;
- *utf8_str = key->get_chewing_string();
+ *utf8_str = key->get_bopomofo_string();
return true;
}
diff --git a/src/pinyin.h b/src/pinyin.h
index d157e79..243b81c 100644
--- a/src/pinyin.h
+++ b/src/pinyin.h
@@ -380,7 +380,7 @@ bool pinyin_train(pinyin_instance_t * instance);
bool pinyin_reset(pinyin_instance_t * instance);
/**
- * pinyin_get_chewing_string:
+ * pinyin_get_bopomofo_string:
* @instance: the pinyin instance.
* @key: the chewing key.
* @utf8_str: the chewing string.
@@ -389,9 +389,9 @@ bool pinyin_reset(pinyin_instance_t * instance);
* Get the chewing string of the key.
*
*/
-bool pinyin_get_chewing_string(pinyin_instance_t * instance,
- ChewingKey * key,
- gchar ** utf8_str);
+bool pinyin_get_bopomofo_string(pinyin_instance_t * instance,
+ ChewingKey * key,
+ gchar ** utf8_str);
/**
* pinyin_get_pinyin_string:
diff --git a/src/storage/chewing_key.h b/src/storage/chewing_key.h
index fc8a64e..a043404 100644
--- a/src/storage/chewing_key.h
+++ b/src/storage/chewing_key.h
@@ -24,6 +24,7 @@
#include <glib.h>
#include "chewing_enum.h"
+#include "pinyin_custom2.h"
using namespace pinyin;
@@ -65,8 +66,8 @@ public:
gint get_table_index();
/* Note: the return value should be freed by g_free. */
- gchar * get_pinyin_string();
- gchar * get_chewing_string();
+ gchar * get_pinyin_string(PinyinScheme scheme = PINYIN_DEFAULT);
+ gchar * get_bopomofo_string();
};
typedef struct _ChewingKey ChewingKey;
diff --git a/src/storage/pinyin_custom2.h b/src/storage/pinyin_custom2.h
index 6ae0c20..4212745 100644
--- a/src/storage/pinyin_custom2.h
+++ b/src/storage/pinyin_custom2.h
@@ -30,7 +30,7 @@ G_BEGIN_DECLS
* PinyinTableFlag:
*/
enum PinyinTableFlag{
- IS_CHEWING = 1U << 1,
+ IS_BOPOMOFO = 1U << 1,
IS_PINYIN = 1U << 2,
PINYIN_INCOMPLETE = 1U << 3,
CHEWING_INCOMPLETE = 1U << 4,
@@ -61,6 +61,18 @@ enum PinyinAmbiguity2{
};
/**
+ * @brief enums of Pinyin Schemes.
+ */
+
+enum PinyinScheme
+{
+ PINYIN_HANYU = 1,
+ PINYIN_LUOMA = 2,
+ PINYIN_SECONDARY_BOPOMOFO = 3,
+ PINYIN_DEFAULT = PINYIN_HANYU
+};
+
+/**
* @brief enums of Chewing Schemes.
*/
enum ChewingScheme
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
index fd5f57a..adec727 100644
--- a/src/storage/pinyin_parser2.cpp
+++ b/src/storage/pinyin_parser2.cpp
@@ -61,7 +61,7 @@ static bool check_pinyin_options(pinyin_option_t options, const pinyin_index_ite
static bool check_chewing_options(pinyin_option_t options, const chewing_index_item_t * item) {
guint32 flags = item->m_flags;
- assert (flags & IS_CHEWING);
+ assert (flags & IS_BOPOMOFO);
/* handle incomplete chewing. */
if (flags & CHEWING_INCOMPLETE) {
@@ -82,34 +82,49 @@ gint _ChewingKey::get_table_index() {
return index == -1 ? 0 : index;
}
-gchar * _ChewingKey::get_pinyin_string() {
+gchar * _ChewingKey::get_pinyin_string(PinyinScheme scheme) {
assert(m_tone < CHEWING_NUMBER_OF_TONES);
gint index = get_table_index();
assert(index < G_N_ELEMENTS(content_table));
const content_table_item_t & item = content_table[index];
+ const char * pinyin_str = NULL;
+
+ switch(scheme) {
+ case PINYIN_HANYU:
+ pinyin_str = item.m_hanyu_pinyin;
+ break;
+ case PINYIN_LUOMA:
+ pinyin_str = item.m_luoma_pinyin;
+ break;
+ case PINYIN_SECONDARY_BOPOMOFO:
+ pinyin_str = item.m_secondary_bopomofo;
+ break;
+ default:
+ assert(false);
+ }
+
if (CHEWING_ZERO_TONE == m_tone) {
- return g_strdup(item.m_pinyin_str);
+ return g_strdup(pinyin_str);
} else {
- return g_strdup_printf("%s%d", item.m_pinyin_str, m_tone);
+ return g_strdup_printf("%s%d", pinyin_str, m_tone);
}
}
-gchar * _ChewingKey::get_chewing_string() {
+gchar * _ChewingKey::get_bopomofo_string() {
assert(m_tone < CHEWING_NUMBER_OF_TONES);
gint index = get_table_index();
assert(index < G_N_ELEMENTS(content_table));
const content_table_item_t & item = content_table[index];
if (CHEWING_ZERO_TONE == m_tone) {
- return g_strdup(item.m_chewing_str);
+ return g_strdup(item.m_bopomofo);
} else {
- return g_strdup_printf("%s%s", item.m_chewing_str,
+ return g_strdup_printf("%s%s", item.m_bopomofo,
chewing_tone_table[m_tone]);
}
}
-
/* Pinyin Parsers */
/* internal information for pinyin parsers. */
@@ -141,6 +156,8 @@ static bool compare_pinyin_less_than(const pinyin_index_item_t & lhs,
}
static inline bool search_pinyin_index(pinyin_option_t options,
+ const pinyin_index_item_t * pinyin_index,
+ size_t len,
const char * pinyin,
ChewingKey & key){
pinyin_index_item_t item;
@@ -150,7 +167,7 @@ static inline bool search_pinyin_index(pinyin_option_t options,
std_lite::pair<const pinyin_index_item_t *,
const pinyin_index_item_t *> range;
range = std_lite::equal_range
- (pinyin_index, pinyin_index + G_N_ELEMENTS(pinyin_index),
+ (pinyin_index, pinyin_index + len,
item, compare_pinyin_less_than);
guint16 range_len = range.second - range.first;
@@ -175,6 +192,8 @@ static bool compare_chewing_less_than(const chewing_index_item_t & lhs,
}
static inline bool search_chewing_index(pinyin_option_t options,
+ const chewing_index_item_t * chewing_index,
+ size_t len,
const char * chewing,
ChewingKey & key){
chewing_index_item_t item;
@@ -184,7 +203,7 @@ static inline bool search_chewing_index(pinyin_option_t options,
std_lite::pair<const chewing_index_item_t *,
const chewing_index_item_t *> range;
range = std_lite::equal_range
- (chewing_index, chewing_index + G_N_ELEMENTS(chewing_index),
+ (chewing_index, chewing_index + len,
item, compare_chewing_less_than);
guint16 range_len = range.second - range.first;
@@ -207,8 +226,9 @@ static inline bool search_chewing_index(pinyin_option_t options,
/* Full Pinyin Parser */
FullPinyinParser2::FullPinyinParser2 (){
m_parse_steps = g_array_new(TRUE, FALSE, sizeof(parse_value_t));
-}
+ set_scheme(PINYIN_DEFAULT);
+}
bool FullPinyinParser2::parse_one_key (pinyin_option_t options,
ChewingKey & key,
@@ -235,7 +255,8 @@ bool FullPinyinParser2::parse_one_key (pinyin_option_t options,
/* Note: optimize here? */
input[parsed_len] = '\0';
- if (!search_pinyin_index(options, input, key)) {
+ if (!search_pinyin_index(options, m_pinyin_index, m_pinyin_index_len,
+ input, key)) {
g_free(input);
return false;
}
@@ -389,6 +410,27 @@ int FullPinyinParser2::final_step(size_t step_len, ChewingKeyVector & keys,
return parsed_len;
}
+bool FullPinyinParser2::set_scheme(PinyinScheme scheme){
+ switch(scheme){
+ case PINYIN_HANYU:
+ m_pinyin_index = hanyu_pinyin_index;
+ m_pinyin_index_len = G_N_ELEMENTS(hanyu_pinyin_index);
+ break;
+ case PINYIN_LUOMA:
+ m_pinyin_index = luoma_pinyin_index;
+ m_pinyin_index_len = G_N_ELEMENTS(luoma_pinyin_index);
+ break;
+ case PINYIN_SECONDARY_BOPOMOFO:
+ m_pinyin_index = second_bopomofo_index;
+ m_pinyin_index_len = G_N_ELEMENTS(second_bopomofo_index);
+ break;
+ default:
+ assert(false);
+ }
+ return true;
+}
+
+
/* the chewing string must be freed with g_free. */
static bool search_chewing_symbols(const chewing_symbol_item_t * symbol_table,
const char key, const char ** chewing) {
@@ -455,7 +497,9 @@ bool ChewingParser2::parse_one_key(pinyin_option_t options,
}
/* search the chewing in the chewing index table. */
- if (chewing && search_chewing_index(options, chewing, key)) {
+ if (chewing && search_chewing_index(options, bopomofo_index,
+ G_N_ELEMENTS(bopomofo_index),
+ chewing, key)) {
/* save back tone if available. */
key.m_tone = tone;
g_free(chewing);
diff --git a/src/storage/pinyin_parser2.h b/src/storage/pinyin_parser2.h
index feee45b..b402523 100644
--- a/src/storage/pinyin_parser2.h
+++ b/src/storage/pinyin_parser2.h
@@ -30,8 +30,10 @@
namespace pinyin{
typedef struct {
- const char * m_pinyin_str;
- const char * m_chewing_str;
+ const char * m_hanyu_pinyin;
+ const char * m_bopomofo;
+ const char * m_luoma_pinyin;
+ const char * m_secondary_bopomofo;
ChewingKey m_chewing_key;
} content_table_item_t;
@@ -48,20 +50,6 @@ typedef struct {
} chewing_index_item_t;
typedef struct {
- const char * m_orig_key;
- guint32 m_orig_freq;
- const char * m_new_keys[2];
- guint32 m_new_freq;
-} divided_table_item_t;
-
-typedef struct {
- const char * m_orig_keys[2];
- guint32 m_orig_freq;
- const char * m_new_keys[2];
- guint32 m_new_freq;
-} resplit_table_item_t;
-
-typedef struct {
const char * m_shengmu;
} double_pinyin_scheme_shengmu_item_t;
@@ -90,6 +78,10 @@ typedef GArray * ParseValueVector;
*/
class PinyinParser2
{
+protected:
+ const pinyin_index_item_t * m_pinyin_index;
+ size_t m_pinyin_index_len;
+
public:
/**
* PinyinParser2::~PinyinParser2:
@@ -161,6 +153,9 @@ public:
* the parse method will use dynamic programming to drive parse_one_key.
*/
virtual int parse(pinyin_option_t options, ChewingKeyVector & keys, ChewingKeyRestVector & key_rests, const char *str, int len) const;
+
+public:
+ bool set_scheme(PinyinScheme scheme);
};
/**