summary refs log tree commit diff stats
path: root/src/storage
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-11-16 14:26:32 +0800
committer Peng Wu <alexepico@gmail.com> 2011-11-16 14:26:32 +0800
commit 5af041d0ecb0eeff49be8c35a2223e8458a93120 (patch)
tree 0775443793e5b961dd7b2aa4e08fe6621fda495b /src/storage
parent 4794f487807d6f9ab26df198e27edb46e8353b31 (diff)
download libpinyin-5af041d0ecb0eeff49be8c35a2223e8458a93120.tar.gz
libpinyin-5af041d0ecb0eeff49be8c35a2223e8458a93120.tar.xz
libpinyin-5af041d0ecb0eeff49be8c35a2223e8458a93120.zip
write full pinyin parser in progress
Diffstat (limited to 'src/storage')
-rw-r--r-- src/storage/pinyin_parser2.cpp 62
-rw-r--r-- src/storage/pinyin_parser2.h 8
2 files changed, 68 insertions, 2 deletions
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
index 6ace7ce..6ba15bd 100644
--- a/src/storage/pinyin_parser2.cpp
+++ b/src/storage/pinyin_parser2.cpp
@@ -88,6 +88,32 @@ const char * ChewingKeyRest::get_chewing_string(){
}
+/* Pinyin Parsers */
+
+/* Internal per-step record for the pinyin parsers; FullPinyinParser2 sizes the elements of m_parse_steps with sizeof(parse_value_t). */
+struct parse_value_t{
+ ChewingKey m_key; /* not assigned by the constructor below; left to ChewingKey's own default initialization. */
+ ChewingKeyRest m_key_rest; /* not assigned by the constructor below; left to ChewingKeyRest's own default initialization. */
+ gint16 m_num_keys; /* presumably the number of keys on the path to this step -- TODO confirm. */
+ gint16 m_parsed_len; /* presumably the input length consumed up to this step -- TODO confirm. */
+ gint16 m_last_step; /* presumably the index of the previous step, for back tracing -- TODO confirm. */
+
+ /* constructor: zeroes the three gint16 counters. */
+public:
+ parse_value_t(){
+ m_num_keys = 0;
+ m_parsed_len = 0;
+ m_last_step = 0;
+ }
+};
+
+/* Full Pinyin Parser */
+FullPinyinParser2::FullPinyinParser2 (){ /* allocates the step array; the destructor in pinyin_parser2.h releases it with g_array_free. */
+ m_parse_steps = g_array_new(TRUE, FALSE, sizeof(parse_value_t)); /* GLib arguments: zero_terminated = TRUE, clear_ = FALSE, element size = one parse_value_t. */
+}
+
+const guint16 max_full_pinyin_length = 7; /* include tone; FullPinyinParser2::parse() adds this to the start offset to bound how far ahead it tries. */
+
static bool compare_less_than(const pinyin_index_item_t & lhs,
const pinyin_index_item_t & rhs){
return 0 > strcmp(lhs.m_pinyin_input, rhs.m_pinyin_input);
@@ -160,5 +186,39 @@ int FullPinyinParser2::parse_one_key (guint32 options, ChewingKey & key,
int FullPinyinParser2::parse (guint32 options, ChewingKeyVector & keys,
ChewingKeyRestVector & key_rests,
const char *str, int len) const {
- assert(FALSE);
+ size_t i; /* shared index for the step-init loop and the per-segment loop below. */
+ /* empty both output arrays before parsing. */
+ g_array_set_size(keys, 0);
+ g_array_set_size(key_rests, 0);
+
+ /* reset m_parse_steps so it holds len + 1 copies of a default-constructed step. */
+ int step_len = len + 1;
+ g_array_set_size(m_parse_steps, 0);
+ parse_value_t onestep; /* template element: counters zeroed by the parse_value_t constructor. */
+ for (i = 0; i < step_len; ++i) { /* NOTE(review): size_t i is compared with int step_len; a negative step_len would convert to a huge unsigned bound. */
+ g_array_append_val(m_parse_steps, onestep); /* appends a copy of onestep. */
+ }
+
+ /* copy the first len bytes of str, then split the copy on "'" into a NULL-terminated array of segments. */
+ gchar * input = g_strndup(str, len);
+ gchar ** inputs = g_strsplit(input, "'", -1); /* -1: no limit on the number of segments. */
+ g_free(input); /* the copy is no longer needed; input is reused below to point at each segment. */
+ /* parse each segment in turn; the loop stops at the terminating NULL entry. */
+ for (i = 0; inputs[i]; ++i) {
+ input = inputs[i];
+ /* dynamic programming here: presumably m is a start offset and n an end offset within the segment -- TODO confirm once the body exists. */
+ size_t str_len = strlen(input);
+ for (size_t m = 0; m < str_len; ++m) {
+ size_t try_len = std_lite::min
+ (m + max_full_pinyin_length, str_len); /* never look past the end of this segment. */
+ for (size_t n = m + 1; n < try_len + 1; ++n) { /* n runs from m + 1 through try_len inclusive. */
+ /* gen next step -- the loop body is still empty in this revision. */
+ }
+ }
+ }
+ g_strfreev(inputs); /* frees the segment array and each string in it. */
+
+ /* post processing for re-split table (no code yet in this revision). */
+
+ /* final step for back tracing (no code yet). NOTE(review): the function is declared int but no value is returned before the closing brace; options is also unused so far. */
}
diff --git a/src/storage/pinyin_parser2.h b/src/storage/pinyin_parser2.h
index 67bf0b0..1ac47c0 100644
--- a/src/storage/pinyin_parser2.h
+++ b/src/storage/pinyin_parser2.h
@@ -65,6 +65,7 @@ typedef struct {
typedef GArray * ChewingKeyVector;
typedef GArray * ChewingKeyRestVector;
+typedef GArray * ParseValueVector; /* type of FullPinyinParser2::m_parse_steps, which pinyin_parser2.cpp creates with parse_value_t-sized elements. */
/**
@@ -115,8 +116,13 @@ class FullPinyinParser2 : public PinyinParser2
{
/* Note: some internal pointers to full pinyin table. */
+protected:
+ ParseValueVector m_parse_steps;
public:
- virtual ~FullPinyinParser2 () {}
+ FullPinyinParser2 ();
+ virtual ~FullPinyinParser2 () {
+ g_array_free(m_parse_steps, TRUE);
+ }
virtual int parse_one_key (guint32 options, ChewingKey & key, ChewingKeyRest & key_rest, const char *str, int len) const;