summary | refs | log | tree | commit | diff | stats
path: root/src/storage/pinyin_parser2.cpp
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2011-11-15 17:07:56 +0800
committer: Peng Wu <alexepico@gmail.com> 2011-11-15 17:33:58 +0800
commit: 4e5e619a20c4d7c9cc1229a2f3e26a7219bf6841 (patch)
tree: e13861d630f78a2b08f1f292feb3742db0352f6a /src/storage/pinyin_parser2.cpp
parent: 5501ea429dd50330caa9cd6ffbd8236d1663fa6e (diff)
downloadlibpinyin-4e5e619a20c4d7c9cc1229a2f3e26a7219bf6841.tar.gz
libpinyin-4e5e619a20c4d7c9cc1229a2f3e26a7219bf6841.tar.xz
libpinyin-4e5e619a20c4d7c9cc1229a2f3e26a7219bf6841.zip
begin to write full pinyin parser2 parse_one_key
Diffstat (limited to 'src/storage/pinyin_parser2.cpp')
-rw-r--r-- src/storage/pinyin_parser2.cpp 75
1 files changed, 73 insertions, 2 deletions
diff --git a/src/storage/pinyin_parser2.cpp b/src/storage/pinyin_parser2.cpp
index ea180fe..c7bf71b 100644
--- a/src/storage/pinyin_parser2.cpp
+++ b/src/storage/pinyin_parser2.cpp
@@ -19,7 +19,11 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
+
+#include <ctype.h>
#include <assert.h>
+#include <string.h>
+#include "stl_lite.h"
#include "pinyin_custom2.h"
#include "chewing_key.h"
#include "pinyin_parser2.h"
@@ -28,7 +32,7 @@
using namespace pinyin;
-static bool check_pinyin_options(guint32 options, pinyin_index_item_t * item) {
+static bool check_pinyin_options(guint32 options, const pinyin_index_item_t * item) {
guint32 flags = item->m_flags;
assert (flags & IS_PINYIN);
@@ -50,7 +54,7 @@ static bool check_pinyin_options(guint32 options, pinyin_index_item_t * item) {
return true;
}
-static bool check_chewing_options(guint32 options, chewing_index_item_t * item) {
+static bool check_chewing_options(guint32 options, const chewing_index_item_t * item) {
guint32 flags = item->m_flags;
assert (flags & IS_CHEWING);
@@ -63,6 +67,8 @@ static bool check_chewing_options(guint32 options, chewing_index_item_t * item)
return true;
}
+
+/* methods for Chewing Keys to access pinyin parser table. */
const char * ChewingKeyRest::get_pinyin_string(){
if (m_index == 0)
return NULL;
@@ -80,3 +86,68 @@ const char * ChewingKeyRest::get_chewing_string(){
assert(m_index < G_N_ELEMENTS(content_table));
return content_table[m_index].m_chewing_str;
}
+
+
+/* ordering predicate for the pinyin index lookup:
+ * true when the m_pinyin_input of lhs sorts before that of rhs (strcmp order). */
+static bool compare_less_than(const pinyin_index_item_t & lhs,
+                              const pinyin_index_item_t & rhs){
+    const int order = strcmp(lhs.m_pinyin_input, rhs.m_pinyin_input);
+    return order < 0;
+}
+
+/* parse one pinyin key from the first len bytes of pinyin.
+ *
+ * The longest prefix found in pinyin_index and accepted by
+ * check_pinyin_options(options, ...) wins.  When the last of the len bytes
+ * is a tone digit '1'..'5' and the matched prefix ends directly before it,
+ * the tone is stored in key.m_tone and the digit is consumed as well.
+ *
+ * key and key_rest are always reset first; on a match key_rest.m_index and
+ * key are filled from the index/content tables, and key_rest.m_raw_begin /
+ * m_raw_end are set to 0 and the consumed length.
+ *
+ * Returns the number of bytes consumed, 0 when nothing matched
+ * (including NULL or empty input).
+ */
+int FullPinyinParser2::parse_one_key (guint32 options, ChewingKey & key,
+                                       ChewingKeyRest & key_rest,
+                                       const char * pinyin, int len) const {
+    key = ChewingKey(); key_rest = ChewingKeyRest();
+
+    /* nothing to parse: return before input[len - 1] is read below. */
+    if (NULL == pinyin || len <= 0) {
+        key_rest.m_raw_begin = 0; key_rest.m_raw_end = 0;
+        return 0;
+    }
+
+    gchar * input = g_strndup(pinyin, len);
+
+    /* "'" are not accepted in parse_one_key.
+     * check the bounded copy, so bytes beyond len are never scanned. */
+    assert(NULL == strchr(input, '\''));
+
+    guint16 tone = CHEWING_ZERO_TONE; guint16 tone_pos = 0;
+    guint16 parsed_len = len;
+
+    /* find the tone in the last character. */
+    char chr = input[parsed_len - 1];
+    if ( '0' < chr && chr <= '5' ) {
+        tone = chr - '0';
+        parsed_len --;
+        tone_pos = parsed_len;
+    }
+
+    /* parse pinyin core stuff here. */
+    pinyin_index_item_t item;
+    memset(&item, 0, sizeof(item));
+    /* the search key always points at input, which is shortened in place. */
+    item.m_pinyin_input = input;
+
+    for (; parsed_len > 0; --parsed_len) {
+        /* try the first parsed_len bytes, longest candidate first. */
+        input[parsed_len] = '\0';
+        std_lite::pair<const pinyin_index_item_t *,
+                       const pinyin_index_item_t *> range;
+        range = std_lite::equal_range
+            (pinyin_index, pinyin_index + G_N_ELEMENTS(pinyin_index),
+             item, compare_less_than);
+
+        /* named so that it does not shadow the len parameter. */
+        guint16 matches = range.second - range.first;
+        assert (matches <= 1);
+        if ( matches == 1 ) {
+            const pinyin_index_item_t * index = range.first;
+
+            /* entry rejected by the options: fall back to a shorter prefix. */
+            if (!check_pinyin_options(options, index))
+                continue;
+
+            key_rest.m_index = index->m_table_index;
+            key = content_table[key_rest.m_index].m_chewing_key;
+            break;
+        }
+    }
+
+    /* post processing tone.
+     * parsed_len > 0 means a pinyin was matched; a tone digit with no
+     * pinyin before it is not consumed. */
+    if ( parsed_len > 0 && parsed_len == tone_pos ) {
+        if (tone != CHEWING_ZERO_TONE) {
+            key.m_tone = tone;
+            parsed_len ++;
+        }
+    }
+
+    key_rest.m_raw_begin = 0; key_rest.m_raw_end = parsed_len;
+    g_free(input);
+    return parsed_len;
+}