summary refs log tree commit diff stats
path: root/utils
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2013-11-01 14:13:21 +0800
committer Peng Wu <alexepico@gmail.com> 2013-11-01 14:18:14 +0800
commit 1ba2f10f6af8120ef6ec3ad9887c50af7f151c40 (patch)
tree 53d1e7ae9a57032f10e7c76f79686a2921a1bb7a /utils
parent d0bde5a5c277da1c1d0ead364c5e31cc26604c75 (diff)
download libzhuyin-1ba2f10f6af8120ef6ec3ad9887c50af7f151c40.tar.gz
libzhuyin-1ba2f10f6af8120ef6ec3ad9887c50af7f151c40.tar.xz
libzhuyin-1ba2f10f6af8120ef6ec3ad9887c50af7f151c40.zip
update gen_zhuyin_table.cpp
Diffstat (limited to 'utils')
-rw-r--r-- utils/storage/gen_zhuyin_table.cpp 27
1 files changed, 18 insertions, 9 deletions
diff --git a/utils/storage/gen_zhuyin_table.cpp b/utils/storage/gen_zhuyin_table.cpp
index cd98935..2620ed1 100644
--- a/utils/storage/gen_zhuyin_table.cpp
+++ b/utils/storage/gen_zhuyin_table.cpp
@@ -133,21 +133,29 @@ int main(int argc, char * argv[]){
}
void feed_file ( const char * filename){
- char phrase[1024], pinyin[1024];
- guint32 freq;
-
FILE * infile = fopen(filename, "r");
if ( NULL == infile ){
fprintf(stderr, "Can't open file %s.\n", filename);
exit(ENOENT);
}
- while ( !feof(infile)){
- int num = fscanf(infile, "%s %s %u",
- phrase, pinyin, &freq);
+ char * linebuf = NULL; size_t size = 0; ssize_t read;
+ while( (read = getline(&linebuf, &size, infile)) != -1 ){
+ if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
+ linebuf[strlen(linebuf) - 1] = '\0';
+ }
+
+ /* assume tsi.src only use the single space to separate tokens. */
+ gchar ** strs = g_strsplit_set(linebuf, " ", 3);
+
+ const char * phrase = strs[0];
+ guint32 freq = atoi(strs[1]);
+ const char * pinyin = strs[2];
- if (3 != num)
+ if (3 != g_strv_length(strs)) {
+ fprintf(stderr, "wrong line format:%s\n", linebuf);
continue;
+ }
if (feof(infile))
break;
@@ -155,6 +163,7 @@ void feed_file ( const char * filename){
feed_line(phrase, pinyin, freq);
}
+ free(linebuf);
fclose(infile);
}
@@ -174,7 +183,7 @@ void feed_line(const char * phrase, const char * pinyin, const guint32 freq) {
item->uniphrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL);
- FullPinyinParser2 parser;
+ ChewingDirectParser2 parser;
ChewingKeyVector keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
ChewingKeyRestVector key_rests = g_array_new
(FALSE, FALSE, sizeof(ChewingKeyRest));
@@ -300,7 +309,7 @@ void gen_phrase_file(const char * outputfile, int phrase_index){
(key_rests, ChewingKeyRest, k);
//assert (CHEWING_ZERO_TONE != key.m_tone);
- pinyin = key.get_pinyin_string();
+ pinyin = key.get_bopomofo_string();
g_array_append_val(pinyins, pinyin);
}
gchar * pinyin_str = g_strjoinv("'", (gchar **)pinyins->data);