diff options
author | Peng Wu <alexepico@gmail.com> | 2016-02-29 14:46:31 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2016-02-29 14:46:31 +0800 |
commit | 571322984a1b87bf6341492d7289a77d13a5a9d6 (patch) | |
tree | b62008db72d691ce18d1545f920bcf239499f563 /src | |
parent | 9385abef7168ef63cdab6c56f5181574c87cee26 (diff) | |
download | libpinyin-571322984a1b87bf6341492d7289a77d13a5a9d6.tar.gz libpinyin-571322984a1b87bf6341492d7289a77d13a5a9d6.tar.xz libpinyin-571322984a1b87bf6341492d7289a77d13a5a9d6.zip |
write load_text method
Diffstat (limited to 'src')
-rw-r--r-- | src/storage/chewing_large_table2.cpp | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/src/storage/chewing_large_table2.cpp b/src/storage/chewing_large_table2.cpp index db0cef6..66bb5e9 100644 --- a/src/storage/chewing_large_table2.cpp +++ b/src/storage/chewing_large_table2.cpp @@ -20,3 +20,51 @@ */ #include "chewing_large_table2.h" +#include "pinyin_phrase2.h" +#include "pinyin_phrase3.h" +#include "pinyin_parser2.h" + + +/* load text method */ +bool ChewingLargeTable2::load_text(FILE * infile) { + char pinyin[256]; + char phrase[256]; + phrase_token_t token; + size_t freq; + + while (!feof(infile)) { + int num = fscanf(infile, "%256s %256s %u %ld", + pinyin, phrase, &token, &freq); + + if (4 != num) + continue; + + if(feof(infile)) + break; + + glong len = g_utf8_strlen(phrase, -1); + + PinyinDirectParser2 parser; + ChewingKeyVector keys; + ChewingKeyRestVector key_rests; + + keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey)); + key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest)); + + pinyin_option_t options = USE_TONE; + parser.parse(options, keys, key_rests, pinyin, strlen(pinyin)); + + if (len != keys->len) { + fprintf(stderr, "ChewingLargeTable::load_text:%s\t%s\t%u\t%ld\n", + pinyin, phrase, token, freq); + continue; + } + + add_index(keys->len, (ChewingKey *)keys->data, token); + + g_array_free(keys, TRUE); + g_array_free(key_rests, TRUE); + } + + return true; +} |