summaryrefslogtreecommitdiffstats
path: root/src/storage/chewing_large_table2.cpp
blob: 66bb5e97c43c6f18d6681d7ff2e4d52a8e6ad36a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
/* 
 *  libpinyin
 *  Library to deal with pinyin.
 *  
 *  Copyright (C) 2016 Peng Wu <alexepico@gmail.com>
 *  
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 * 
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *  
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

#include "chewing_large_table2.h"
#include "pinyin_phrase2.h"
#include "pinyin_phrase3.h"
#include "pinyin_parser2.h"


/* load text method */
bool ChewingLargeTable2::load_text(FILE * infile) {
    char pinyin[256];
    char phrase[256];
    phrase_token_t token;
    size_t freq;

    while (!feof(infile)) {
        int num = fscanf(infile, "%256s %256s %u %ld",
                         pinyin, phrase, &token, &freq);

        if (4 != num)
            continue;

        if(feof(infile))
            break;

        glong len = g_utf8_strlen(phrase, -1);

        PinyinDirectParser2 parser;
        ChewingKeyVector keys;
        ChewingKeyRestVector key_rests;

        keys = g_array_new(FALSE, FALSE, sizeof(ChewingKey));
        key_rests = g_array_new(FALSE, FALSE, sizeof(ChewingKeyRest));

        pinyin_option_t options = USE_TONE;
        parser.parse(options, keys, key_rests, pinyin, strlen(pinyin));

        if (len != keys->len) {
            fprintf(stderr, "ChewingLargeTable::load_text:%s\t%s\t%u\t%ld\n",
                    pinyin, phrase, token, freq);
            continue;
        }

        add_index(keys->len, (ChewingKey *)keys->data, token);

        g_array_free(keys, TRUE);
        g_array_free(key_rests, TRUE);
    }

    return true;
}