1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
/*
* libpinyin
* Library to deal with pinyin.
*
* Copyright (C) 2006-2007 Peng Wu
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef NGRAM_H
#define NGRAM_H
#include <db.h>
namespace novel{
class Bigram;
/* Note:
 * When transferring from the system ngram to the user ngram:
 *   - if the user ngram doesn't exist,
 *     copy the total freq from the system ngram to the user ngram,
 *     so that the total freq exists;
 *   - if the item freq doesn't exist, copy the item freq from the system
 *     ngram to the user ngram, so that the item freq exists.
 * Then:
 *   - if the user ngram already exists (always true), increase the total freq;
 *   - if the item ngram already exists (always true), increase the item freq.
 */
/* SingleGram
 * Wraps a MemoryChunk (m_chunk) and declares accessors for per-token
 * frequencies and for a total frequency.
 * Every accessor reports its outcome as a bool.
 * NOTE(review): presumably true means success / found -- the definitions
 * are not in this header, confirm against the implementation.
 */
class SingleGram
{
public:
    /* Null constructor: takes no arguments. */
    SingleGram();

    /* Retrieve all items into `array` (out). */
    bool retrieve_all(/* out */ BigramPhraseWithCountArray array);

    /* Search for the items selected by `range` (in); the result placed in
     * `array` (out) may contain many items.
     */
    bool search(/* in */ PhraseIndexRange * range,
                /* out */ BigramPhraseArray array);

    /* Read the frequency of `token` (in) into `freq` (out). */
    bool get_freq(/* in */ phrase_token_t token,
                  /* out */ guint32 & freq);

    /* Set the frequency of `token` (in) to `freq`. */
    bool set_freq(/* in */ phrase_token_t token,
                  guint32 freq);

    /* Set the total frequency; used in the user bigram table. */
    bool set_total_freq(guint32 m_total);

    /* Read the total frequency into `m_total` (out); used in the user
     * bigram table.
     */
    bool get_total_freq(guint32 & m_total);

    /* Prune method; only used in training. */
    bool prune();

    /* Bigram may use the private members below, including the
     * buffer constructor.
     */
    friend class Bigram;

private:
    /* Storage backing this gram. */
    MemoryChunk m_chunk;

    /* Build a SingleGram from `buffer` of `length` bytes.
     * Private: only SingleGram itself and its friend Bigram can call it.
     * NOTE(review): whether the buffer is copied or referenced is not
     * visible in this header -- confirm in the implementation.
     */
    SingleGram(void * buffer, size_t length);
};
class Bigram{
private:
DB * m_system;
DB * m_user;
public:
Bigram(){
m_system = NULL; m_user = NULL;
}
~Bigram(){
reset();
}
void reset(){
if ( m_system ){
m_system->close(m_system, 0);
m_system = NULL;
}
if ( m_user ){
m_user->close(m_user, 0);
m_user = NULL;
}
}
/* attach system and user bi-gram */
/* when with training systemdb is NULL, only user_gram */
bool attach(const char * systemfile, const char * userfile);
bool load(phrase_token_t index, SingleGram * & system_gram, SingleGram * & user_gram);
bool store(phrase_token_t index, SingleGram * user_gram);
/* array of phrase_token_t items, for parameter estimation. */
bool get_all_items(GArray * system, GArray * user);
};
};
/* Makes every name declared in namespace novel visible unqualified in any
 * file that includes this header.
 * NOTE(review): presumably kept so existing code can refer to these names
 * without the novel:: prefix -- check all includers before removing.
 */
using namespace novel;
#endif
|