summary refs log tree commit diff stats
path: root/utils/training/k_mixture_model.h
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2011-04-25 15:54:42 +0800
committer Peng Wu <alexepico@gmail.com> 2011-04-25 15:54:42 +0800
commit 54b399a04fff283dec5299fc9f1d66985456754d (patch)
tree 4e60ce7c9ecc769e0b210f7093b364adfafc7bfd /utils/training/k_mixture_model.h
parent 765a730f31d5f8a0ef2b95d2fc9966cdc9f09b46 (diff)
download libpinyin-54b399a04fff283dec5299fc9f1d66985456754d.tar.gz
libpinyin-54b399a04fff283dec5299fc9f1d66985456754d.tar.xz
libpinyin-54b399a04fff283dec5299fc9f1d66985456754d.zip
define k mixture model bi-gram
Diffstat (limited to 'utils/training/k_mixture_model.h')
-rw-r--r-- utils/training/k_mixture_model.h 41
1 files changed, 41 insertions, 0 deletions
diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h
index c482a2c..0b19af6 100644
--- a/utils/training/k_mixture_model.h
+++ b/utils/training/k_mixture_model.h
@@ -24,6 +24,7 @@
#define K_MIXTURE_MODEL
#include "novel_types.h"
+#include "flexible_ngram.h"
namespace pinyin{
@@ -104,6 +105,46 @@ static inline parameter_t compute_Pr_G_2_with_count(corpus_count_t k,
return compute_Pr_G_2(k, alpha, B);
}
+typedef struct{
+ /* m_WC: the total number of instances of all words. */
+ guint32 m_WC;
+ /* m_N: the total number of documents. */
+ guint32 m_N;
+} KMixtureModelMagicHeader; /* first type argument of KMixtureModelBigram. */
+
+typedef struct{
+ /* m_WC: the total number of instances of word W1. */
+ guint32 m_WC;
+} KMixtureModelArrayHeader; /* header type argument of both the bi-gram and single gram typedefs. */
+
+typedef struct{
+ /* the total number of all W1,W2 word pairs. */
+ guint32 m_WC;
+
+ /* the total number of instances of the word or phrase
+ (two word phrase). */
+ guint32 m_T; /* alias of m_WC, always the same. */
+ /* n_r: the number of documents having exactly r occurrences. */
+ guint32 m_n_0; /* n_r with r = 0. */
+ guint32 m_n_1; /* n_r with r = 1. */
+
+ /* the maximum number of instances of the word or phrase
+ (two word phrase) in previous documents last seen. */
+ guint32 m_Mr;
+} KMixtureModelArrayItem; /* item type argument of both the bi-gram and single gram typedefs. */
+
+typedef FlexibleBigram<KMixtureModelMagicHeader,
+ KMixtureModelArrayHeader,
+ KMixtureModelArrayItem>
+KMixtureModelBigram; /* the k mixture model bi-gram. */
+
+typedef FlexibleSingleGram<KMixtureModelArrayHeader,
+ KMixtureModelArrayItem>
+KMixtureModelSingleGram; /* uses the same array header and item types as KMixtureModelBigram. */
+
+typedef KMixtureModelSingleGram::ArrayItemWithToken
+KMixtureModelArrayItemWithToken; /* shorthand for the single gram's nested ArrayItemWithToken type. */
+
};