diff options
author | Peng Wu <alexepico@gmail.com> | 2011-04-25 15:54:42 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-04-25 15:54:42 +0800 |
commit | 54b399a04fff283dec5299fc9f1d66985456754d (patch) | |
tree | 4e60ce7c9ecc769e0b210f7093b364adfafc7bfd /utils/training/k_mixture_model.h | |
parent | 765a730f31d5f8a0ef2b95d2fc9966cdc9f09b46 (diff) | |
download | libpinyin-54b399a04fff283dec5299fc9f1d66985456754d.tar.gz libpinyin-54b399a04fff283dec5299fc9f1d66985456754d.tar.xz libpinyin-54b399a04fff283dec5299fc9f1d66985456754d.zip |
define k mixture model bi-gram
Diffstat (limited to 'utils/training/k_mixture_model.h')
-rw-r--r-- | utils/training/k_mixture_model.h | 41 |
1 files changed, 41 insertions, 0 deletions
diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h
index c482a2c..0b19af6 100644
--- a/utils/training/k_mixture_model.h
+++ b/utils/training/k_mixture_model.h
@@ -24,6 +24,7 @@
 #define K_MIXTURE_MODEL
 
 #include "novel_types.h"
+#include "flexible_ngram.h" /* presumably declares FlexibleBigram / FlexibleSingleGram used below -- TODO confirm */
 
 namespace pinyin{
 
@@ -104,6 +105,46 @@ static inline parameter_t compute_Pr_G_2_with_count(corpus_count_t k,
     return compute_Pr_G_2(k, alpha, B);
 }
 
+typedef struct{ /* Counts over all words and all documents; first template argument of KMixtureModelBigram. */
+    /* m_WC: the total number of instances of all words. */
+    guint32 m_WC;
+    /* m_N: the total number of documents. */
+    guint32 m_N;
+} KMixtureModelMagicHeader;
+
+typedef struct{ /* Count for a single word W1; header argument of both KMixtureModelBigram and KMixtureModelSingleGram. */
+    /* m_WC: the total number of instances of word W1. */
+    guint32 m_WC;
+} KMixtureModelArrayHeader;
+
+typedef struct{ /* Counters presumably kept per W1,W2 word pair (TODO confirm); item argument of both typedefs below. */
+    /* m_WC: the total number of all W1,W2 word pairs. */
+    guint32 m_WC;
+
+    /* m_T: the total number of instances of the word or phrase
+       (two-word phrase). */
+    guint32 m_T; /* alias of m_WC, always the same. */
+    /* n_r: the number of documents having exactly r occurrences; m_n_0 and m_n_1 are presumably n_r for r = 0 and r = 1 (TODO confirm). */
+    guint32 m_n_0;
+    guint32 m_n_1;
+
+    /* m_Mr: maximum instances of the word or phrase (two-word phrase)
+       in previous documents last seen. */
+    guint32 m_Mr;
+} KMixtureModelArrayItem;
+
+typedef FlexibleBigram<KMixtureModelMagicHeader,
+                       KMixtureModelArrayHeader,
+                       KMixtureModelArrayItem>
+KMixtureModelBigram; /* FlexibleBigram instantiated with the three structs defined above. */
+
+typedef FlexibleSingleGram<KMixtureModelArrayHeader,
+                           KMixtureModelArrayItem>
+KMixtureModelSingleGram; /* FlexibleSingleGram over the same array header and array item types. */
+
+typedef KMixtureModelSingleGram::ArrayItemWithToken
+KMixtureModelArrayItemWithToken; /* Shorthand for the ArrayItemWithToken type nested in KMixtureModelSingleGram. */
+
 }; |