diff options
author | Peng Wu <alexepico@gmail.com> | 2011-05-04 11:25:00 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2011-05-04 11:25:00 +0800 |
commit | 2358bbb3d0348dd940fa10fd40dbe0f2af6d9db0 (patch) | |
tree | fe0353c2d26289cb0c185883a675afc8f3b4349d /utils/training | |
parent | e85c6ac35846b87a794f8b2d85cbe472b3c7e8b7 (diff) | |
download | libpinyin-2358bbb3d0348dd940fa10fd40dbe0f2af6d9db0.tar.gz libpinyin-2358bbb3d0348dd940fa10fd40dbe0f2af6d9db0.tar.xz libpinyin-2358bbb3d0348dd940fa10fd40dbe0f2af6d9db0.zip |
refine k mixture model
Diffstat (limited to 'utils/training')
-rw-r--r-- | utils/training/k_mixture_model.h | 13 |
1 files changed, 10 insertions, 3 deletions
diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h
index af613f5..6b3a75d 100644
--- a/utils/training/k_mixture_model.h
+++ b/utils/training/k_mixture_model.h
@@ -124,11 +124,18 @@ typedef struct{
     /* the total number of all W1,W2 word pair. */
     guint32 m_WC;
 
-    /* the total number of instances of the word or phrases.
+    /* the total number of instances of the word or phrase.
        (two word phrase) */
-    guint32 m_T; /* alias of m_WC, always the same. */
+    /* guint32 m_T; Please use m_WC instead.
+       alias of m_WC, always the same. */
+
     /* n_r: the number of documents having exactly r occurrences. */
-    guint32 m_n_0;
+    /* guint32 m_n_0;
+       Note: compute this value using the following equation.
+       m_n_0 = KMixtureModelMagicHeader.m_N - m_N_n_0;
+       m_N_n_0, the number of documents which contains the word or phrase.
+       (two word phrase) */
+    guint m_N_n_0;
     guint32 m_n_1;
     /* maximum instances of the word or phrase
        (two word phrase)