summary | refs | log | tree | commit | diff | stats
path: root/utils/training
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2011-05-04 11:25:00 +0800
committer: Peng Wu <alexepico@gmail.com> 2011-05-04 11:25:00 +0800
commit: 2358bbb3d0348dd940fa10fd40dbe0f2af6d9db0 (patch)
tree: fe0353c2d26289cb0c185883a675afc8f3b4349d /utils/training
parent: e85c6ac35846b87a794f8b2d85cbe472b3c7e8b7 (diff)
download: libpinyin-2358bbb3d0348dd940fa10fd40dbe0f2af6d9db0.tar.gz
libpinyin-2358bbb3d0348dd940fa10fd40dbe0f2af6d9db0.tar.xz
libpinyin-2358bbb3d0348dd940fa10fd40dbe0f2af6d9db0.zip
refine k mixture model
Diffstat (limited to 'utils/training')
-rw-r--r-- utils/training/k_mixture_model.h 13
1 files changed, 10 insertions, 3 deletions
diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h
index af613f5..6b3a75d 100644
--- a/utils/training/k_mixture_model.h
+++ b/utils/training/k_mixture_model.h
@@ -124,11 +124,18 @@ typedef struct{
/* the total number of all W1,W2 word pair. */
guint32 m_WC;
- /* the total number of instances of the word or phrases.
+ /* the total number of instances of the word or phrase.
(two word phrase) */
- guint32 m_T; /* alias of m_WC, always the same. */
+ /* guint32 m_T; Please use m_WC instead.
+ alias of m_WC, always the same. */
+
/* n_r: the number of documents having exactly r occurrences. */
- guint32 m_n_0;
+ /* guint32 m_n_0;
+ Note: compute this value using the following equation.
+ m_n_0 = KMixtureModelMagicHeader.m_N - m_N_n_0;
+ m_N_n_0, the number of documents which contains the word or phrase.
+ (two word phrase) */
+ guint m_N_n_0;
guint32 m_n_1;
/* maximum instances of the word or phrase (two word phrase)