summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peng Wu <pwu@redhat.com> 2024-09-02 11:43:16 +0800
committer: Peng Wu <pwu@redhat.com> 2024-09-02 14:25:05 +0800
commit: b1927376735c0c1042d72af2eca0e25c53595724 (patch)
tree: 7c86a436a91aa0a51c1736770280a08f326b3388
parent: 429f9fd8dd2966db9038a4194e5970ba22e8e5c0 (diff)
download: trainer-main.tar.gz
trainer-main.tar.xz
trainer-main.zip
update myconfig.py (refs: HEAD, punct, main)
-rw-r--r-- lib/myconfig.py 29
1 files changed, 29 insertions, 0 deletions
diff --git a/lib/myconfig.py b/lib/myconfig.py
index af79eca..b43d52a 100644
--- a/lib/myconfig.py
+++ b/lib/myconfig.py
@@ -15,6 +15,7 @@ class MyConfig:
'NewWordEpoch': 1, \
'MarkPinyinEpoch': 1, \
'MergeSequenceEpoch': 1, \
+ 'PunctuationEpoch': 1, \
}
def getEpochs(self):
@@ -186,3 +187,31 @@ class MyConfig:
def getMinimumPinyinFrequency(self):
return 3
+
+ '''
+ Generate Punctuation Configuration
+ '''
+
+ def getGeneratePunctuationDir(self):
+ return self.m_trainer_dir + os.sep + 'genpunct'
+
+ def getPunctuationPerIndexFileName(self):
+ return 'punctuation-index.text'
+
+ def getPunctuationPruneIndexFileName(self):
+ return 'punctuation-prune.text'
+
+ def getPunctuationAllIndexFileName(self):
+ return 'punctuation-all.text'
+
+ def getPunctuationPruneAllIndexFileName(self):
+ return 'punctuation-all-prune.text'
+
+ def getPunctuationTextFileName(self):
+ return 'puncts.table'
+
+ def getPunctuationPerIndexPruneThreshold(self):
+ return 5 * 100
+
+ def getPunctuationAllIndexPruneThreshold(self):
+ return self.getPunctuationPerIndexPruneThreshold() * 20