blob: b39d0128805ef59909666e448393b5d6f53cd9bd (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
import os
class MyConfig:
    '''Central configuration for the trainer scripts.

    Holds the directory layout, the epoch number of each pipeline stage,
    file-name postfixes and the tuning thresholds.  Every value is exposed
    through a getter; the directory getters read the ``m_*`` class
    attributes through ``self`` so that a subclass or an instance may
    override them.
    '''

    # Epoch number associated with each stage name.
    m_current_epoch = {
        'SegmentEpoch': 1,
        'GenerateEpoch': 2,
        'EstimateEpoch': 3,
        'PruneEpoch': 4,
        'EvaluateEpoch': 5,
    }

    def getEpochs(self):
        '''Return the stage-name -> epoch-number dict.

        The dict itself is returned, not a copy.
        '''
        return self.m_current_epoch

    # Base directory of the trainer.
    m_trainer_dir = '/media/data/Program/trainer'

    def getBaseDir(self):
        '''Return the trainer base directory.'''
        return self.m_trainer_dir

    def getTextDir(self):
        '''Return the 'texts' sub-directory of the base directory.'''
        return self.m_trainer_dir + os.sep + 'texts'

    def getModelDir(self):
        '''Return the 'models' sub-directory of the base directory.'''
        return self.m_trainer_dir + os.sep + 'models'

    def getFinalModelDir(self):
        '''Return the 'finals' sub-directory of the base directory.'''
        return self.m_trainer_dir + os.sep + 'finals'

    # Directory of the libpinyin tools.
    m_tools_dir = '/media/data/Program/trainer/tools/libpinyin'

    def getToolsDir(self):
        '''Return the tools directory.'''
        return self.m_tools_dir

    # Directory of the libpinyin evaluation data.
    m_evals_dir = '/media/data/Program/trainer/evals/libpinyin'

    def getEvalsDir(self):
        '''Return the evals directory.'''
        return self.m_evals_dir

    def getEstimatesModel(self):
        '''Return the path of 'data/estimates.db' under the tools directory.'''
        # Class attributes are not visible as bare names inside a method
        # body; they must be reached through self (or the class).
        return self.m_tools_dir + '/data/estimates.db'

    def getEstimateIndex(self):
        '''Return the file name of the estimate index.'''
        return 'estimate.index'

    def getSortedEstimateIndex(self):
        '''Return the file name of the sorted estimate index.'''
        return 'estimate.sorted.index'

    def getEvalsText(self):
        '''Return the path of 'data/evals.text' under the tools directory.'''
        # Reached through self for the same reason as getEstimatesModel.
        return self.m_tools_dir + '/data/evals.text'

    def getMinimumFileSize(self):
        '''Return the minimum file size as an integer (4200).

        Computed as 3.5 times a budget of 1200 characters.
        '''
        #about 1,200 Chinese characters
        minimum_chinese_characters = 1200
        # NOTE(review): the 3 + 1/2 factor presumably models 3 bytes per
        # character plus some overhead -- confirm.
        # Floor division keeps the result an int on Python 2 and Python 3.
        return minimum_chinese_characters * 3 + minimum_chinese_characters // 2

    #the trained corpus size of model candidates
    def getCandidateModelSize(self):
        '''Return the candidate model corpus size, 11.9 * 1024 * 1024 (a float).'''
        return 11.9 * 1024 * 1024

    def getModelPostfix(self):
        '''Return the postfix of model files.'''
        return '.db'

    def getCandidateModelName(self, index):
        '''Return the candidate model file name for the given index.

        index is substituted into "model-candidates-{0}.db" with str.format.
        '''
        return "model-candidates-{0}.db".format(index)

    def getMaximumOccursAllowed(self):
        '''Return the maximum occurs allowed (20).'''
        return 20

    def getMaximumIncreaseRatesAllowed(self):
        '''Return the maximum increase rates allowed (3.0).'''
        return 3.

    def getSegmentPostfix(self):
        '''Return the postfix of segmented files.'''
        return '.segmented'

    def getSegmentReportPostfix(self):
        '''Return the postfix of segment report files.'''
        return '.segment.report'

    #For both index page, item page and binary model file
    def getStatusPostfix(self):
        '''Return the postfix of status files.'''
        return '.status'

    def getIndexPostfix(self):
        '''Return the postfix of index files.'''
        return '.index'

    def getTextPostfix(self):
        '''Return the postfix of text files.'''
        return '.text'

    def getFinalModelFileName(self):
        '''Return the file name of the final model.'''
        return 'interpolation.text'

    def getFinalStatusFileName(self):
        '''Return the file name of the final status file.'''
        return 'cwd.status'
|