blob: 3f2b53266d9c0d76324f2a4e418bae1961dd0471 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
# Bare file names of the binary model data.
# These are the names the `data` target depends on; each one is declared as
# an OUTPUT of one of the custom commands in this file.
set(BINARY_MODEL_DATA
    gb_char.bin
    gbk_char.bin
    phrase_index.bin
    pinyin_index.bin
    bigram.db
)
# Full build-tree paths of the binary model data files.
# This list is what gets installed and what is registered for `make clean`.
set(BINARY_MODEL_DATA_FILES
    ${CMAKE_BINARY_DIR}/data/gb_char.bin
    ${CMAKE_BINARY_DIR}/data/gbk_char.bin
    ${CMAKE_BINARY_DIR}/data/phrase_index.bin
    ${CMAKE_BINARY_DIR}/data/pinyin_index.bin
    ${CMAKE_BINARY_DIR}/data/bigram.db
)
# Build-tree path of the gen_binary_files tool, run below to produce the
# character/phrase/pinyin .bin files.
set(gen_binary_files_BIN "${CMAKE_BINARY_DIR}/utils/storage/gen_binary_files")
# Build-tree path of the import_interpolation tool, run below as the first
# step of producing bigram.db.
set(import_interpolation_BIN "${CMAKE_BINARY_DIR}/utils/storage/import_interpolation")
# Build-tree path of the gen_unigram tool, run below as the second step of
# producing bigram.db.
set(gen_unigram_BIN "${CMAKE_BINARY_DIR}/utils/training/gen_unigram")
# `data` is part of the default build (ALL) and pulls in every binary model
# file listed in BINARY_MODEL_DATA, which triggers the custom commands below.
add_custom_target(data ALL
    DEPENDS ${BINARY_MODEL_DATA}
)
# Fetch the textual model archive and unpack it into the source tree's data
# directory. The three OUTPUT files are the ones later rules depend on;
# presumably they are members of the archive (not verifiable from this file).
#
# No WORKING_DIRECTORY is given, so wget saves the archive in the command's
# default working directory while tar extracts into ${CMAKE_SOURCE_DIR}/data.
# NOTE(review): this rule needs network access plus wget and tar on PATH, and
# it writes generated files into the source tree.
add_custom_command(
    OUTPUT
        ${CMAKE_SOURCE_DIR}/data/gb_char.table
        ${CMAKE_SOURCE_DIR}/data/gbk_char.table
        ${CMAKE_SOURCE_DIR}/data/interpolation.text
    COMMENT "Downloading textual model data..."
    COMMAND wget https://github.com/downloads/libpinyin/libpinyin/model.text.tar.gz
    COMMAND tar xvf model.text.tar.gz -C ${CMAKE_SOURCE_DIR}/data
)
# Produce the character, phrase and pinyin .bin files by running
# gen_binary_files against the table files in the source data directory.
# The rule re-runs when the gen_binary_files target or either char table
# changes.
add_custom_command(
    OUTPUT
        gb_char.bin
        gbk_char.bin
        phrase_index.bin
        pinyin_index.bin
    COMMENT "Building binary model data..."
    COMMAND ${gen_binary_files_BIN} --table-dir ${CMAKE_SOURCE_DIR}/data
    DEPENDS
        gen_binary_files
        ${CMAKE_SOURCE_DIR}/data/gb_char.table
        ${CMAKE_SOURCE_DIR}/data/gbk_char.table
)
# Produce bigram.db in two steps: feed interpolation.text to
# import_interpolation on stdin, then run gen_unigram.
#
# Both tools are listed in DEPENDS so that each one is built before this rule
# runs; gen_unigram is invoked by the second COMMAND and therefore has to be a
# dependency just like import_interpolation.
#
# VERBATIM is intentionally not used: the '<' in the first COMMAND must reach
# the shell as an input redirection.
#
# NOTE(review): if either tool reads the .bin files produced by the
# gen_binary_files rule, those files should be added to DEPENDS as well --
# confirm against the tools' sources.
add_custom_command(
    OUTPUT
        bigram.db
    COMMENT
        "Building binary bigram data..."
    COMMAND
        ${import_interpolation_BIN} < ${CMAKE_SOURCE_DIR}/data/interpolation.text
    COMMAND
        ${gen_unigram_BIN}
    DEPENDS
        import_interpolation
        gen_unigram
        ${CMAKE_SOURCE_DIR}/data/interpolation.text
)
# Install the generated binary model files into the libpinyin share
# directory's data/ subfolder.
install(
    FILES ${BINARY_MODEL_DATA_FILES}
    DESTINATION ${DIR_SHARE_LIBPINYIN}/data
)
# Register the generated model files for removal by `make clean`.
#
# The list is quoted on purpose: set_directory_properties() consumes its
# arguments as name/value pairs, so an unquoted list would be split into
# separate arguments and only its first element would end up as the value of
# ADDITIONAL_MAKE_CLEAN_FILES (the rest would be taken as further
# property/value pairs). Quoting passes the whole ';'-separated list as the
# single property value.
set_directory_properties(
    PROPERTIES
        ADDITIONAL_MAKE_CLEAN_FILES "${BINARY_MODEL_DATA_FILES}"
)
|