summaryrefslogtreecommitdiffstats
path: root/data/CMakeLists.txt
blob: 3f2b53266d9c0d76324f2a4e418bae1961dd0471 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Binary model files produced in this directory, listed by bare file name.
# The `data` target depends on every entry of this list.
set(BINARY_MODEL_DATA
    gb_char.bin
    gbk_char.bin
    phrase_index.bin
    pinyin_index.bin
    bigram.db
)

# Full build-tree paths of the generated model files; consumed by install()
# and by the directory clean list.
# The custom commands that produce these files declare their outputs with
# relative names, which CMake resolves against CMAKE_CURRENT_BINARY_DIR. The
# same base is used here instead of assuming that this directory's build tree
# is always ${CMAKE_BINARY_DIR}/data.
set(
    BINARY_MODEL_DATA_FILES
    ${CMAKE_CURRENT_BINARY_DIR}/gb_char.bin
    ${CMAKE_CURRENT_BINARY_DIR}/gbk_char.bin
    ${CMAKE_CURRENT_BINARY_DIR}/phrase_index.bin
    ${CMAKE_CURRENT_BINARY_DIR}/pinyin_index.bin
    ${CMAKE_CURRENT_BINARY_DIR}/bigram.db
)

# Path of the gen_binary_files tool inside the top-level build tree; used as
# the COMMAND of the rule that builds the binary model files.
set(gen_binary_files_BIN "${CMAKE_BINARY_DIR}/utils/storage/gen_binary_files")

# Path of the import_interpolation tool inside the top-level build tree; used
# as the first COMMAND of the rule that builds bigram.db.
set(import_interpolation_BIN "${CMAKE_BINARY_DIR}/utils/storage/import_interpolation")

# Path of the gen_unigram tool inside the top-level build tree; used as the
# second COMMAND of the rule that builds bigram.db.
set(gen_unigram_BIN "${CMAKE_BINARY_DIR}/utils/training/gen_unigram")

# Umbrella target `data`: part of the default build (ALL) and satisfied once
# every file named in BINARY_MODEL_DATA has been generated.
add_custom_target(data ALL
    DEPENDS ${BINARY_MODEL_DATA}
)

# Fetch and unpack the textual model data.
# The archive is downloaded into this directory's build tree and extracted
# into the source data directory; the archive presumably contains the three
# OUTPUT files listed below.
#
# `wget -O` pins the name of the downloaded file. Without it, a leftover
# archive from an earlier (possibly interrupted) run makes wget store the new
# copy as model.text.tar.gz.1, and tar would then unpack the stale archive.
# NOTE(review): this is a legacy GitHub "downloads" URL — confirm it still
# resolves.
add_custom_command(
    OUTPUT
        ${CMAKE_SOURCE_DIR}/data/gb_char.table
        ${CMAKE_SOURCE_DIR}/data/gbk_char.table
        ${CMAKE_SOURCE_DIR}/data/interpolation.text
    COMMENT
        "Downloading textual model data..."
    COMMAND
        wget -O ${CMAKE_CURRENT_BINARY_DIR}/model.text.tar.gz
            https://github.com/downloads/libpinyin/libpinyin/model.text.tar.gz
    COMMAND
        tar xvf ${CMAKE_CURRENT_BINARY_DIR}/model.text.tar.gz
            -C ${CMAKE_SOURCE_DIR}/data
    # Escape arguments identically on every platform/generator.
    VERBATIM
)

# Build the binary model files from the textual tables.
# The generator is pointed at the source data directory via --table-dir. The
# OUTPUT names are relative, so CMake expects the files in this directory's
# build tree.
# DEPENDS lists gen_binary_files (presumably the target that builds the tool
# invoked below) and the two downloaded tables, so the rule re-runs when any
# of them changes.
add_custom_command(
    OUTPUT
        gb_char.bin
        gbk_char.bin
        phrase_index.bin
        pinyin_index.bin
    COMMENT
        "Building binary model data..."
    COMMAND
        ${gen_binary_files_BIN} --table-dir ${CMAKE_SOURCE_DIR}/data
    DEPENDS
        gen_binary_files
        ${CMAKE_SOURCE_DIR}/data/gb_char.table
        ${CMAKE_SOURCE_DIR}/data/gbk_char.table
    # Escape arguments identically on every platform/generator.
    VERBATIM
)

# Build bigram.db: feed interpolation.text to import_interpolation on stdin,
# then run gen_unigram.
# The `<` redirection relies on the build tool running the command through a
# shell.
# gen_unigram is listed in DEPENDS next to import_interpolation because the
# second COMMAND runs it; without that entry nothing guarantees the tool has
# been built before this rule executes.
# NOTE(review): assumes a target named gen_unigram exists, mirroring the
# gen_binary_files / import_interpolation naming — confirm.
add_custom_command(
    OUTPUT
        bigram.db
    COMMENT
        "Building binary bigram data..."
    COMMAND
        ${import_interpolation_BIN} < ${CMAKE_SOURCE_DIR}/data/interpolation.text
    COMMAND
        ${gen_unigram_BIN}
    DEPENDS
        import_interpolation
        gen_unigram
        ${CMAKE_SOURCE_DIR}/data/interpolation.text
)

# Install every generated model file into the `data` subdirectory of
# DIR_SHARE_LIBPINYIN (defined outside this file).
install(
    FILES ${BINARY_MODEL_DATA_FILES}
    DESTINATION ${DIR_SHARE_LIBPINYIN}/data
)

# Register the generated model files for removal by the `clean` target.
# The list is quoted on purpose: set_directory_properties() reads its
# arguments as <property> <value> pairs, so an unquoted multi-element list
# would assign only the first path to ADDITIONAL_MAKE_CLEAN_FILES and treat
# the remaining paths as further property/value pairs.
set_directory_properties(
    PROPERTIES
        ADDITIONAL_MAKE_CLEAN_FILES
            "${BINARY_MODEL_DATA_FILES}"
)