summary | refs | log | tree | commit | diff | stats
path: root/scripts
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2013-01-14 15:15:05 +0800
committer: Peng Wu <alexepico@gmail.com> 2013-01-14 15:15:05 +0800
commit: e39223215f96debb725fd9a85f8e2df7d5ff03d7 (patch)
tree: feb0df43047b168fb28bb05cbac73d824ac1cd75 /scripts
parent: 500c7a720fc5cbce34da5bb1eddcf2b50b20e76d (diff)
download: libpinyin-e39223215f96debb725fd9a85f8e2df7d5ff03d7.tar.gz
libpinyin-e39223215f96debb725fd9a85f8e2df7d5ff03d7.tar.xz
libpinyin-e39223215f96debb725fd9a85f8e2df7d5ff03d7.zip
code re-factor
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/Makefile.data 4
-rw-r--r-- scripts/specialtable.py 19
2 files changed, 5 insertions, 18 deletions
diff --git a/scripts/Makefile.data b/scripts/Makefile.data
index 1457407..7929e97 100644
--- a/scripts/Makefile.data
+++ b/scripts/Makefile.data
@@ -2,9 +2,7 @@ all: pinyins.txt
pinyins.txt:
- awk -f genpinyins.awk -vlen=1 ../data/gb_char.table \
- ../data/gbk_char.table > $@
- awk -f genpinyins.awk -vlen=2 ../data/gb_char.table >> $@
+ python3 genpinyins.py
update-header:
diff --git a/scripts/specialtable.py b/scripts/specialtable.py
index 5d1d599..b6fb680 100644
--- a/scripts/specialtable.py
+++ b/scripts/specialtable.py
@@ -37,28 +37,17 @@ def load_phrase(filename):
for line in phrasefile.readlines():
line = line.rstrip(os.linesep)
(pinyin_str, freq) = line.split(None, 1)
- freq = int(math.floor(float(freq)))
+ freq = int(freq)
if 0 == freq:
#print(pinyin_str)
continue
+ # no duplicate here
if "'" in pinyin_str:
(first_key, second_key) = pinyin_str.split("'")
- if first_key[-1].isdigit():
- first_key = first_key[:-1]
- if second_key[-1].isdigit():
- second_key = second_key[:-1]
- if (first_key, second_key) in phrase_dict:
- phrase_dict[(first_key, second_key)] += freq
- else:
- phrase_dict[(first_key, second_key)] = freq
+ phrase_dict[(first_key, second_key)] = freq
else:
- if pinyin_str[-1].isdigit():
- pinyin_str = pinyin_str[:-1]
- if pinyin_str in phrase_dict:
- phrase_dict[pinyin_str] += freq
- else:
- phrase_dict[pinyin_str] = freq
+ phrase_dict[pinyin_str] = freq
phrasefile.close()