summary | refs | log | tree | commit | diff | stats
path: root/scripts2/fullpinyintable.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts2/fullpinyintable.py')
-rw-r--r-- scripts2/fullpinyintable.py 100
1 files changed, 1 insertions, 99 deletions
diff --git a/scripts2/fullpinyintable.py b/scripts2/fullpinyintable.py
index 6ad05be..b8cb1a6 100644
--- a/scripts2/fullpinyintable.py
+++ b/scripts2/fullpinyintable.py
@@ -25,7 +25,7 @@ import itertools
import chewing
from pyzymap import ZHUYIN_PINYIN_MAP, ZHUYIN_LUOMA_PINYIN_MAP, ZHUYIN_SECONDARY_ZHUYIN_MAP
from pyzymap import PINYIN_ZHUYIN_MAP, ZHUYIN_SPECIAL_INITIAL_SET_IN_PINYIN_FORM
-from fullpinyin import PINYIN_LIST, SHENGMU_LIST, YUNMU_LIST
+from fullpinyin import PINYIN_LIST, SHENGMU_LIST
from options import *
from utils import shuffle_all
@@ -403,94 +403,6 @@ def gen_table_index_for_chewing_key(content_table):
return ",\n".join(entries)
-#special table
-pinyin_list = sorted(PINYIN_LIST)
-shengmu_list = sorted(SHENGMU_LIST)
-yunmu_list = sorted(YUNMU_LIST)
-
-phrase_dict = {}
-
-
-def load_phrase(filename):
- phrasefile = open(filename, "r")
- for line in phrasefile.readlines():
- line = line.rstrip(os.linesep)
- (pinyin_str, freq) = line.split(None, 1)
- freq = int(freq)
- if 0 == freq:
- #print(pinyin_str)
- continue
-
- # no duplicate here
- if "'" in pinyin_str:
- (first_key, second_key) = pinyin_str.split("'")
- phrase_dict[(first_key, second_key)] = freq
- else:
- phrase_dict[pinyin_str] = freq
- phrasefile.close()
-
-
-def gen_all_divided():
- for pinyin_key in pinyin_list:
- for first_key in pinyin_list:
- if len(pinyin_key) <= len(first_key):
- continue
- if not pinyin_key.startswith(first_key):
- continue
- second_key = pinyin_key[len(first_key):]
- if second_key in pinyin_list:
- yield pinyin_key, first_key, second_key
-
-
-def filter_divided():
- for (pinyin_key, first_key, second_key) in gen_all_divided():
- if not (first_key, second_key) in phrase_dict:
- continue
- orig_freq = 0
- if pinyin_key in phrase_dict:
- orig_freq = phrase_dict[pinyin_key]
- new_freq = phrase_dict[(first_key, second_key)]
- yield pinyin_key, orig_freq, first_key, second_key, new_freq
-
-
-def gen_all_resplit():
- for pinyin_key in pinyin_list:
- if pinyin_key[-1] in ["n", "g", "r"]:
- for yun in yunmu_list:
- if yun not in pinyin_list:
- continue
- #check first new pinyin key
- if not pinyin_key[:-1] in pinyin_list:
- continue
- #check second new pinyin key
- new_pinyin_key = pinyin_key[-1] + yun
- if new_pinyin_key in pinyin_list:
- yield pinyin_key, yun, pinyin_key[:-1], new_pinyin_key
-'''
- elif pinyin_key[-1] in ["e"]:
- #check first new pinyin key
- if pinyin_key[:-1] in pinyin_list:
- yield pinyin_key, "r", pinyin_key[:-1], "er"
-'''
-
-
-def filter_resplit():
- for (orig_first_key, orig_second_key, new_first_key, new_second_key) \
- in gen_all_resplit():
- #do the reverse here, as libpinyin pinyin parser is different with
- #ibus-pinyin's parser.
- (orig_first_key, orig_second_key, new_first_key, new_second_key) = \
- (new_first_key, new_second_key, orig_first_key, orig_second_key)
- if (new_first_key, new_second_key) not in phrase_dict:
- continue
- orig_freq = 0
- new_freq = phrase_dict[(new_first_key, new_second_key)]
- if (orig_first_key, orig_second_key) in phrase_dict:
- orig_freq = phrase_dict[(orig_first_key, orig_second_key)]
- yield orig_first_key, orig_second_key, orig_freq, \
- new_first_key, new_second_key, new_freq
-
-
#init full pinyin table code
filter_pinyin_list()
check_rules(hsu_correct, hsu_correct_special)
@@ -498,10 +410,6 @@ check_rules(eten26_correct, eten26_correct_special)
populate_more_zhuyin_index()
sort_all()
-#init resplit/divided table code
-load_phrase("pinyins.txt")
-#load_phrase("specials.txt")
-
### main function ###
if __name__ == "__main__":
@@ -517,9 +425,3 @@ if __name__ == "__main__":
s = gen_hsu_zhuyin_index() + gen_eten26_zhuyin_index()
s = gen_table_index_for_chewing_key(content_table)
print(s)
-
- #dump
- for p in filter_divided():
- print (p)
- for p in filter_resplit():
- print (p)