From 056c8a42c2d131ec30b80bbf921314693ee381f8 Mon Sep 17 00:00:00 2001
From: Peng Wu <alexepico@gmail.com>
Date: Thu, 8 Aug 2013 13:41:12 +0800
Subject: clean up scripts

---
 scripts/Makefile.data      |   9 +---
 scripts/genpinyinheader.py |   5 +-
 scripts/genpinyins.py      |  57 ---------------------
 scripts/genspecialtable.py |  93 ----------------------------------
 scripts/specials.txt       |   0
 scripts/specialtable.py    | 123 ---------------------------------------------
 6 files changed, 3 insertions(+), 284 deletions(-)
 delete mode 100644 scripts/genpinyins.py
 delete mode 100644 scripts/genspecialtable.py
 delete mode 100644 scripts/specials.txt
 delete mode 100644 scripts/specialtable.py

(limited to 'scripts')

diff --git a/scripts/Makefile.data b/scripts/Makefile.data
index 49f65b4..624db75 100644
--- a/scripts/Makefile.data
+++ b/scripts/Makefile.data
@@ -1,14 +1,7 @@
-all: pinyins.txt
-
-
-pinyins.txt:
-	python3 genpinyins.py
+all:
 
 
 update-header:
 	python3 genpinyinheader.py > ../src/storage/pinyin_parser_table.h
 	python3 genbopomofoheader.py > ../src/storage/chewing_table.h
 	python3 genchewingkey.py > ../src/storage/chewing_enum.h
-
-
-.PHONY: pinyins.txt
diff --git a/scripts/genpinyinheader.py b/scripts/genpinyinheader.py
index 81e0538..283c64a 100644
--- a/scripts/genpinyinheader.py
+++ b/scripts/genpinyinheader.py
@@ -24,7 +24,6 @@ from utils import expand_file
 from genpinyintable import gen_content_table, \
     gen_pinyin_index, gen_bopomofo_index, \
     gen_chewing_key_table
-from genspecialtable import gen_divided_table, gen_resplit_table
 
 def get_table_content(tablename):
     if tablename == 'CONTENT_TABLE':
@@ -34,9 +33,9 @@ def get_table_content(tablename):
     if tablename == 'BOPOMOFO_INDEX':
         return gen_bopomofo_index()
     if tablename == 'DIVIDED_TABLE':
-        return gen_divided_table()
+        return ''
     if tablename == 'RESPLIT_TABLE':
-        return gen_resplit_table()
+        return ''
     if tablename == 'TABLE_INDEX':
         return gen_chewing_key_table()
 
diff --git a/scripts/genpinyins.py b/scripts/genpinyins.py
deleted file mode 100644
index fef40cd..0000000
--- a/scripts/genpinyins.py
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/python3
-import os
-from operator import itemgetter
-
-pinyin_dict = {}
-
-
-def strip_tone(old_pinyin_str):
-    oldpinyins = old_pinyin_str.split("'")
-    newpinyins = []
-
-    for pinyin in oldpinyins:
-        if pinyin[-1].isdigit():
-            pinyin = pinyin[:-1]
-        newpinyins.append(pinyin)
-
-    new_pinyin_str = "'".join(newpinyins)
-    return new_pinyin_str
-
-
-def add_pinyin_dict(pinyin, freq):
-    if 0 == freq:
-        return
-    if not pinyin in pinyin_dict:
-        pinyin_dict[pinyin] = freq
-    else:
-        pinyin_dict[pinyin] += freq
-
-
-def load_phrase(filename):
-    phrasefile = open(filename, "r")
-    for line in phrasefile.readlines():
-        line = line.rstrip(os.linesep)
-        (pinyin, word, token, freq) = line.split(None, 3)
-        pinyin = strip_tone(pinyin)
-        freq = int(freq)
-
-        if len(word) in [1, 2]:
-            add_pinyin_dict(pinyin, freq)
-
-    phrasefile.close()
-
-load_phrase("../data/gb_char.table")
-load_phrase("../data/gbk_char.table")
-
-
-def save_pinyin(filename):
-    pinyinfile = open(filename, "w")
-    for pinyin, freq in pinyin_dict.items():
-        freq = str(freq)
-        line = "\t".join((pinyin, freq))
-        pinyinfile.writelines([line, os.linesep])
-    pinyinfile.close()
-
-
-if __name__ == "__main__":
-    save_pinyin("pinyins.txt")
diff --git a/scripts/genspecialtable.py b/scripts/genspecialtable.py
deleted file mode 100644
index 061f9d1..0000000
--- a/scripts/genspecialtable.py
+++ /dev/null
@@ -1,93 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libpinyin - Library to deal with pinyin.
-#
-# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-
-
-import operator
-import pinyin
-from pinyintable import get_chewing, get_shengmu_chewing
-from specialtable import *
-
-pinyin_list = sorted(pinyin.PINYIN_LIST)
-shengmu_list = sorted(pinyin.SHENGMU_LIST)
-
-divided_list = []
-resplit_list = []
-
-
-def sort_all():
-    global divided_list, resplit_list
-    divided_list = sorted(divided_list, key=operator.itemgetter(0))
-    resplit_list = sorted(resplit_list, key=operator.itemgetter(0, 1))
-
-'''
-def get_chewing_string(pinyin):
-    #handle shengmu
-    if pinyin not in pinyin_list:
-        if pinyin in shengmu_list:
-            chewing_key = get_shengmu_chewing(pinyin)
-        else:
-            assert False, "Un-expected pinyin string."
-    else:
-        chewing_key = get_chewing(pinyin)
-    chewing_str = 'ChewingKey({0})'.format(', '.join(chewing_key))
-    return chewing_str
-'''
-
-def gen_divided_table():
-    entries = []
-    for (pinyin_key, orig_freq, first_key, second_key, new_freq) \
-            in divided_list:
-
-        if orig_freq >= new_freq:
-            assert orig_freq > 0, "Expected orig_freq > 0 here."
-
-        entry = '{{"{0}", {1}, {{"{2}", "{3}"}}, {4}}}'.format \
-            (pinyin_key, orig_freq, first_key, second_key, new_freq)
-        entries.append(entry)
-    return ',\n'.join(entries)
-
-
-def gen_resplit_table():
-    entries = []
-    for (orig_first_key, orig_second_key, orig_freq, \
-        new_first_key, new_second_key, new_freq) in resplit_list:
-
-        if orig_freq >= new_freq:
-            assert orig_freq > 0, "Expected orig_freq > 0 here."
-
-        entry = '{{{{"{0}", "{1}"}}, {2}, {{"{3}", "{4}"}}, {5}}}'.format \
-            (orig_first_key, orig_second_key, orig_freq,\
-                 new_first_key, new_second_key, new_freq)
-        entries.append(entry)
-    return ',\n'.join(entries)
-
-
-#init code, load lists
-divided_list = filter_divided()
-resplit_list = filter_resplit()
-sort_all()
-
-
-### main function ###
-if __name__ == "__main__":
-    s = gen_divided_table() + '\n' + gen_resplit_table()
-    print(s)
-
diff --git a/scripts/specials.txt b/scripts/specials.txt
deleted file mode 100644
index e69de29..0000000
diff --git a/scripts/specialtable.py b/scripts/specialtable.py
deleted file mode 100644
index b6fb680..0000000
--- a/scripts/specialtable.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# libpinyin - Library to deal with pinyin.
-#
-# Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-
-
-import os
-import sys
-import math
-import pinyin
-
-pinyin_list = sorted(pinyin.PINYIN_LIST)
-shengmu_list = sorted(pinyin.SHENGMU_LIST)
-yunmu_list = sorted(pinyin.YUNMU_LIST)
-
-phrase_dict = {}
-
-
-def load_phrase(filename):
-    phrasefile = open(filename, "r")
-    for line in phrasefile.readlines():
-        line = line.rstrip(os.linesep)
-        (pinyin_str, freq) = line.split(None, 1)
-        freq = int(freq)
-        if 0 == freq:
-            #print(pinyin_str)
-            continue
-
-        # no duplicate here
-        if "'" in pinyin_str:
-            (first_key, second_key) = pinyin_str.split("'")
-            phrase_dict[(first_key, second_key)] = freq
-        else:
-            phrase_dict[pinyin_str] = freq
-    phrasefile.close()
-
-
-def gen_all_divided():
-    for pinyin_key in pinyin_list:
-        for first_key in pinyin_list:
-            if len(pinyin_key) <= len(first_key):
-                continue
-            if not pinyin_key.startswith(first_key):
-                continue
-            second_key = pinyin_key[len(first_key):]
-            if second_key in pinyin_list:
-                yield pinyin_key, first_key, second_key
-
-
-def filter_divided():
-    for (pinyin_key, first_key, second_key) in gen_all_divided():
-        if not (first_key, second_key) in phrase_dict:
-            continue
-        orig_freq = 0
-        if pinyin_key in phrase_dict:
-            orig_freq = phrase_dict[pinyin_key]
-        new_freq = phrase_dict[(first_key, second_key)]
-        yield pinyin_key, orig_freq, first_key, second_key, new_freq
-
-
-def gen_all_resplit():
-    for pinyin_key in pinyin_list:
-        if pinyin_key[-1] in ["n", "g", "r"]:
-            for yun in yunmu_list:
-                if yun not in pinyin_list:
-                    continue
-                #check first new pinyin key
-                if not pinyin_key[:-1] in pinyin_list:
-                    continue
-                #check second new pinyin key
-                new_pinyin_key = pinyin_key[-1] + yun
-                if new_pinyin_key in pinyin_list:
-                    yield pinyin_key, yun, pinyin_key[:-1], new_pinyin_key
-'''
-        elif pinyin_key[-1] in ["e"]:
-            #check first new pinyin key
-            if pinyin_key[:-1] in pinyin_list:
-                yield pinyin_key, "r", pinyin_key[:-1], "er"
-'''
-
-
-def filter_resplit():
-    for (orig_first_key, orig_second_key, new_first_key, new_second_key) \
-    in gen_all_resplit():
-        #do the reverse here, as libpinyin pinyin parser is different with
-        #ibus-pinyin's parser.
-        (orig_first_key, orig_second_key, new_first_key, new_second_key) = \
-            (new_first_key, new_second_key, orig_first_key, orig_second_key)
-        if (new_first_key, new_second_key) not in phrase_dict:
-            continue
-        orig_freq = 0
-        new_freq = phrase_dict[(new_first_key, new_second_key)]
-        if (orig_first_key, orig_second_key) in phrase_dict:
-            orig_freq = phrase_dict[(orig_first_key, orig_second_key)]
-        yield orig_first_key, orig_second_key, orig_freq, \
-        new_first_key, new_second_key, new_freq
-
-
-#init code
-load_phrase("pinyins.txt")
-load_phrase("specials.txt")
-
-if __name__ == "__main__":
-    for p in filter_divided():
-        print (p)
-    for p in filter_resplit():
-        print (p)
-- 
cgit