diff options
| author | Peng Huang <shawn.p.huang@gmail.com> | 2010-05-30 14:55:37 +0800 |
|---|---|---|
| committer | Peng Huang <shawn.p.huang@gmail.com> | 2010-05-30 14:55:37 +0800 |
| commit | 44f4960fb84fbb3643d523932bc8612bff5ff18c (patch) | |
| tree | f15e1fae31151b25c3fe9e6a8bb61f0628d29d27 /scripts | |
| parent | 6b29ccd6523cf87c75d9f2327f930ea94a4e35a0 (diff) | |
Move python script files to scripts folder
Diffstat (limited to 'scripts')
| -rwxr-xr-x | scripts/update-simptrad-table.py | 71 | ||||
| l--------- | scripts/valid_hanzi.py | 1 |
2 files changed, 72 insertions, 0 deletions
diff --git a/scripts/update-simptrad-table.py b/scripts/update-simptrad-table.py
new file mode 100755
index 0000000..5e9df58
--- /dev/null
+++ b/scripts/update-simptrad-table.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+import sys
+sys.path.append(".")
+
+from ZhConversion import *
+from valid_hanzi import *
+from sctc import *
+
+def convert(s, d, n):
+    out = u""
+    end = len(s)
+    begin = 0
+    while begin < end:
+        for i in range(min(n, end - begin), 0, -1):
+            t = s[begin:begin+i]
+            t = d.get(t, t if i == 1 else None)
+            if t:
+                break
+        out = out + t
+        begin += i
+    return out
+
+def filter_more(records, n):
+    han = filter(lambda (k, v): len(k) <= n, records)
+    hand = dict(han)
+    hanm = filter(lambda (k, v): convert(k, hand, n) != v, records)
+    return hanm + han
+
+def filter_func(args):
+    k, v = args
+    # length is not equal or length > 6
+    if len(k) != len(v) or len(k) > 6:
+        return False
+    # k includes invalid hanzi
+    if not all(c in valid_hanzi for c in k):
+        return False
+    # v includes invalid hanzi
+    if not all(c in valid_hanzi for c in v):
+        return False
+
+    # # check chars in k and v
+    # for c1, c2 in zip(k, v):
+    #     if c1 == c2:
+    #         continue
+    #     if c2 not in S_2_T.get(c1, []):
+    #         return False
+    return True
+
+def get_records():
+    records = zh2Hant.items()
+
+    records = filter(filter_func, records)
+
+    maxlen = max(map(lambda (k,v): len(k), records))
+    for i in range(1, maxlen - 1):
+        records = filter_more(records, i)
+
+    records.sort()
+    return maxlen, records
+
+def main():
+
+    print "const wchar_t * const simp_to_trad[][2] = {"
+    maxlen, records = get_records()
+    for s, ts in records:
+        print ' { L"%s", L"%s" },' % (s.encode("utf8"), ts.encode("utf8"))
+    print "};"
+    print '#define SIMP_TO_TRAD_MAX_LEN (%d)' % maxlen
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/valid_hanzi.py b/scripts/valid_hanzi.py
new file mode 120000
index 0000000..8eb09ba
--- /dev/null
+++ b/scripts/valid_hanzi.py
@@ -0,0 +1 @@
+../data/db/android/valid_hanzi.py
\ No newline at end of file |
