summary refs log tree commit diff stats
path: root/scripts
diff options
context:
space:
mode:
author Peng Huang <shawn.p.huang@gmail.com> 2010-05-30 14:55:37 +0800
committer Peng Huang <shawn.p.huang@gmail.com> 2010-05-30 14:55:37 +0800
commit 44f4960fb84fbb3643d523932bc8612bff5ff18c (patch)
tree f15e1fae31151b25c3fe9e6a8bb61f0628d29d27 /scripts
parent 6b29ccd6523cf87c75d9f2327f930ea94a4e35a0 (diff)
Move python script files to scripts folder
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/update-simptrad-table.py 71
l--------- scripts/valid_hanzi.py 1
2 files changed, 72 insertions, 0 deletions
diff --git a/scripts/update-simptrad-table.py b/scripts/update-simptrad-table.py
new file mode 100755
index 0000000..5e9df58
--- /dev/null
+++ b/scripts/update-simptrad-table.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+import sys
+sys.path.append(".")
+
+from ZhConversion import *
+from valid_hanzi import *
+from sctc import *
+
+def convert(s, d, n):  # Rewrite s by greedy longest-match lookup in d, trying substrings of at most n characters.
+ out = u""  # converted text accumulated so far
+ end = len(s)
+ begin = 0  # index of the first character of s not yet converted
+ while begin < end:  # NOTE(review): assumes n >= 1; otherwise the inner loop never runs and t / i are unbound below
+ for i in range(min(n, end - begin), 0, -1):  # candidate lengths, longest first
+ t = s[begin:begin+i]
+ t = d.get(t, t if i == 1 else None)  # an unmapped single character maps to itself; longer misses give None
+ if t:
+ break  # stop at the first (longest) length that produced a non-empty result
+ out = out + t
+ begin += i  # skip the characters consumed by this step
+ return out
+
+def filter_more(records, n):  # Keep the records with keys of length <= n plus those that convert() does not reproduce from them.
+ han = filter(lambda (k, v): len(k) <= n, records)  # records whose key has at most n characters; tuple-parameter lambda is Python 2 only
+ hand = dict(han)  # lookup table built from those short records
+ hanm = filter(lambda (k, v): convert(k, hand, n) != v, records)  # records whose value differs from what the short table yields
+ return hanm + han  # concatenation relies on filter() returning lists (Python 2 behaviour)
+
+def filter_func(args):  # Predicate over a (k, v) pair: True when the pair passes every check below.
+ k, v = args
+ # reject pairs whose lengths differ or whose key is longer than 6 characters
+ if len(k) != len(v) or len(k) > 6:
+ return False
+ # reject keys containing a character that is not in valid_hanzi
+ if not all(c in valid_hanzi for c in k):  # valid_hanzi is not defined here; presumably comes from the valid_hanzi star import — verify
+ return False
+ # reject values containing a character that is not in valid_hanzi
+ if not all(c in valid_hanzi for c in v):
+ return False
+
+ # # check chars in k and v
+ # for c1, c2 in zip(k, v):
+ # if c1 == c2:
+ # continue
+ # if c2 not in S_2_T.get(c1, []):
+ # return False
+ return True
+
+def get_records():  # Filter, prune and sort the zh2Hant pairs; returns (maxlen, records).
+ records = zh2Hant.items()  # zh2Hant is not defined in this file; presumably provided by one of the star imports — verify
+
+ records = filter(filter_func, records)  # keep only pairs accepted by filter_func
+
+ maxlen = max(map(lambda (k,v): len(k), records))  # longest key length before pruning; max() raises ValueError if no record survived
+ for i in range(1, maxlen - 1):  # NOTE(review): the last pass uses i == maxlen - 2 — confirm this upper bound is intended
+ records = filter_more(records, i)
+
+ records.sort()  # in-place sort, so records must be a list here
+ return maxlen, records
+
+def main():  # Print the records as a C array of wide-string pairs, followed by the SIMP_TO_TRAD_MAX_LEN macro.
+
+ print "const wchar_t * const simp_to_trad[][2] = {"  # Python 2 print statement; output goes to stdout
+ maxlen, records = get_records()
+ for s, ts in records:
+ print ' { L"%s", L"%s" },' % (s.encode("utf8"), ts.encode("utf8"))  # one row per pair, UTF-8 encoded; assumes s and ts are unicode strings — TODO confirm
+ print "};"
+ print '#define SIMP_TO_TRAD_MAX_LEN (%d)' % maxlen  # maxlen is the value returned by get_records()
+
+if __name__ == "__main__":  # run main() only when executed as a script
+ main()
diff --git a/scripts/valid_hanzi.py b/scripts/valid_hanzi.py
new file mode 120000
index 0000000..8eb09ba
--- /dev/null
+++ b/scripts/valid_hanzi.py
@@ -0,0 +1 @@
+../data/db/android/valid_hanzi.py
\ No newline at end of file