summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x scripts/update-tradsimp-table.py 69
1 files changed, 69 insertions, 0 deletions
diff --git a/scripts/update-tradsimp-table.py b/scripts/update-tradsimp-table.py
new file mode 100755
index 0000000..5265fbf
--- /dev/null
+++ b/scripts/update-tradsimp-table.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+import sys
+sys.path.append(".")
+
+from ZhConversion import *
+from valid_hanzi import *
+
def convert(s, d, n):
    """Convert *s* by greedy longest-match substitution through table *d*.

    Scanning left to right, each position tries slices of decreasing length,
    starting at ``min(n, remaining)`` characters. The first slice that has a
    non-empty replacement in *d* is substituted and the scan jumps past it.
    When no multi-character slice matches, the single character is replaced
    by ``d[ch]`` if present, otherwise copied through unchanged.

    Args:
        s: Text to convert.
        d: Mapping from source substrings to their replacements.
        n: Maximum slice length to look up; must be at least 1 whenever
            *s* is non-empty.

    Returns:
        The converted text.

    Raises:
        ValueError: If *s* is non-empty and *n* is smaller than 1. Without
            this guard no slice could ever be tried and the loop would fail
            on an unbound variable.
    """
    end = len(s)
    if end and n < 1:
        raise ValueError("n must be >= 1, got %r" % (n,))

    pieces = []
    pos = 0
    while pos < end:
        # Try the multi-character slices first, longest one first.
        for size in range(min(n, end - pos), 1, -1):
            piece = d.get(s[pos:pos + size])
            if piece:
                break
        else:
            # No usable longer match: fall back to a single character,
            # which maps to itself when the table has no entry for it.
            size = 1
            char = s[pos:pos + 1]
            piece = d.get(char, char)
        pieces.append(piece)
        pos += size
    # Join once at the end instead of growing a string inside the loop.
    return "".join(pieces)
+
def filter_more(records, n):
    """Drop records that shorter records already reproduce.

    Records whose key is at most *n* characters long form the base set and
    are always kept. Every record is then re-derived by running ``convert``
    over its key with the base set as the table and a window of *n*; a
    record survives only if that derived value differs from its own value,
    i.e. the base set alone cannot reproduce it.

    Args:
        records: List of ``(key, value)`` pairs.
        n: Maximum key length of the base set, also passed to ``convert``
            as its window size.

    Returns:
        The surviving non-derivable records followed by the base records.
    """
    # The length bound must follow n: comparing against 0 left the base set
    # empty, so nothing was ever recognised as derivable.
    base = [(k, v) for (k, v) in records if len(k) <= n]
    table = dict(base)
    extra = [(k, v) for (k, v) in records if convert(k, table, n) != v]
    return extra + base
+
def filter_func(args):
    """Decide whether a ``(key, value)`` record is kept.

    A record is rejected when key and value differ in length, when the key
    is longer than 6 characters, or when either string contains a character
    that is not in ``valid_hanzi``.

    Args:
        args: A ``(key, value)`` pair of strings.

    Returns:
        True if the record passes every check, otherwise False.
    """
    source, target = args

    # Length checks come first so obviously bad records are rejected
    # before any per-character membership test.
    same_length = len(source) == len(target)
    if not same_length or len(source) > 6:
        return False

    # Key characters are checked before value characters.
    for text in (source, target):
        for char in text:
            if char not in valid_hanzi:
                return False

    # A stricter per-character check against S_2_T (each differing value
    # character must be listed for its key character) is disabled here.
    return True
+
def get_records():
    """Build the sorted, UTF-8 encoded conversion records.

    Takes the items of ``zh2Hans``, keeps those accepted by ``filter_func``,
    then runs ``filter_more`` with increasing window sizes. The remaining
    pairs are encoded to UTF-8 and sorted.

    Returns:
        A ``(maxlen, records)`` tuple: the longest key length measured right
        after ``filter_func`` (before pruning), and the sorted list of
        encoded ``(key, value)`` pairs.
    """
    pairs = [item for item in zh2Hans.items() if filter_func(item)]

    longest = max(len(key) for key, _ in pairs)

    # NOTE(review): the window stops at longest - 2; confirm that this upper
    # bound is intentional.
    for window in range(1, longest - 1):
        pairs = filter_more(pairs, window)

    encoded = sorted(
        (key.encode("utf8"), value.encode("utf8")) for key, value in pairs
    )
    return longest, encoded
+
def main():
    """Print the ``trad_to_simp`` C array and its max-length macro to stdout."""
    row_format = ' { "%s", "%s" },'

    # The opening line is emitted before the records are computed.
    print("static const gchar *trad_to_simp[][2] = {")
    longest, rows = get_records()
    for row in rows:
        # Each row is a (key, value) pair filling the two placeholders.
        print(row_format % row)
    print("};")
    print('#define TRAD_TO_SIMP_MAX_LEN (%d)' % longest)


# Generate the table only when run as a script, not when imported.
if __name__ == "__main__":
    main()