blob: 52c473a248b5865a7015fbeb0596d779fe4acb08 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
def get_validate_hanzi():
validate_hanzi = file("valid_utf16.txt").read().decode("utf16")
return set(validate_hanzi)
def main():
hanzi = get_validate_hanzi()
hanzi = list(hanzi)
hanzi.sort()
print "# -*- coding: utf-8 -*- "
print "valid_hanzi = set(["
for c in hanzi:
print " u\"%s\"," % c.encode("utf8")
print "])"
if __name__ == "__main__":
main()
|