1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
# Create a substitution file for cdict from the various compose mappings.
# This is used when building the dictionaries and when making word suggestions
# disregarding case and diacritics.
import sys, os, json, glob, unicodedata
# Path of the generated JSON file. It is a relative path, so it is resolved
# against the current working directory. It matches the "srcs/compose/*.json"
# input pattern and is therefore skipped explicitly when reading the inputs.
OUTPUT_FILE = "srcs/compose/substitutions.json"
def warn(msg):
    """Print `msg` on standard error, prefixed with "Warning: "."""
    text = "Warning: " + msg
    print(text, file=sys.stderr)
# Adapted from srcs/compose/compile.py
def strip_cstyle_comments(inp):
    """Remove `//` line comments from `inp` and return the remaining text.

    `inp` is an iterable of lines (for example an open text file). Everything
    from the first `//` that is not inside a double-quoted JSON string up to
    the end of the line is replaced by a single newline. Lines without a
    comment are kept unchanged. The processed lines are concatenated into one
    string.
    """
    def comment_start(line):
        # Index of the first `//` located outside a string literal, or -1.
        if "//" not in line:
            return -1
        in_string = False
        escaped = False
        for i, ch in enumerate(line):
            if in_string:
                if escaped:
                    # The character after a backslash never ends the string.
                    escaped = False
                elif ch == "\\":
                    escaped = True
                elif ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif line.startswith("//", i):
                return i
        return -1

    def strip_line(line):
        i = comment_start(line)
        return line[:i] + "\n" if i >= 0 else line

    return "".join(map(strip_line, inp))
def parse(fname):
    """Load the JSON document stored in `fname`, ignoring `//` comments.

    The file is always decoded as UTF-8 so that the result does not depend on
    the locale's default encoding. Returns the value produced by `json.loads`.
    """
    with open(fname, "r", encoding="utf-8") as inp:
        return json.loads(strip_cstyle_comments(inp))
def is_char16(c):
    """Tell whether `c` is a single character with a code point below 65536."""
    if len(c) != 1:
        return False
    return ord(c) <= 0xFFFF
def get_mappings(tree):
    """Yield the `(key, value)` pairs of `tree` that are simple char mappings.

    Entries whose value is not a string (deeper compose sequences) are
    dropped, as are entries where the value or the key is not a single
    character that fits in a Java 16-bit char.
    """
    for key, target in tree.items():
        if not isinstance(target, str):
            # Nested compose sequence or other non-string value.
            continue
        if is_char16(target) and is_char16(key):
            yield key, target
def mappings_from_compose_files():
    """Yield the char mappings found in every "srcs/compose/*.json" file.

    The generated OUTPUT_FILE matches the same pattern and is skipped. Paths
    are normalised before being compared because glob may return a path that
    uses a different separator than the one spelled in OUTPUT_FILE. Files are
    visited in sorted order so that the result does not depend on the order in
    which the file system lists them.
    """
    output_path = os.path.normpath(OUTPUT_FILE)
    for fname in sorted(glob.glob("srcs/compose/*.json")):
        if os.path.normpath(fname) == output_path:
            continue
        yield from get_mappings(parse(fname))
# The definition of shift doesn't contain any letters as shift is implemented
# using Java's API so we generate it using Python's API. It's not important if
# both are not equivalent.
def add_case_variants(mappings):
    """Yield `mappings` extended with upper/lower case variants.

    First yields `(letter, LETTER)` for the 26 ASCII lowercase letters. Then,
    for every `(base, result)` pair of `mappings`, yields in this order:
    the pair with `base` lowercased (if that differs and is a 16-bit char),
    the pair with `result` uppercased (same conditions), and the pair itself.
    """
    yield from ((letter, letter.upper()) for letter in "abcdefghijklmnopqrstuvwxyz")
    for base, result in mappings:
        lowered = base.lower()
        if lowered != base and is_char16(lowered):
            yield lowered, result
        uppered = result.upper()
        if uppered != result and is_char16(uppered):
            yield base, uppered
        yield base, result
# Remove unnecessary characters to reduce the lookup time
# Unicode general categories accepted for the first element of a mapping:
# lowercase, uppercase, titlecase and "other" letters.
ALLOWED_CAT = ["Ll", "Lu", "Lt", "Lo"]


def remove_non_letters(mappings):
    """Yield only the pairs whose first element has a category in ALLOWED_CAT."""
    yield from (
        (base, result)
        for base, result in mappings
        if unicodedata.category(base) in ALLOWED_CAT
    )
def resolve_mappings(mappings):
    """Build the substitution table from `(base, result)` pairs.

    Returns a dict that maps each `result` character to a `base` character.
    When several bases are given for the same result, a warning is printed
    and the lowest base (in code point order) is kept. Bases that are
    themselves results of another mapping are followed until a character
    with no further mapping is reached.
    """
    m = {}
    # Sort by result, then by base, so that the first pair seen for a given
    # result is the one with the lowest base regardless of the input order.
    for c, r in sorted(mappings, key=lambda it: (it[1], it[0])):
        if r in m:
            if m[r] != c:
                warn("Conflicting mapping '%s -> %s' and '%s -> %s'" %
                     (c, r, m[r], r))
            continue
        m[r] = c

    def resolve(c):
        # Follow the chain of mappings starting at `c`. `seen` stops the walk
        # when the chain loops back onto a character already visited.
        seen = set()
        while c in m and c not in seen:
            seen.add(c)
            c = m[c]
        return c

    return {r: resolve(c) for r, c in m.items()}
# Build the whole table before touching the output file, so that a failure
# while reading the compose files does not leave a truncated OUTPUT_FILE.
substitutions = resolve_mappings(
    add_case_variants(
        remove_non_letters(
            mappings_from_compose_files())))
# The table is dumped with ensure_ascii=False, so non-ASCII characters are
# written as-is: force UTF-8 instead of relying on the locale's encoding.
with open(OUTPUT_FILE, "w", encoding="utf-8") as out:
    json.dump(substitutions, out, ensure_ascii=False, indent=2)
print("Generated " + OUTPUT_FILE)
|