abouttreesummaryrefslogcommitdiff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r--scripts/subst_of_compose.py90
1 files changed, 90 insertions, 0 deletions
diff --git a/scripts/subst_of_compose.py b/scripts/subst_of_compose.py
new file mode 100644
index 0000000..cb5fc18
--- /dev/null
+++ b/scripts/subst_of_compose.py
@@ -0,0 +1,90 @@
+# Create a substitution file for cdict from the various compose mappings.
+# This is used when building the dictionaries and when making word suggestions
+# disregarding case and diacritics.
+
+import sys, os, json, glob, unicodedata
+
+OUTPUT_FILE = "srcs/compose/substitutions.json"
+
def warn(msg):
    """Print *msg* on standard error, prefixed with "Warning: "."""
    text = "Warning: " + msg
    print(text, file=sys.stderr)
+
# Adapted from srcs/compose/compile.py
def strip_cstyle_comments(inp):
    """Remove `//` line comments from an iterable of lines.

    `inp` is any iterable of strings (e.g. an open text file). Each line is
    cut at the first `//` that is not inside a double-quoted JSON string and
    terminated with a newline; lines without a comment are kept unchanged.
    Returns the concatenation of all resulting lines as a single string.
    """
    def comment_start(line):
        # Index of the first "//" outside of a string literal, or -1.
        in_string = False
        escaped = False
        for i, ch in enumerate(line):
            if in_string:
                if escaped:
                    # Character following a backslash never ends the string.
                    escaped = False
                elif ch == "\\":
                    escaped = True
                elif ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif ch == "/" and line.startswith("//", i):
                return i
        return -1

    def strip_line(line):
        i = comment_start(line)
        return line[:i] + "\n" if i >= 0 else line

    return "".join(map(strip_line, inp))
+
def parse(fname):
    """Load the JSON file *fname* after removing its `//` line comments.

    Returns whatever `json.loads` produces for the file's content. Raises
    `OSError` if the file cannot be opened and `json.JSONDecodeError` if the
    content left after stripping comments is not valid JSON.
    """
    # Decode explicitly as UTF-8 (the standard JSON encoding) rather than
    # relying on the platform's default, which may not handle non-ASCII
    # characters.
    with open(fname, "r", encoding="utf-8") as inp:
        return json.loads(strip_cstyle_comments(inp))
+
def is_char16(c):
    """Return True if *c* is exactly one character with a code point below
    65536 (i.e. representable in 16 bits)."""
    if len(c) != 1:
        return False
    return ord(c) <= 0xFFFF
+
def get_mappings(tree):
    """Yield the `(key, value)` pairs of *tree* that map one character to
    one character.

    Entries whose value is not a string (deeper compose sequences) are
    skipped, as are entries where either side is not a single character
    fitting in a Java 16-bit char.
    """
    for key, value in tree.items():
        if not isinstance(value, str):
            continue
        if is_char16(value) and is_char16(key):
            yield key, value
+
def mappings_from_compose_files():
    """Yield the single-character mappings found in every JSON file of
    `srcs/compose/`, except the generated output file itself.

    Files are visited in sorted order: `glob` returns names in arbitrary
    order, which would otherwise make the order of the yielded mappings
    depend on the machine running the script.
    """
    # Compare normalised paths; glob may return OS-specific separators that
    # would never compare equal to the literal OUTPUT_FILE string.
    output = os.path.normpath(OUTPUT_FILE)
    for f in sorted(glob.glob("srcs/compose/*.json")):
        if os.path.normpath(f) == output:
            continue
        yield from get_mappings(parse(f))
+
# The definition of shift doesn't contain any letters as shift is implemented
# using Java's API so we generate it using Python's API. It's not important if
# both are not equivalent.
def add_case_variants(mappings):
    """Yield the ASCII lowercase-to-uppercase pairs, then every pair of
    *mappings* together with its case variants.

    For each `(source, result)` pair this yields, in order: the pair with a
    lowercased source (when that differs and is a single 16-bit char), the
    pair with an uppercased result (same conditions), and the pair itself.
    """
    yield from ((low, low.upper()) for low in "abcdefghijklmnopqrstuvwxyz")
    for src, dst in mappings:
        lowered = src.lower()
        if lowered != src and is_char16(lowered):
            yield lowered, dst
        uppered = dst.upper()
        if uppered != dst and is_char16(uppered):
            yield src, uppered
        yield src, dst
+
# Remove unnecessary characters to reduce the lookup time.
# Unicode general categories kept: lowercase, uppercase, titlecase and
# "other" letters.
ALLOWED_CAT = ["Ll", "Lu", "Lt", "Lo"]


def remove_non_letters(mappings):
    """Yield only the `(source, result)` pairs whose source character has a
    Unicode general category listed in `ALLOWED_CAT`."""
    for src, dst in mappings:
        if unicodedata.category(src) not in ALLOWED_CAT:
            continue
        yield src, dst
+
def resolve_mappings(mappings):
    """Build the substitution table from `(source, result)` pairs.

    The returned dict maps each result character to the source it is
    ultimately derived from: if a source is itself the result of another
    mapping, the chain is followed until a character with no further
    mapping is reached (or a cycle is detected).
    """
    reverse = {}
    # Pairs are ordered by result character. The sort is stable, so when two
    # different sources map to the same result the one seen first in the
    # input is kept and the other is reported.
    ordered = list(mappings)
    ordered.sort(key=lambda pair: pair[1])
    for src, dst in ordered:
        if dst not in reverse:
            reverse[dst] = src
        elif reverse[dst] != src:
            warn("Conflicting mapping '%s -> %s' and '%s -> %s'" %
                 (src, dst, reverse[dst], dst))

    def resolve(char):
        # Walk the chain of reverse mappings; stop on a character that has
        # already been visited so that cycles terminate.
        visited = set()
        while char in reverse and char not in visited:
            visited.add(char)
            char = reverse[char]
        return char

    return {dst: resolve(src) for dst, src in reverse.items()}
+
+
# Run the whole pipeline and write the resulting table as JSON.
# ensure_ascii=False makes json.dump emit the characters themselves instead of
# \u escapes, so the file is opened explicitly as UTF-8 rather than with the
# platform's default encoding, which may be unable to encode them.
with open(OUTPUT_FILE, "w", encoding="utf-8") as out:
    json.dump(
        resolve_mappings(
            add_case_variants(
                remove_non_letters(
                    mappings_from_compose_files()))),
        out, ensure_ascii=False, indent=2)

print("Generated " + OUTPUT_FILE)