abouttreesummaryrefslogcommitdiff
path: root/srcs/compose
diff options
context:
space:
mode:
author Jules Aguillon 2024-12-19 00:34:24 +0100
committer Jules Aguillon 2024-12-19 00:34:24 +0100
commit 4a429357ef9faa409617f867e224bc8c6814d919 (patch)
tree 40b6559afc4ee43936107d10b70830867d2ad476 /srcs/compose
parent 83c6e5d2ad3c67671fdd15245ee55bc22964ec34 (diff)
download unexpected-keyboard-4a429357ef9faa409617f867e224bc8c6814d919.tar.gz
unexpected-keyboard-4a429357ef9faa409617f867e224bc8c6814d919.zip
compose: Fix parsing of long sequences from json files
Sequences longer than two characters were not read correctly from JSON files, which created conflicts and caused sequences to be dropped. Collision detection between sequences is also improved, and two colliding sequences are removed.
Diffstat (limited to 'srcs/compose')
-rw-r--r-- srcs/compose/compile.py 49
-rw-r--r-- srcs/compose/compose/cyrillic.json 3
-rw-r--r-- srcs/compose/compose/en_US_UTF_8_Compose.pre 2
3 files changed, 32 insertions, 22 deletions
diff --git a/srcs/compose/compile.py b/srcs/compose/compile.py
index 125e18c..e8feba3 100644
--- a/srcs/compose/compile.py
+++ b/srcs/compose/compile.py
@@ -99,10 +99,16 @@ def strip_cstyle_comments(inp):
# Parse from a json file containing a dictionary sequence → result string.
def parse_sequences_file_json(fname):
+ def tree_to_seqs(tree, prefix):
+ for c, r in tree.items():
+ if isinstance(r, str):
+ yield prefix + [c], r
+ else:
+ yield from tree_to_seqs(r, prefix + [c])
try:
with open(fname, "r") as inp:
- seqs = json.loads(strip_cstyle_comments(inp))
- return list(seqs.items())
+ tree = json.loads(strip_cstyle_comments(inp))
+ return list(tree_to_seqs(tree, []))
except Exception as e:
print("Failed parsing '%s': %s" % (fname, str(e)), file=sys.stderr)
@@ -133,26 +139,33 @@ def parse_sequences_dir(dname):
# Turn a list of sequences into a trie.
def add_sequences_to_trie(seqs, trie):
- def add_seq_to_trie(t_, seq, result):
+ global dropped_sequences
+ def add_seq_to_trie(seq, result):
t_ = trie
+ for c in seq[:-1]:
+ t_ = t_.setdefault(c, {})
+ if isinstance(t_, str):
+ return False
+ c = seq[-1]
+ if c in t_:
+ return False
+ t_[c] = result
+ return True
+ def existing_sequence_to_str(seq): # Used in error message
i = 0
- while i < len(seq) - 1:
- c = seq[i]
- if c not in t_:
- t_[c] = {}
- if isinstance(t_[c], str):
- global dropped_sequences
- dropped_sequences += 1
- print("Sequence collide: '%s = %s' '%s = %s'" % (
- seq[:i+1], t_[c], seq, result),
- file=sys.stderr)
- return
- t_ = t_[c]
+ t_ = trie
+ while i < len(seq):
+ if seq[i] not in t_: break # No collision ?
+ t_ = t_[seq[i]]
i += 1
- c = seq[i]
- t_[c] = result
+ if isinstance(t_, str): break
+ return "".join(seq[:i]) + " = " + str(t_)
for seq, result in seqs:
- add_seq_to_trie(trie, seq, result)
+ if not add_seq_to_trie(seq, result):
+ dropped_sequences += 1
+ print("Sequence collide: '%s' and '%s = %s'" % (
+ existing_sequence_to_str(seq),
+ "".join(seq), result), file=sys.stderr)
# Compile the trie into a state machine.
def make_automata(tries):
diff --git a/srcs/compose/compose/cyrillic.json b/srcs/compose/compose/cyrillic.json
index 61a8807..6a349aa 100644
--- a/srcs/compose/compose/cyrillic.json
+++ b/srcs/compose/compose/cyrillic.json
@@ -1,7 +1,4 @@
{
- "\"": {
- "і": "ї"
- },
",": {
"г": "ӻ",
"к": "ӄ",
diff --git a/srcs/compose/compose/en_US_UTF_8_Compose.pre b/srcs/compose/compose/en_US_UTF_8_Compose.pre
index 680f4fa..484d6d2 100644
--- a/srcs/compose/compose/en_US_UTF_8_Compose.pre
+++ b/srcs/compose/compose/en_US_UTF_8_Compose.pre
@@ -4016,7 +4016,7 @@ XCOMM Mathematical Operators
<Multi_key> <U2225> <slash> : "∦" U2226 # NOT PARALLEL TO
<Multi_key> <U223C> <slash> : "≁" U2241 # NOT TILDE
<Multi_key> <U2243> <slash> : "≄" U2244 # NOT ASYMPTOTICALLY EQUAL TO
-<Multi_key> <approximate> <slash> : "≇" U2247 # NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
+XCOMM <Multi_key> <approximate> <slash> : "≇" U2247 # NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
<Multi_key> <U2248> <slash> : "≉" U2249 # NOT ALMOST EQUAL TO
<Multi_key> <slash> <equal> : "≠" U2260 # NOT EQUAL TO
<Multi_key> <equal> <slash> : "≠" U2260 # NOT EQUAL TO