diff options
| author | Jules Aguillon | 2024-12-19 00:34:24 +0100 |
|---|---|---|
| committer | Jules Aguillon | 2024-12-19 00:34:24 +0100 |
| commit | 4a429357ef9faa409617f867e224bc8c6814d919 (patch) | |
| tree | 40b6559afc4ee43936107d10b70830867d2ad476 /srcs/compose | |
| parent | 83c6e5d2ad3c67671fdd15245ee55bc22964ec34 (diff) | |
| download | unexpected-keyboard-4a429357ef9faa409617f867e224bc8c6814d919.tar.gz unexpected-keyboard-4a429357ef9faa409617f867e224bc8c6814d919.zip | |
compose: Fix parsing of long sequences from json files
Sequences longer than two characters were not read correctly from json
files, creating conflicts and causing dropped sequences.
The detection of collisions between sequences is also improved. Two colliding
sequences are removed from the data files (one in cyrillic.json, one in en_US_UTF_8_Compose.pre).
Diffstat (limited to 'srcs/compose')
| -rw-r--r-- | srcs/compose/compile.py | 49 | ||||
| -rw-r--r-- | srcs/compose/compose/cyrillic.json | 3 | ||||
| -rw-r--r-- | srcs/compose/compose/en_US_UTF_8_Compose.pre | 2 |
3 files changed, 32 insertions, 22 deletions
diff --git a/srcs/compose/compile.py b/srcs/compose/compile.py index 125e18c..e8feba3 100644 --- a/srcs/compose/compile.py +++ b/srcs/compose/compile.py @@ -99,10 +99,16 @@ def strip_cstyle_comments(inp): # Parse from a json file containing a dictionary sequence → result string. def parse_sequences_file_json(fname): + def tree_to_seqs(tree, prefix): + for c, r in tree.items(): + if isinstance(r, str): + yield prefix + [c], r + else: + yield from tree_to_seqs(r, prefix + [c]) try: with open(fname, "r") as inp: - seqs = json.loads(strip_cstyle_comments(inp)) - return list(seqs.items()) + tree = json.loads(strip_cstyle_comments(inp)) + return list(tree_to_seqs(tree, [])) except Exception as e: print("Failed parsing '%s': %s" % (fname, str(e)), file=sys.stderr) @@ -133,26 +139,33 @@ def parse_sequences_dir(dname): # Turn a list of sequences into a trie. def add_sequences_to_trie(seqs, trie): - def add_seq_to_trie(t_, seq, result): + global dropped_sequences + def add_seq_to_trie(seq, result): t_ = trie + for c in seq[:-1]: + t_ = t_.setdefault(c, {}) + if isinstance(t_, str): + return False + c = seq[-1] + if c in t_: + return False + t_[c] = result + return True + def existing_sequence_to_str(seq): # Used in error message i = 0 - while i < len(seq) - 1: - c = seq[i] - if c not in t_: - t_[c] = {} - if isinstance(t_[c], str): - global dropped_sequences - dropped_sequences += 1 - print("Sequence collide: '%s = %s' '%s = %s'" % ( - seq[:i+1], t_[c], seq, result), - file=sys.stderr) - return - t_ = t_[c] + t_ = trie + while i < len(seq): + if seq[i] not in t_: break # No collision ? 
+ t_ = t_[seq[i]] i += 1 - c = seq[i] - t_[c] = result + if isinstance(t_, str): break + return "".join(seq[:i]) + " = " + str(t_) for seq, result in seqs: - add_seq_to_trie(trie, seq, result) + if not add_seq_to_trie(seq, result): + dropped_sequences += 1 + print("Sequence collide: '%s' and '%s = %s'" % ( + existing_sequence_to_str(seq), + "".join(seq), result), file=sys.stderr) # Compile the trie into a state machine. def make_automata(tries): diff --git a/srcs/compose/compose/cyrillic.json b/srcs/compose/compose/cyrillic.json index 61a8807..6a349aa 100644 --- a/srcs/compose/compose/cyrillic.json +++ b/srcs/compose/compose/cyrillic.json @@ -1,7 +1,4 @@ { - "\"": { - "і": "ї" - }, ",": { "г": "ӻ", "к": "ӄ", diff --git a/srcs/compose/compose/en_US_UTF_8_Compose.pre b/srcs/compose/compose/en_US_UTF_8_Compose.pre index 680f4fa..484d6d2 100644 --- a/srcs/compose/compose/en_US_UTF_8_Compose.pre +++ b/srcs/compose/compose/en_US_UTF_8_Compose.pre @@ -4016,7 +4016,7 @@ XCOMM Mathematical Operators <Multi_key> <U2225> <slash> : "∦" U2226 # NOT PARALLEL TO <Multi_key> <U223C> <slash> : "≁" U2241 # NOT TILDE <Multi_key> <U2243> <slash> : "≄" U2244 # NOT ASYMPTOTICALLY EQUAL TO -<Multi_key> <approximate> <slash> : "≇" U2247 # NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO +XCOMM <Multi_key> <approximate> <slash> : "≇" U2247 # NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO <Multi_key> <U2248> <slash> : "≉" U2249 # NOT ALMOST EQUAL TO <Multi_key> <slash> <equal> : "≠" U2260 # NOT EQUAL TO <Multi_key> <equal> <slash> : "≠" U2260 # NOT EQUAL TO |
