diff options
Diffstat (limited to 'srcs/compose')
| -rw-r--r-- | srcs/compose/compile.py | 49 | ||||
| -rw-r--r-- | srcs/compose/compose/cyrillic.json | 3 | ||||
| -rw-r--r-- | srcs/compose/compose/en_US_UTF_8_Compose.pre | 2 |
3 files changed, 32 insertions, 22 deletions
diff --git a/srcs/compose/compile.py b/srcs/compose/compile.py index 125e18c..e8feba3 100644 --- a/srcs/compose/compile.py +++ b/srcs/compose/compile.py @@ -99,10 +99,16 @@ def strip_cstyle_comments(inp): # Parse from a json file containing a dictionary sequence → result string. def parse_sequences_file_json(fname): + def tree_to_seqs(tree, prefix): + for c, r in tree.items(): + if isinstance(r, str): + yield prefix + [c], r + else: + yield from tree_to_seqs(r, prefix + [c]) try: with open(fname, "r") as inp: - seqs = json.loads(strip_cstyle_comments(inp)) - return list(seqs.items()) + tree = json.loads(strip_cstyle_comments(inp)) + return list(tree_to_seqs(tree, [])) except Exception as e: print("Failed parsing '%s': %s" % (fname, str(e)), file=sys.stderr) @@ -133,26 +139,33 @@ def parse_sequences_dir(dname): # Turn a list of sequences into a trie. def add_sequences_to_trie(seqs, trie): - def add_seq_to_trie(t_, seq, result): + global dropped_sequences + def add_seq_to_trie(seq, result): t_ = trie + for c in seq[:-1]: + t_ = t_.setdefault(c, {}) + if isinstance(t_, str): + return False + c = seq[-1] + if c in t_: + return False + t_[c] = result + return True + def existing_sequence_to_str(seq): # Used in error message i = 0 - while i < len(seq) - 1: - c = seq[i] - if c not in t_: - t_[c] = {} - if isinstance(t_[c], str): - global dropped_sequences - dropped_sequences += 1 - print("Sequence collide: '%s = %s' '%s = %s'" % ( - seq[:i+1], t_[c], seq, result), - file=sys.stderr) - return - t_ = t_[c] + t_ = trie + while i < len(seq): + if seq[i] not in t_: break # No collision ? + t_ = t_[seq[i]] i += 1 - c = seq[i] - t_[c] = result + if isinstance(t_, str): break + return "".join(seq[:i]) + " = " + str(t_) for seq, result in seqs: - add_seq_to_trie(trie, seq, result) + if not add_seq_to_trie(seq, result): + dropped_sequences += 1 + print("Sequence collide: '%s' and '%s = %s'" % ( + existing_sequence_to_str(seq), + "".join(seq), result), file=sys.stderr) # Compile the trie into a state machine. def make_automata(tries): diff --git a/srcs/compose/compose/cyrillic.json b/srcs/compose/compose/cyrillic.json index 61a8807..6a349aa 100644 --- a/srcs/compose/compose/cyrillic.json +++ b/srcs/compose/compose/cyrillic.json @@ -1,7 +1,4 @@ { - "\"": { - "і": "ї" - }, ",": { "г": "ӻ", "к": "ӄ", diff --git a/srcs/compose/compose/en_US_UTF_8_Compose.pre b/srcs/compose/compose/en_US_UTF_8_Compose.pre index 680f4fa..484d6d2 100644 --- a/srcs/compose/compose/en_US_UTF_8_Compose.pre +++ b/srcs/compose/compose/en_US_UTF_8_Compose.pre @@ -4016,7 +4016,7 @@ XCOMM Mathematical Operators <Multi_key> <U2225> <slash> : "∦" U2226 # NOT PARALLEL TO <Multi_key> <U223C> <slash> : "≁" U2241 # NOT TILDE <Multi_key> <U2243> <slash> : "≄" U2244 # NOT ASYMPTOTICALLY EQUAL TO -<Multi_key> <approximate> <slash> : "≇" U2247 # NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO +XCOMM <Multi_key> <approximate> <slash> : "≇" U2247 # NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO <Multi_key> <U2248> <slash> : "≉" U2249 # NOT ALMOST EQUAL TO <Multi_key> <slash> <equal> : "≠" U2260 # NOT EQUAL TO <Multi_key> <equal> <slash> : "≠" U2260 # NOT EQUAL TO |
