From 4a429357ef9faa409617f867e224bc8c6814d919 Mon Sep 17 00:00:00 2001 From: Jules Aguillon Date: Thu, 19 Dec 2024 00:34:24 +0100 Subject: compose: Fix parsing of long sequences from json files Sequences longer than two characters were not read correctly from json files, creating conflicts and causing dropped sequences. The detection of collision in sequences is also improved. Two colliding sequences are removed. --- srcs/compose/compile.py | 49 +++++++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 18 deletions(-) (limited to 'srcs/compose/compile.py') diff --git a/srcs/compose/compile.py b/srcs/compose/compile.py index 125e18c..e8feba3 100644 --- a/srcs/compose/compile.py +++ b/srcs/compose/compile.py @@ -99,10 +99,16 @@ def strip_cstyle_comments(inp): # Parse from a json file containing a dictionary sequence → result string. def parse_sequences_file_json(fname): + def tree_to_seqs(tree, prefix): + for c, r in tree.items(): + if isinstance(r, str): + yield prefix + [c], r + else: + yield from tree_to_seqs(r, prefix + [c]) try: with open(fname, "r") as inp: - seqs = json.loads(strip_cstyle_comments(inp)) - return list(seqs.items()) + tree = json.loads(strip_cstyle_comments(inp)) + return list(tree_to_seqs(tree, [])) except Exception as e: print("Failed parsing '%s': %s" % (fname, str(e)), file=sys.stderr) @@ -133,26 +139,33 @@ def parse_sequences_dir(dname): # Turn a list of sequences into a trie. def add_sequences_to_trie(seqs, trie): - def add_seq_to_trie(t_, seq, result): + global dropped_sequences + def add_seq_to_trie(seq, result): t_ = trie + for c in seq[:-1]: + t_ = t_.setdefault(c, {}) + if isinstance(t_, str): + return False + c = seq[-1] + if c in t_: + return False + t_[c] = result + return True + def existing_sequence_to_str(seq): # Used in error message i = 0 - while i < len(seq) - 1: - c = seq[i] - if c not in t_: - t_[c] = {} - if isinstance(t_[c], str): - global dropped_sequences - dropped_sequences += 1 - print("Sequence collide: '%s = %s' '%s = %s'" % ( - seq[:i+1], t_[c], seq, result), - file=sys.stderr) - return - t_ = t_[c] + t_ = trie + while i < len(seq): + if seq[i] not in t_: break # No collision ? + t_ = t_[seq[i]] i += 1 - c = seq[i] - t_[c] = result + if isinstance(t_, str): break + return "".join(seq[:i]) + " = " + str(t_) for seq, result in seqs: - add_seq_to_trie(trie, seq, result) + if not add_seq_to_trie(seq, result): + dropped_sequences += 1 + print("Sequence collide: '%s' and '%s = %s'" % ( + existing_sequence_to_str(seq), + "".join(seq), result), file=sys.stderr) # Compile the trie into a state machine. def make_automata(tries): -- cgit v1.2.3