diff options
| author | Patrick Schönberger | 2021-08-14 14:56:12 +0200 |
|---|---|---|
| committer | Patrick Schönberger | 2021-08-14 14:56:12 +0200 |
| commit | c6ad2948bb98d42f8e0883ef82cd14cd2d5eda60 (patch) | |
| tree | 9e83d6d8f61e56f5d3425b8709314d6bdb9315a9 /antlr4-cpp-runtime-4.9.2-source/runtime/src/atn | |
| parent | 9f94b672a5dc32da5ad01742bd4e976315a30d9c (diff) | |
| download | toc-main.tar.gz toc-main.zip | |
Diffstat (limited to 'antlr4-cpp-runtime-4.9.2-source/runtime/src/atn')
133 files changed, 12893 insertions, 0 deletions
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATN.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATN.cpp new file mode 100644 index 0000000..2334e6b --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATN.cpp @@ -0,0 +1,209 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/LL1Analyzer.h" +#include "Token.h" +#include "atn/RuleTransition.h" +#include "misc/IntervalSet.h" +#include "RuleContext.h" +#include "atn/DecisionState.h" +#include "Recognizer.h" +#include "atn/ATNType.h" +#include "Exceptions.h" +#include "support/CPPUtils.h" + +#include "atn/ATN.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + +ATN::ATN() : ATN(ATNType::LEXER, 0) { +} + +ATN::ATN(ATN &&other) { + // All source vectors are implicitly cleared by the moves. + states = std::move(other.states); + decisionToState = std::move(other.decisionToState); + ruleToStartState = std::move(other.ruleToStartState); + ruleToStopState = std::move(other.ruleToStopState); + grammarType = std::move(other.grammarType); + maxTokenType = std::move(other.maxTokenType); + ruleToTokenType = std::move(other.ruleToTokenType); + lexerActions = std::move(other.lexerActions); + modeToStartState = std::move(other.modeToStartState); +} + +ATN::ATN(ATNType grammarType_, size_t maxTokenType_) : grammarType(grammarType_), maxTokenType(maxTokenType_) { +} + +ATN::~ATN() { + for (ATNState *state : states) { + delete state; + } +} + +/** + * Required to be defined (even though not used) as we have an explicit move assignment operator. 
+ */ +ATN& ATN::operator = (ATN &other) NOEXCEPT { + states = other.states; + decisionToState = other.decisionToState; + ruleToStartState = other.ruleToStartState; + ruleToStopState = other.ruleToStopState; + grammarType = other.grammarType; + maxTokenType = other.maxTokenType; + ruleToTokenType = other.ruleToTokenType; + lexerActions = other.lexerActions; + modeToStartState = other.modeToStartState; + + return *this; +} + +/** + * Explicit move assignment operator to make this the preferred assignment. With implicit copy/move assignment + * operators it seems the copy operator is preferred causing trouble when releasing the allocated ATNState instances. + */ +ATN& ATN::operator = (ATN &&other) NOEXCEPT { + // All source vectors are implicitly cleared by the moves. + states = std::move(other.states); + decisionToState = std::move(other.decisionToState); + ruleToStartState = std::move(other.ruleToStartState); + ruleToStopState = std::move(other.ruleToStopState); + grammarType = std::move(other.grammarType); + maxTokenType = std::move(other.maxTokenType); + ruleToTokenType = std::move(other.ruleToTokenType); + lexerActions = std::move(other.lexerActions); + modeToStartState = std::move(other.modeToStartState); + + return *this; +} + +misc::IntervalSet ATN::nextTokens(ATNState *s, RuleContext *ctx) const { + LL1Analyzer analyzer(*this); + return analyzer.LOOK(s, ctx); + +} + +misc::IntervalSet const& ATN::nextTokens(ATNState *s) const { + if (!s->_nextTokenUpdated) { + std::unique_lock<std::mutex> lock { _mutex }; + if (!s->_nextTokenUpdated) { + s->_nextTokenWithinRule = nextTokens(s, nullptr); + s->_nextTokenUpdated = true; + } + } + return s->_nextTokenWithinRule; +} + +void ATN::addState(ATNState *state) { + if (state != nullptr) { + //state->atn = this; + state->stateNumber = static_cast<int>(states.size()); + } + + states.push_back(state); +} + +void ATN::removeState(ATNState *state) { + delete states.at(state->stateNumber);// just free mem, don't shift states 
in list + states.at(state->stateNumber) = nullptr; +} + +int ATN::defineDecisionState(DecisionState *s) { + decisionToState.push_back(s); + s->decision = static_cast<int>(decisionToState.size() - 1); + return s->decision; +} + +DecisionState *ATN::getDecisionState(size_t decision) const { + if (!decisionToState.empty()) { + return decisionToState[decision]; + } + return nullptr; +} + +size_t ATN::getNumberOfDecisions() const { + return decisionToState.size(); +} + +misc::IntervalSet ATN::getExpectedTokens(size_t stateNumber, RuleContext *context) const { + if (stateNumber == ATNState::INVALID_STATE_NUMBER || stateNumber >= states.size()) { + throw IllegalArgumentException("Invalid state number."); + } + + RuleContext *ctx = context; + ATNState *s = states.at(stateNumber); + misc::IntervalSet following = nextTokens(s); + if (!following.contains(Token::EPSILON)) { + return following; + } + + misc::IntervalSet expected; + expected.addAll(following); + expected.remove(Token::EPSILON); + while (ctx && ctx->invokingState != ATNState::INVALID_STATE_NUMBER && following.contains(Token::EPSILON)) { + ATNState *invokingState = states.at(ctx->invokingState); + RuleTransition *rt = static_cast<RuleTransition*>(invokingState->transitions[0]); + following = nextTokens(rt->followState); + expected.addAll(following); + expected.remove(Token::EPSILON); + + if (ctx->parent == nullptr) { + break; + } + ctx = static_cast<RuleContext *>(ctx->parent); + } + + if (following.contains(Token::EPSILON)) { + expected.add(Token::EOF); + } + + return expected; +} + +std::string ATN::toString() const { + std::stringstream ss; + std::string type; + switch (grammarType) { + case ATNType::LEXER: + type = "LEXER "; + break; + + case ATNType::PARSER: + type = "PARSER "; + break; + + default: + break; + } + ss << "(" << type << "ATN " << std::hex << this << std::dec << ") maxTokenType: " << maxTokenType << std::endl; + ss << "states (" << states.size() << ") {" << std::endl; + + size_t index = 0; + for 
(auto *state : states) { + if (state == nullptr) { + ss << " " << index++ << ": nul" << std::endl; + } else { + std::string text = state->toString(); + ss << " " << index++ << ": " << indent(text, " ", false) << std::endl; + } + } + + index = 0; + for (auto *state : decisionToState) { + if (state == nullptr) { + ss << " " << index++ << ": nul" << std::endl; + } else { + std::string text = state->toString(); + ss << " " << index++ << ": " << indent(text, " ", false) << std::endl; + } + } + + ss << "}"; + + return ss.str(); +} + diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATN.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATN.h new file mode 100644 index 0000000..125469b --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATN.h @@ -0,0 +1,118 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RuleContext.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ATN { + public: +#if __cplusplus >= 201703L + static constexpr size_t INVALID_ALT_NUMBER = 0; +#else + enum : size_t { + INVALID_ALT_NUMBER = 0, + }; +#endif + + /// Used for runtime deserialization of ATNs from strings. + ATN(); + ATN(ATN &&other); + ATN(ATNType grammarType, size_t maxTokenType); + virtual ~ATN(); + + std::vector<ATNState *> states; + + /// Each subrule/rule is a decision point and we must track them so we + /// can go back later and build DFA predictors for them. This includes + /// all the rules, subrules, optional blocks, ()+, ()* etc... + std::vector<DecisionState *> decisionToState; + + /// Maps from rule index to starting state number. + std::vector<RuleStartState *> ruleToStartState; + + /// Maps from rule index to stop state number. + std::vector<RuleStopState *> ruleToStopState; + + /// The type of the ATN. 
+ ATNType grammarType; + + /// The maximum value for any symbol recognized by a transition in the ATN. + size_t maxTokenType; + + /// <summary> + /// For lexer ATNs, this maps the rule index to the resulting token type. + /// For parser ATNs, this maps the rule index to the generated bypass token + /// type if the + /// <seealso cref="ATNDeserializationOptions#isGenerateRuleBypassTransitions"/> + /// deserialization option was specified; otherwise, this is {@code null}. + /// </summary> + std::vector<size_t> ruleToTokenType; + + /// For lexer ATNs, this is an array of {@link LexerAction} objects which may + /// be referenced by action transitions in the ATN. + std::vector<Ref<LexerAction>> lexerActions; + + std::vector<TokensStartState *> modeToStartState; + + ATN& operator = (ATN &other) NOEXCEPT; + ATN& operator = (ATN &&other) NOEXCEPT; + + /// <summary> + /// Compute the set of valid tokens that can occur starting in state {@code s}. + /// If {@code ctx} is null, the set of tokens will not include what can follow + /// the rule surrounding {@code s}. In other words, the set will be + /// restricted to tokens reachable staying within {@code s}'s rule. + /// </summary> + virtual misc::IntervalSet nextTokens(ATNState *s, RuleContext *ctx) const; + + /// <summary> + /// Compute the set of valid tokens that can occur starting in {@code s} and + /// staying in same rule. <seealso cref="Token#EPSILON"/> is in set if we reach end of + /// rule. + /// </summary> + virtual misc::IntervalSet const& nextTokens(ATNState *s) const; + + virtual void addState(ATNState *state); + + virtual void removeState(ATNState *state); + + virtual int defineDecisionState(DecisionState *s); + + virtual DecisionState *getDecisionState(size_t decision) const; + + virtual size_t getNumberOfDecisions() const; + + /// <summary> + /// Computes the set of input symbols which could follow ATN state number + /// {@code stateNumber} in the specified full {@code context}. 
This method + /// considers the complete parser context, but does not evaluate semantic + /// predicates (i.e. all predicates encountered during the calculation are + /// assumed true). If a path in the ATN exists from the starting state to the + /// <seealso cref="RuleStopState"/> of the outermost context without matching any + /// symbols, <seealso cref="Token#EOF"/> is added to the returned set. + /// <p/> + /// If {@code context} is {@code null}, it is treated as + /// <seealso cref="ParserRuleContext#EMPTY"/>. + /// </summary> + /// <param name="stateNumber"> the ATN state number </param> + /// <param name="context"> the full parse context </param> + /// <returns> The set of potentially valid input symbols which could follow the + /// specified state in the specified context. </returns> + /// <exception cref="IllegalArgumentException"> if the ATN does not contain a state with + /// number {@code stateNumber} </exception> + virtual misc::IntervalSet getExpectedTokens(size_t stateNumber, RuleContext *context) const; + + std::string toString() const; + + private: + mutable std::mutex _mutex; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNConfig.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNConfig.cpp new file mode 100644 index 0000000..c490f04 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNConfig.cpp @@ -0,0 +1,111 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "atn/PredictionContext.h" +#include "SemanticContext.h" + +#include "atn/ATNConfig.h" + +using namespace antlr4::atn; + +ATNConfig::ATNConfig(ATNState *state_, size_t alt_, Ref<PredictionContext> const& context_) + : ATNConfig(state_, alt_, context_, SemanticContext::NONE) { +} + +ATNConfig::ATNConfig(ATNState *state_, size_t alt_, Ref<PredictionContext> const& context_, Ref<SemanticContext> const& semanticContext_) + : state(state_), alt(alt_), context(context_), semanticContext(semanticContext_) { + reachesIntoOuterContext = 0; +} + +ATNConfig::ATNConfig(Ref<ATNConfig> const& c) : ATNConfig(c, c->state, c->context, c->semanticContext) { +} + +ATNConfig::ATNConfig(Ref<ATNConfig> const& c, ATNState *state_) : ATNConfig(c, state_, c->context, c->semanticContext) { +} + +ATNConfig::ATNConfig(Ref<ATNConfig> const& c, ATNState *state, Ref<SemanticContext> const& semanticContext) + : ATNConfig(c, state, c->context, semanticContext) { +} + +ATNConfig::ATNConfig(Ref<ATNConfig> const& c, Ref<SemanticContext> const& semanticContext) + : ATNConfig(c, c->state, c->context, semanticContext) { +} + +ATNConfig::ATNConfig(Ref<ATNConfig> const& c, ATNState *state, Ref<PredictionContext> const& context) + : ATNConfig(c, state, context, c->semanticContext) { +} + +ATNConfig::ATNConfig(Ref<ATNConfig> const& c, ATNState *state, Ref<PredictionContext> const& context, + Ref<SemanticContext> const& semanticContext) + : state(state), alt(c->alt), context(context), reachesIntoOuterContext(c->reachesIntoOuterContext), + semanticContext(semanticContext) { +} + +ATNConfig::~ATNConfig() { +} + +size_t ATNConfig::hashCode() const { + size_t hashCode = misc::MurmurHash::initialize(7); + hashCode = misc::MurmurHash::update(hashCode, state->stateNumber); + hashCode = misc::MurmurHash::update(hashCode, alt); + hashCode = misc::MurmurHash::update(hashCode, context); + hashCode = misc::MurmurHash::update(hashCode, semanticContext); + hashCode = 
misc::MurmurHash::finish(hashCode, 4); + return hashCode; +} + +size_t ATNConfig::getOuterContextDepth() const { + return reachesIntoOuterContext & ~SUPPRESS_PRECEDENCE_FILTER; +} + +bool ATNConfig::isPrecedenceFilterSuppressed() const { + return (reachesIntoOuterContext & SUPPRESS_PRECEDENCE_FILTER) != 0; +} + +void ATNConfig::setPrecedenceFilterSuppressed(bool value) { + if (value) { + reachesIntoOuterContext |= SUPPRESS_PRECEDENCE_FILTER; + } else { + reachesIntoOuterContext &= ~SUPPRESS_PRECEDENCE_FILTER; + } +} + +bool ATNConfig::operator == (const ATNConfig &other) const { + return state->stateNumber == other.state->stateNumber && alt == other.alt && + ((context == other.context) || (*context == *other.context)) && + *semanticContext == *other.semanticContext && + isPrecedenceFilterSuppressed() == other.isPrecedenceFilterSuppressed(); +} + +bool ATNConfig::operator != (const ATNConfig &other) const { + return !operator==(other); +} + +std::string ATNConfig::toString() { + return toString(true); +} + +std::string ATNConfig::toString(bool showAlt) { + std::stringstream ss; + ss << "("; + + ss << state->toString(); + if (showAlt) { + ss << "," << alt; + } + if (context) { + ss << ",[" << context->toString() << "]"; + } + if (semanticContext != nullptr && semanticContext != SemanticContext::NONE) { + ss << "," << semanticContext.get(); + } + if (getOuterContextDepth() > 0) { + ss << ",up=" << getOuterContextDepth(); + } + ss << ')'; + + return ss.str(); +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNConfig.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNConfig.h new file mode 100644 index 0000000..767655b --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNConfig.h @@ -0,0 +1,154 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +namespace antlr4 { +namespace atn { + + /// <summary> + /// A tuple: (ATN state, predicted alt, syntactic, semantic context). + /// The syntactic context is a graph-structured stack node whose + /// path(s) to the root is the rule invocation(s) + /// chain used to arrive at the state. The semantic context is + /// the tree of semantic predicates encountered before reaching + /// an ATN state. + /// </summary> + class ANTLR4CPP_PUBLIC ATNConfig { + public: + struct Hasher + { + size_t operator()(ATNConfig const& k) const { + return k.hashCode(); + } + }; + + struct Comparer { + bool operator()(ATNConfig const& lhs, ATNConfig const& rhs) const { + return (&lhs == &rhs) || (lhs == rhs); + } + }; + + + using Set = std::unordered_set<Ref<ATNConfig>, Hasher, Comparer>; + + /// The ATN state associated with this configuration. + ATNState * state; + + /// What alt (or lexer rule) is predicted by this configuration. + const size_t alt; + + /// The stack of invoking states leading to the rule/states associated + /// with this config. We track only those contexts pushed during + /// execution of the ATN simulator. + /// + /// Can be shared between multiple ANTConfig instances. + Ref<PredictionContext> context; + + /** + * We cannot execute predicates dependent upon local context unless + * we know for sure we are in the correct context. Because there is + * no way to do this efficiently, we simply cannot evaluate + * dependent predicates unless we are in the rule that initially + * invokes the ATN simulator. + * + * <p> + * closure() tracks the depth of how far we dip into the outer context: + * depth > 0. Note that it may not be totally accurate depth since I + * don't ever decrement. TODO: make it a boolean then</p> + * + * <p> + * For memory efficiency, the {@link #isPrecedenceFilterSuppressed} method + * is also backed by this field. 
Since the field is publicly accessible, the + * highest bit which would not cause the value to become negative is used to + * store this field. This choice minimizes the risk that code which only + * compares this value to 0 would be affected by the new purpose of the + * flag. It also ensures the performance of the existing {@link ATNConfig} + * constructors as well as certain operations like + * {@link ATNConfigSet#add(ATNConfig, DoubleKeyMap)} method are + * <em>completely</em> unaffected by the change.</p> + */ + size_t reachesIntoOuterContext; + + /// Can be shared between multiple ATNConfig instances. + Ref<SemanticContext> semanticContext; + + ATNConfig(ATNState *state, size_t alt, Ref<PredictionContext> const& context); + ATNConfig(ATNState *state, size_t alt, Ref<PredictionContext> const& context, Ref<SemanticContext> const& semanticContext); + + ATNConfig(Ref<ATNConfig> const& c); // dup + ATNConfig(Ref<ATNConfig> const& c, ATNState *state); + ATNConfig(Ref<ATNConfig> const& c, ATNState *state, Ref<SemanticContext> const& semanticContext); + ATNConfig(Ref<ATNConfig> const& c, Ref<SemanticContext> const& semanticContext); + ATNConfig(Ref<ATNConfig> const& c, ATNState *state, Ref<PredictionContext> const& context); + ATNConfig(Ref<ATNConfig> const& c, ATNState *state, Ref<PredictionContext> const& context, Ref<SemanticContext> const& semanticContext); + + ATNConfig(ATNConfig const&) = default; + virtual ~ATNConfig(); + + virtual size_t hashCode() const; + + /** + * This method gets the value of the {@link #reachesIntoOuterContext} field + * as it existed prior to the introduction of the + * {@link #isPrecedenceFilterSuppressed} method. + */ + size_t getOuterContextDepth() const ; + bool isPrecedenceFilterSuppressed() const; + void setPrecedenceFilterSuppressed(bool value); + + /// An ATN configuration is equal to another if both have + /// the same state, they predict the same alternative, and + /// syntactic/semantic contexts are the same. 
+ bool operator == (const ATNConfig &other) const; + bool operator != (const ATNConfig &other) const; + + virtual std::string toString(); + std::string toString(bool showAlt); + + private: + /** + * This field stores the bit mask for implementing the + * {@link #isPrecedenceFilterSuppressed} property as a bit within the + * existing {@link #reachesIntoOuterContext} field. + */ +#if __cplusplus >= 201703L + static constexpr size_t SUPPRESS_PRECEDENCE_FILTER = 0x40000000; +#else + enum : size_t { + SUPPRESS_PRECEDENCE_FILTER = 0x40000000, + }; +#endif + }; + +} // namespace atn +} // namespace antlr4 + + +// Hash function for ATNConfig. + +namespace std { + using antlr4::atn::ATNConfig; + + template <> struct hash<ATNConfig> + { + size_t operator() (const ATNConfig &x) const + { + return x.hashCode(); + } + }; + + template <> struct hash<std::vector<Ref<ATNConfig>>> + { + size_t operator() (const std::vector<Ref<ATNConfig>> &vector) const + { + std::size_t seed = 0; + for (const auto &config : vector) { + seed ^= config->hashCode() + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + return seed; + } + }; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNConfigSet.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNConfigSet.cpp new file mode 100644 index 0000000..7a480a4 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNConfigSet.cpp @@ -0,0 +1,228 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/PredictionContext.h" +#include "atn/ATNConfig.h" +#include "atn/ATNSimulator.h" +#include "Exceptions.h" +#include "atn/SemanticContext.h" +#include "support/Arrays.h" + +#include "atn/ATNConfigSet.h" + +using namespace antlr4::atn; +using namespace antlrcpp; + +ATNConfigSet::ATNConfigSet(bool fullCtx) : fullCtx(fullCtx) { + InitializeInstanceFields(); +} + +ATNConfigSet::ATNConfigSet(const Ref<ATNConfigSet> &old) : ATNConfigSet(old->fullCtx) { + addAll(old); + uniqueAlt = old->uniqueAlt; + conflictingAlts = old->conflictingAlts; + hasSemanticContext = old->hasSemanticContext; + dipsIntoOuterContext = old->dipsIntoOuterContext; +} + +ATNConfigSet::~ATNConfigSet() { +} + +bool ATNConfigSet::add(const Ref<ATNConfig> &config) { + return add(config, nullptr); +} + +bool ATNConfigSet::add(const Ref<ATNConfig> &config, PredictionContextMergeCache *mergeCache) { + if (_readonly) { + throw IllegalStateException("This set is readonly"); + } + if (config->semanticContext != SemanticContext::NONE) { + hasSemanticContext = true; + } + if (config->getOuterContextDepth() > 0) { + dipsIntoOuterContext = true; + } + + size_t hash = getHash(config.get()); + ATNConfig *existing = _configLookup[hash]; + if (existing == nullptr) { + _configLookup[hash] = config.get(); + _cachedHashCode = 0; + configs.push_back(config); // track order here + + return true; + } + + // a previous (s,i,pi,_), merge with it and save result + bool rootIsWildcard = !fullCtx; + Ref<PredictionContext> merged = PredictionContext::merge(existing->context, config->context, rootIsWildcard, mergeCache); + // no need to check for existing.context, config.context in cache + // since only way to create new graphs is "call rule" and here. We + // cache at both places. 
+ existing->reachesIntoOuterContext = std::max(existing->reachesIntoOuterContext, config->reachesIntoOuterContext); + + // make sure to preserve the precedence filter suppression during the merge + if (config->isPrecedenceFilterSuppressed()) { + existing->setPrecedenceFilterSuppressed(true); + } + + existing->context = merged; // replace context; no need to alt mapping + + return true; +} + +bool ATNConfigSet::addAll(const Ref<ATNConfigSet> &other) { + for (auto &c : other->configs) { + add(c); + } + return false; +} + +std::vector<ATNState*> ATNConfigSet::getStates() { + std::vector<ATNState*> states; + for (auto c : configs) { + states.push_back(c->state); + } + return states; +} + +/** + * Gets the complete set of represented alternatives for the configuration + * set. + * + * @return the set of represented alternatives in this configuration set + * + * @since 4.3 + */ + +BitSet ATNConfigSet::getAlts() { + BitSet alts; + for (ATNConfig config : configs) { + alts.set(config.alt); + } + return alts; +} + +std::vector<Ref<SemanticContext>> ATNConfigSet::getPredicates() { + std::vector<Ref<SemanticContext>> preds; + for (auto c : configs) { + if (c->semanticContext != SemanticContext::NONE) { + preds.push_back(c->semanticContext); + } + } + return preds; +} + +Ref<ATNConfig> ATNConfigSet::get(size_t i) const { + return configs[i]; +} + +void ATNConfigSet::optimizeConfigs(ATNSimulator *interpreter) { + if (_readonly) { + throw IllegalStateException("This set is readonly"); + } + if (_configLookup.empty()) + return; + + for (auto &config : configs) { + config->context = interpreter->getCachedContext(config->context); + } +} + +bool ATNConfigSet::operator == (const ATNConfigSet &other) { + if (&other == this) { + return true; + } + + if (configs.size() != other.configs.size()) + return false; + + if (fullCtx != other.fullCtx || uniqueAlt != other.uniqueAlt || + conflictingAlts != other.conflictingAlts || hasSemanticContext != other.hasSemanticContext || + 
dipsIntoOuterContext != other.dipsIntoOuterContext) // includes stack context + return false; + + return Arrays::equals(configs, other.configs); +} + +size_t ATNConfigSet::hashCode() { + if (!isReadonly() || _cachedHashCode == 0) { + _cachedHashCode = 1; + for (auto &i : configs) { + _cachedHashCode = 31 * _cachedHashCode + i->hashCode(); // Same as Java's list hashCode impl. + } + } + + return _cachedHashCode; +} + +size_t ATNConfigSet::size() { + return configs.size(); +} + +bool ATNConfigSet::isEmpty() { + return configs.empty(); +} + +void ATNConfigSet::clear() { + if (_readonly) { + throw IllegalStateException("This set is readonly"); + } + configs.clear(); + _cachedHashCode = 0; + _configLookup.clear(); +} + +bool ATNConfigSet::isReadonly() { + return _readonly; +} + +void ATNConfigSet::setReadonly(bool readonly) { + _readonly = readonly; + _configLookup.clear(); +} + +std::string ATNConfigSet::toString() { + std::stringstream ss; + ss << "["; + for (size_t i = 0; i < configs.size(); i++) { + ss << configs[i]->toString(); + } + ss << "]"; + + if (hasSemanticContext) { + ss << ",hasSemanticContext = " << hasSemanticContext; + } + if (uniqueAlt != ATN::INVALID_ALT_NUMBER) { + ss << ",uniqueAlt = " << uniqueAlt; + } + + if (conflictingAlts.size() > 0) { + ss << ",conflictingAlts = "; + ss << conflictingAlts.toString(); + } + + if (dipsIntoOuterContext) { + ss << ", dipsIntoOuterContext"; + } + return ss.str(); +} + +size_t ATNConfigSet::getHash(ATNConfig *c) { + size_t hashCode = 7; + hashCode = 31 * hashCode + c->state->stateNumber; + hashCode = 31 * hashCode + c->alt; + hashCode = 31 * hashCode + c->semanticContext->hashCode(); + return hashCode; +} + +void ATNConfigSet::InitializeInstanceFields() { + uniqueAlt = 0; + hasSemanticContext = false; + dipsIntoOuterContext = false; + + _readonly = false; + _cachedHashCode = 0; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNConfigSet.h 
b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNConfigSet.h new file mode 100644 index 0000000..850a07c --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNConfigSet.h @@ -0,0 +1,110 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "support/BitSet.h" +#include "atn/PredictionContext.h" + +namespace antlr4 { +namespace atn { + + /// Specialized set that can track info about the set, with support for combining similar configurations using a + /// graph-structured stack. + class ANTLR4CPP_PUBLIC ATNConfigSet { + public: + /// Track the elements as they are added to the set; supports get(i) + std::vector<Ref<ATNConfig>> configs; + + // TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation + // TODO: can we track conflicts as they are added to save scanning configs later? + size_t uniqueAlt; + + /** Currently this is only used when we detect SLL conflict; this does + * not necessarily represent the ambiguous alternatives. In fact, + * I should also point out that this seems to include predicated alternatives + * that have predicates that evaluate to false. Computed in computeTargetState(). + */ + antlrcpp::BitSet conflictingAlts; + + // Used in parser and lexer. In lexer, it indicates we hit a pred + // while computing a closure operation. Don't make a DFA state from this. + bool hasSemanticContext; + bool dipsIntoOuterContext; + + /// Indicates that this configuration set is part of a full context + /// LL prediction. It will be used to determine how to merge $. With SLL + /// it's a wildcard whereas it is not for LL context merge. 
+ const bool fullCtx; + + ATNConfigSet(bool fullCtx = true); + ATNConfigSet(const Ref<ATNConfigSet> &old); + + virtual ~ATNConfigSet(); + + virtual bool add(const Ref<ATNConfig> &config); + + /// <summary> + /// Adding a new config means merging contexts with existing configs for + /// {@code (s, i, pi, _)}, where {@code s} is the + /// <seealso cref="ATNConfig#state"/>, {@code i} is the <seealso cref="ATNConfig#alt"/>, and + /// {@code pi} is the <seealso cref="ATNConfig#semanticContext"/>. We use + /// {@code (s,i,pi)} as key. + /// <p/> + /// This method updates <seealso cref="#dipsIntoOuterContext"/> and + /// <seealso cref="#hasSemanticContext"/> when necessary. + /// </summary> + virtual bool add(const Ref<ATNConfig> &config, PredictionContextMergeCache *mergeCache); + + virtual std::vector<ATNState *> getStates(); + + /** + * Gets the complete set of represented alternatives for the configuration + * set. + * + * @return the set of represented alternatives in this configuration set + * + * @since 4.3 + */ + antlrcpp::BitSet getAlts(); + virtual std::vector<Ref<SemanticContext>> getPredicates(); + + virtual Ref<ATNConfig> get(size_t i) const; + + virtual void optimizeConfigs(ATNSimulator *interpreter); + + bool addAll(const Ref<ATNConfigSet> &other); + + bool operator == (const ATNConfigSet &other); + virtual size_t hashCode(); + virtual size_t size(); + virtual bool isEmpty(); + virtual void clear(); + virtual bool isReadonly(); + virtual void setReadonly(bool readonly); + virtual std::string toString(); + + protected: + /// Indicates that the set of configurations is read-only. Do not + /// allow any code to manipulate the set; DFA states will point at + /// the sets and they must not change. This does not protect the other + /// fields; in particular, conflictingAlts is set after + /// we've made this readonly. + bool _readonly; + + virtual size_t getHash(ATNConfig *c); // Hash differs depending on set type. 
+ + private: + size_t _cachedHashCode; + + /// All configs but hashed by (s, i, _, pi) not including context. Wiped out + /// when we go readonly as this set becomes a DFA state. + std::unordered_map<size_t, ATNConfig *> _configLookup; + + void InitializeInstanceFields(); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNDeserializationOptions.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNDeserializationOptions.cpp new file mode 100644 index 0000000..a406c4e --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNDeserializationOptions.cpp @@ -0,0 +1,64 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATNDeserializationOptions.h" + +using namespace antlr4::atn; + +ATNDeserializationOptions ATNDeserializationOptions::defaultOptions; + +ATNDeserializationOptions::ATNDeserializationOptions() { + InitializeInstanceFields(); +} + +ATNDeserializationOptions::ATNDeserializationOptions(ATNDeserializationOptions *options) : ATNDeserializationOptions() { + this->verifyATN = options->verifyATN; + this->generateRuleBypassTransitions = options->generateRuleBypassTransitions; +} + +ATNDeserializationOptions::~ATNDeserializationOptions() { +} + +const ATNDeserializationOptions& ATNDeserializationOptions::getDefaultOptions() { + return defaultOptions; +} + +bool ATNDeserializationOptions::isReadOnly() { + return readOnly; +} + +void ATNDeserializationOptions::makeReadOnly() { + readOnly = true; +} + +bool ATNDeserializationOptions::isVerifyATN() { + return verifyATN; +} + +void ATNDeserializationOptions::setVerifyATN(bool verify) { + throwIfReadOnly(); + verifyATN = verify; +} + +bool ATNDeserializationOptions::isGenerateRuleBypassTransitions() { + return generateRuleBypassTransitions; +} + +void 
ATNDeserializationOptions::setGenerateRuleBypassTransitions(bool generate) { + throwIfReadOnly(); + generateRuleBypassTransitions = generate; +} + +void ATNDeserializationOptions::throwIfReadOnly() { + if (isReadOnly()) { + throw "The object is read only."; + } +} + +void ATNDeserializationOptions::InitializeInstanceFields() { + readOnly = false; + verifyATN = true; + generateRuleBypassTransitions = false; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNDeserializationOptions.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNDeserializationOptions.h new file mode 100644 index 0000000..66aa37d --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNDeserializationOptions.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ATNDeserializationOptions { + private: + static ATNDeserializationOptions defaultOptions; + + bool readOnly; + bool verifyATN; + bool generateRuleBypassTransitions; + + public: + ATNDeserializationOptions(); + ATNDeserializationOptions(ATNDeserializationOptions *options); + ATNDeserializationOptions(ATNDeserializationOptions const&) = default; + virtual ~ATNDeserializationOptions(); + ATNDeserializationOptions& operator=(ATNDeserializationOptions const&) = default; + + static const ATNDeserializationOptions& getDefaultOptions(); + + bool isReadOnly(); + + void makeReadOnly(); + + bool isVerifyATN(); + + void setVerifyATN(bool verify); + + bool isGenerateRuleBypassTransitions(); + + void setGenerateRuleBypassTransitions(bool generate); + + protected: + virtual void throwIfReadOnly(); + + private: + void InitializeInstanceFields(); + }; + +} // namespace atn +} // namespace antlr4 diff --git 
a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNDeserializer.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNDeserializer.cpp new file mode 100644 index 0000000..d1d622a --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNDeserializer.cpp @@ -0,0 +1,756 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATNDeserializationOptions.h" + +#include "atn/ATNType.h" +#include "atn/ATNState.h" +#include "atn/ATN.h" + +#include "atn/LoopEndState.h" +#include "atn/DecisionState.h" +#include "atn/RuleStartState.h" +#include "atn/RuleStopState.h" +#include "atn/TokensStartState.h" +#include "atn/RuleTransition.h" +#include "atn/EpsilonTransition.h" +#include "atn/PlusLoopbackState.h" +#include "atn/PlusBlockStartState.h" +#include "atn/StarLoopbackState.h" +#include "atn/BasicBlockStartState.h" +#include "atn/BasicState.h" +#include "atn/BlockEndState.h" +#include "atn/StarLoopEntryState.h" + +#include "atn/AtomTransition.h" +#include "atn/StarBlockStartState.h" +#include "atn/RangeTransition.h" +#include "atn/PredicateTransition.h" +#include "atn/PrecedencePredicateTransition.h" +#include "atn/ActionTransition.h" +#include "atn/SetTransition.h" +#include "atn/NotSetTransition.h" +#include "atn/WildcardTransition.h" +#include "Token.h" + +#include "misc/IntervalSet.h" +#include "Exceptions.h" +#include "support/CPPUtils.h" +#include "support/StringUtils.h" + +#include "atn/LexerCustomAction.h" +#include "atn/LexerChannelAction.h" +#include "atn/LexerModeAction.h" +#include "atn/LexerMoreAction.h" +#include "atn/LexerPopModeAction.h" +#include "atn/LexerPushModeAction.h" +#include "atn/LexerSkipAction.h" +#include "atn/LexerTypeAction.h" + +#include "atn/ATNDeserializer.h" + +#include <string> + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + 
+namespace { + +uint32_t deserializeInt32(const std::vector<uint16_t>& data, size_t offset) { + return (uint32_t)data[offset] | ((uint32_t)data[offset + 1] << 16); +} + +ssize_t readUnicodeInt(const std::vector<uint16_t>& data, int& p) { + return static_cast<ssize_t>(data[p++]); +} + +ssize_t readUnicodeInt32(const std::vector<uint16_t>& data, int& p) { + auto result = deserializeInt32(data, p); + p += 2; + return static_cast<ssize_t>(result); +} + +// We templatize this on the function type so the optimizer can inline +// the 16- or 32-bit readUnicodeInt/readUnicodeInt32 as needed. +template <typename F> +void deserializeSets( + const std::vector<uint16_t>& data, + int& p, + std::vector<misc::IntervalSet>& sets, + F readUnicode) { + int nsets = data[p++]; + for (int i = 0; i < nsets; i++) { + int nintervals = data[p++]; + misc::IntervalSet set; + + bool containsEof = data[p++] != 0; + if (containsEof) { + set.add(-1); + } + + for (int j = 0; j < nintervals; j++) { + auto a = readUnicode(data, p); + auto b = readUnicode(data, p); + set.add(a, b); + } + sets.push_back(set); + } +} + +} + +ATNDeserializer::ATNDeserializer(): ATNDeserializer(ATNDeserializationOptions::getDefaultOptions()) { +} + +ATNDeserializer::ATNDeserializer(const ATNDeserializationOptions& dso): deserializationOptions(dso) { +} + +ATNDeserializer::~ATNDeserializer() { +} + +/** + * This value should never change. Updates following this version are + * reflected as change in the unique ID SERIALIZED_UUID. 
+ */ +Guid ATNDeserializer::ADDED_PRECEDENCE_TRANSITIONS() { + return Guid("1DA0C57D-6C06-438A-9B27-10BCB3CE0F61"); +} + +Guid ATNDeserializer::ADDED_LEXER_ACTIONS() { + return Guid("AADB8D7E-AEEF-4415-AD2B-8204D6CF042E"); +} + +Guid ATNDeserializer::ADDED_UNICODE_SMP() { + return Guid("59627784-3BE5-417A-B9EB-8131A7286089"); +} + +Guid ATNDeserializer::SERIALIZED_UUID() { + return ADDED_UNICODE_SMP(); +} + +Guid ATNDeserializer::BASE_SERIALIZED_UUID() { + return Guid("33761B2D-78BB-4A43-8B0B-4F5BEE8AACF3"); +} + +std::vector<Guid>& ATNDeserializer::SUPPORTED_UUIDS() { + static std::vector<Guid> singleton = { BASE_SERIALIZED_UUID(), ADDED_PRECEDENCE_TRANSITIONS(), ADDED_LEXER_ACTIONS(), ADDED_UNICODE_SMP() }; + return singleton; +} + +bool ATNDeserializer::isFeatureSupported(const Guid &feature, const Guid &actualUuid) { + auto featureIterator = std::find(SUPPORTED_UUIDS().begin(), SUPPORTED_UUIDS().end(), feature); + if (featureIterator == SUPPORTED_UUIDS().end()) { + return false; + } + auto actualIterator = std::find(SUPPORTED_UUIDS().begin(), SUPPORTED_UUIDS().end(), actualUuid); + if (actualIterator == SUPPORTED_UUIDS().end()) { + return false; + } + + return std::distance(featureIterator, actualIterator) >= 0; +} + +ATN ATNDeserializer::deserialize(const std::vector<uint16_t>& input) { + // Don't adjust the first value since that's the version number. 
+ std::vector<uint16_t> data(input.size()); + data[0] = input[0]; + for (size_t i = 1; i < input.size(); ++i) { + data[i] = input[i] - 2; + } + + int p = 0; + int version = data[p++]; + if (version != SERIALIZED_VERSION) { + std::string reason = "Could not deserialize ATN with version" + std::to_string(version) + "(expected " + std::to_string(SERIALIZED_VERSION) + ")."; + + throw UnsupportedOperationException(reason); + } + + Guid uuid = toUUID(data.data(), p); + p += 8; + auto uuidIterator = std::find(SUPPORTED_UUIDS().begin(), SUPPORTED_UUIDS().end(), uuid); + if (uuidIterator == SUPPORTED_UUIDS().end()) { + std::string reason = "Could not deserialize ATN with UUID " + uuid.toString() + " (expected " + + SERIALIZED_UUID().toString() + " or a legacy UUID)."; + + throw UnsupportedOperationException(reason); + } + + bool supportsPrecedencePredicates = isFeatureSupported(ADDED_PRECEDENCE_TRANSITIONS(), uuid); + bool supportsLexerActions = isFeatureSupported(ADDED_LEXER_ACTIONS(), uuid); + + ATNType grammarType = (ATNType)data[p++]; + size_t maxTokenType = data[p++]; + ATN atn(grammarType, maxTokenType); + + // + // STATES + // + std::vector<std::pair<LoopEndState*, size_t>> loopBackStateNumbers; + std::vector<std::pair<BlockStartState*, size_t>> endStateNumbers; + size_t nstates = data[p++]; + for (size_t i = 0; i < nstates; i++) { + size_t stype = data[p++]; + // ignore bad type of states + if (stype == ATNState::ATN_INVALID_TYPE) { + atn.addState(nullptr); + continue; + } + + size_t ruleIndex = data[p++]; + if (ruleIndex == 0xFFFF) { + ruleIndex = INVALID_INDEX; + } + + ATNState *s = stateFactory(stype, ruleIndex); + if (stype == ATNState::LOOP_END) { // special case + int loopBackStateNumber = data[p++]; + loopBackStateNumbers.push_back({ (LoopEndState*)s, loopBackStateNumber }); + } else if (is<BlockStartState*>(s)) { + int endStateNumber = data[p++]; + endStateNumbers.push_back({ (BlockStartState*)s, endStateNumber }); + } + atn.addState(s); + } + + // delay the 
assignment of loop back and end states until we know all the state instances have been initialized + for (auto &pair : loopBackStateNumbers) { + pair.first->loopBackState = atn.states[pair.second]; + } + + for (auto &pair : endStateNumbers) { + pair.first->endState = (BlockEndState*)atn.states[pair.second]; + } + + size_t numNonGreedyStates = data[p++]; + for (size_t i = 0; i < numNonGreedyStates; i++) { + size_t stateNumber = data[p++]; + // The serialized ATN must be specifying the right states, so that the + // cast below is correct. + ((DecisionState *)atn.states[stateNumber])->nonGreedy = true; + } + + if (supportsPrecedencePredicates) { + size_t numPrecedenceStates = data[p++]; + for (size_t i = 0; i < numPrecedenceStates; i++) { + size_t stateNumber = data[p++]; + ((RuleStartState *)atn.states[stateNumber])->isLeftRecursiveRule = true; + } + } + + // + // RULES + // + size_t nrules = data[p++]; + for (size_t i = 0; i < nrules; i++) { + size_t s = data[p++]; + // Also here, the serialized atn must ensure to point to the correct class type. 
+ RuleStartState *startState = (RuleStartState*)atn.states[s]; + atn.ruleToStartState.push_back(startState); + if (atn.grammarType == ATNType::LEXER) { + size_t tokenType = data[p++]; + if (tokenType == 0xFFFF) { + tokenType = Token::EOF; + } + + atn.ruleToTokenType.push_back(tokenType); + + if (!isFeatureSupported(ADDED_LEXER_ACTIONS(), uuid)) { + // this piece of unused metadata was serialized prior to the + // addition of LexerAction + //int actionIndexIgnored = data[p++]; + p++; + } + } + } + + atn.ruleToStopState.resize(nrules); + for (ATNState *state : atn.states) { + if (!is<RuleStopState*>(state)) { + continue; + } + + RuleStopState *stopState = static_cast<RuleStopState*>(state); + atn.ruleToStopState[state->ruleIndex] = stopState; + atn.ruleToStartState[state->ruleIndex]->stopState = stopState; + } + + // + // MODES + // + size_t nmodes = data[p++]; + for (size_t i = 0; i < nmodes; i++) { + size_t s = data[p++]; + atn.modeToStartState.push_back(static_cast<TokensStartState*>(atn.states[s])); + } + + // + // SETS + // + std::vector<misc::IntervalSet> sets; + + // First, deserialize sets with 16-bit arguments <= U+FFFF. + deserializeSets(data, p, sets, readUnicodeInt); + + // Next, if the ATN was serialized with the Unicode SMP feature, + // deserialize sets with 32-bit arguments <= U+10FFFF. 
+ if (isFeatureSupported(ADDED_UNICODE_SMP(), uuid)) { + deserializeSets(data, p, sets, readUnicodeInt32); + } + + // + // EDGES + // + int nedges = data[p++]; + for (int i = 0; i < nedges; i++) { + size_t src = data[p]; + size_t trg = data[p + 1]; + size_t ttype = data[p + 2]; + size_t arg1 = data[p + 3]; + size_t arg2 = data[p + 4]; + size_t arg3 = data[p + 5]; + Transition *trans = edgeFactory(atn, ttype, src, trg, arg1, arg2, arg3, sets); + ATNState *srcState = atn.states[src]; + srcState->addTransition(trans); + p += 6; + } + + // edges for rule stop states can be derived, so they aren't serialized + for (ATNState *state : atn.states) { + for (size_t i = 0; i < state->transitions.size(); i++) { + Transition *t = state->transitions[i]; + if (!is<RuleTransition*>(t)) { + continue; + } + + RuleTransition *ruleTransition = static_cast<RuleTransition*>(t); + size_t outermostPrecedenceReturn = INVALID_INDEX; + if (atn.ruleToStartState[ruleTransition->target->ruleIndex]->isLeftRecursiveRule) { + if (ruleTransition->precedence == 0) { + outermostPrecedenceReturn = ruleTransition->target->ruleIndex; + } + } + + EpsilonTransition *returnTransition = new EpsilonTransition(ruleTransition->followState, outermostPrecedenceReturn); /* mem check: freed in ANTState d-tor */ + atn.ruleToStopState[ruleTransition->target->ruleIndex]->addTransition(returnTransition); + } + } + + for (ATNState *state : atn.states) { + if (is<BlockStartState *>(state)) { + BlockStartState *startState = static_cast<BlockStartState *>(state); + + // we need to know the end state to set its start state + if (startState->endState == nullptr) { + throw IllegalStateException(); + } + + // block end states can only be associated to a single block start state + if (startState->endState->startState != nullptr) { + throw IllegalStateException(); + } + + startState->endState->startState = static_cast<BlockStartState*>(state); + } + + if (is<PlusLoopbackState*>(state)) { + PlusLoopbackState *loopbackState = 
static_cast<PlusLoopbackState *>(state); + for (size_t i = 0; i < loopbackState->transitions.size(); i++) { + ATNState *target = loopbackState->transitions[i]->target; + if (is<PlusBlockStartState *>(target)) { + (static_cast<PlusBlockStartState *>(target))->loopBackState = loopbackState; + } + } + } else if (is<StarLoopbackState *>(state)) { + StarLoopbackState *loopbackState = static_cast<StarLoopbackState *>(state); + for (size_t i = 0; i < loopbackState->transitions.size(); i++) { + ATNState *target = loopbackState->transitions[i]->target; + if (is<StarLoopEntryState *>(target)) { + (static_cast<StarLoopEntryState*>(target))->loopBackState = loopbackState; + } + } + } + } + + // + // DECISIONS + // + size_t ndecisions = data[p++]; + for (size_t i = 1; i <= ndecisions; i++) { + size_t s = data[p++]; + DecisionState *decState = dynamic_cast<DecisionState*>(atn.states[s]); + if (decState == nullptr) + throw IllegalStateException(); + + atn.decisionToState.push_back(decState); + decState->decision = (int)i - 1; + } + + // + // LEXER ACTIONS + // + if (atn.grammarType == ATNType::LEXER) { + if (supportsLexerActions) { + atn.lexerActions.resize(data[p++]); + for (size_t i = 0; i < atn.lexerActions.size(); i++) { + LexerActionType actionType = (LexerActionType)data[p++]; + int data1 = data[p++]; + if (data1 == 0xFFFF) { + data1 = -1; + } + + int data2 = data[p++]; + if (data2 == 0xFFFF) { + data2 = -1; + } + + atn.lexerActions[i] = lexerActionFactory(actionType, data1, data2); + } + } else { + // for compatibility with older serialized ATNs, convert the old + // serialized action index for action transitions to the new + // form, which is the index of a LexerCustomAction + for (ATNState *state : atn.states) { + for (size_t i = 0; i < state->transitions.size(); i++) { + Transition *transition = state->transitions[i]; + if (!is<ActionTransition *>(transition)) { + continue; + } + + size_t ruleIndex = static_cast<ActionTransition *>(transition)->ruleIndex; + size_t 
actionIndex = static_cast<ActionTransition *>(transition)->actionIndex; + Ref<LexerCustomAction> lexerAction = std::make_shared<LexerCustomAction>(ruleIndex, actionIndex); + state->transitions[i] = new ActionTransition(transition->target, ruleIndex, atn.lexerActions.size(), false); /* mem-check freed in ATNState d-tor */ + delete transition; // ml: no longer needed since we just replaced it. + atn.lexerActions.push_back(lexerAction); + } + } + } + } + + markPrecedenceDecisions(atn); + + if (deserializationOptions.isVerifyATN()) { + verifyATN(atn); + } + + if (deserializationOptions.isGenerateRuleBypassTransitions() && atn.grammarType == ATNType::PARSER) { + atn.ruleToTokenType.resize(atn.ruleToStartState.size()); + for (size_t i = 0; i < atn.ruleToStartState.size(); i++) { + atn.ruleToTokenType[i] = int(atn.maxTokenType + i + 1); + } + + for (std::vector<RuleStartState*>::size_type i = 0; i < atn.ruleToStartState.size(); i++) { + BasicBlockStartState *bypassStart = new BasicBlockStartState(); /* mem check: freed in ATN d-tor */ + bypassStart->ruleIndex = (int)i; + atn.addState(bypassStart); + + BlockEndState *bypassStop = new BlockEndState(); /* mem check: freed in ATN d-tor */ + bypassStop->ruleIndex = (int)i; + atn.addState(bypassStop); + + bypassStart->endState = bypassStop; + atn.defineDecisionState(bypassStart); + + bypassStop->startState = bypassStart; + + ATNState *endState; + Transition *excludeTransition = nullptr; + if (atn.ruleToStartState[i]->isLeftRecursiveRule) { + // wrap from the beginning of the rule to the StarLoopEntryState + endState = nullptr; + for (ATNState *state : atn.states) { + if (state->ruleIndex != i) { + continue; + } + + if (!is<StarLoopEntryState*>(state)) { + continue; + } + + ATNState *maybeLoopEndState = state->transitions[state->transitions.size() - 1]->target; + if (!is<LoopEndState*>(maybeLoopEndState)) { + continue; + } + + if (maybeLoopEndState->epsilonOnlyTransitions && 
is<RuleStopState*>(maybeLoopEndState->transitions[0]->target)) { + endState = state; + break; + } + } + + if (endState == nullptr) { + throw UnsupportedOperationException("Couldn't identify final state of the precedence rule prefix section."); + + } + + excludeTransition = (static_cast<StarLoopEntryState*>(endState))->loopBackState->transitions[0]; + } else { + endState = atn.ruleToStopState[i]; + } + + // all non-excluded transitions that currently target end state need to target blockEnd instead + for (ATNState *state : atn.states) { + for (Transition *transition : state->transitions) { + if (transition == excludeTransition) { + continue; + } + + if (transition->target == endState) { + transition->target = bypassStop; + } + } + } + + // all transitions leaving the rule start state need to leave blockStart instead + while (atn.ruleToStartState[i]->transitions.size() > 0) { + Transition *transition = atn.ruleToStartState[i]->removeTransition(atn.ruleToStartState[i]->transitions.size() - 1); + bypassStart->addTransition(transition); + } + + // link the new states + atn.ruleToStartState[i]->addTransition(new EpsilonTransition(bypassStart)); /* mem check: freed in ATNState d-tor */ + bypassStop->addTransition(new EpsilonTransition(endState)); /* mem check: freed in ATNState d-tor */ + + ATNState *matchState = new BasicState(); /* mem check: freed in ATN d-tor */ + atn.addState(matchState); + matchState->addTransition(new AtomTransition(bypassStop, atn.ruleToTokenType[i])); /* mem check: freed in ATNState d-tor */ + bypassStart->addTransition(new EpsilonTransition(matchState)); /* mem check: freed in ATNState d-tor */ + } + + if (deserializationOptions.isVerifyATN()) { + // reverify after modification + verifyATN(atn); + } + } + + return atn; +} + +/** + * Analyze the {@link StarLoopEntryState} states in the specified ATN to set + * the {@link StarLoopEntryState#isPrecedenceDecision} field to the + * correct value. + * + * @param atn The ATN. 
+ */
+void ATNDeserializer::markPrecedenceDecisions(const ATN &atn) {
+  for (ATNState *state : atn.states) {
+    if (!is<StarLoopEntryState *>(state)) {
+      continue;
+    }
+
+    /* We analyze the ATN to determine if this ATN decision state is the
+     * decision for the closure block that determines whether a
+     * precedence rule should continue or complete.
+     */
+    if (atn.ruleToStartState[state->ruleIndex]->isLeftRecursiveRule) {
+      ATNState *maybeLoopEndState = state->transitions[state->transitions.size() - 1]->target;
+      if (is<LoopEndState *>(maybeLoopEndState)) {
+        if (maybeLoopEndState->epsilonOnlyTransitions && is<RuleStopState *>(maybeLoopEndState->transitions[0]->target)) {
+          static_cast<StarLoopEntryState *>(state)->isPrecedenceDecision = true;
+        }
+      }
+    }
+  }
+}
+
+/**
+ * Checks the structural assumptions the runtime makes about a deserialized
+ * ATN. Null entries in atn.states are skipped.
+ *
+ * @param atn The ATN to check.
+ * @throws IllegalStateException on the first violated assumption.
+ */
+void ATNDeserializer::verifyATN(const ATN &atn) {
+  // verify assumptions
+  for (ATNState *state : atn.states) {
+    if (state == nullptr) {
+      continue;
+    }
+
+    checkCondition(state->epsilonOnlyTransitions || state->transitions.size() <= 1);
+
+    if (is<PlusBlockStartState *>(state)) {
+      checkCondition((static_cast<PlusBlockStartState *>(state))->loopBackState != nullptr);
+    }
+
+    if (is<StarLoopEntryState *>(state)) {
+      StarLoopEntryState *starLoopEntryState = static_cast<StarLoopEntryState*>(state);
+      checkCondition(starLoopEntryState->loopBackState != nullptr);
+      checkCondition(starLoopEntryState->transitions.size() == 2);
+
+      if (is<StarBlockStartState *>(starLoopEntryState->transitions[0]->target)) {
+        // Must be a real type test: a static_cast result is only null when the
+        // target pointer itself is null, so it would accept any state type.
+        checkCondition(is<LoopEndState *>(starLoopEntryState->transitions[1]->target));
+        checkCondition(!starLoopEntryState->nonGreedy);
+      } else if (is<LoopEndState *>(starLoopEntryState->transitions[0]->target)) {
+        checkCondition(is<StarBlockStartState *>(starLoopEntryState->transitions[1]->target));
+        checkCondition(starLoopEntryState->nonGreedy);
+      } else {
+        throw IllegalStateException();
+
+      }
+    }
+
+    if (is<StarLoopbackState *>(state)) {
+      checkCondition(state->transitions.size() == 1);
+      checkCondition(is<StarLoopEntryState *>(state->transitions[0]->target));
+    }
+
+    if (is<LoopEndState *>(state)) {
+      checkCondition((static_cast<LoopEndState *>(state))->loopBackState != nullptr);
+    }
+
+    if (is<RuleStartState *>(state)) {
+      checkCondition((static_cast<RuleStartState *>(state))->stopState != nullptr);
+    }
+
+    if (is<BlockStartState *>(state)) {
+      checkCondition((static_cast<BlockStartState *>(state))->endState != nullptr);
+    }
+
+    if (is<BlockEndState *>(state)) {
+      checkCondition((static_cast<BlockEndState *>(state))->startState != nullptr);
+    }
+
+    if (is<DecisionState *>(state)) {
+      DecisionState *decisionState = static_cast<DecisionState *>(state);
+      checkCondition(decisionState->transitions.size() <= 1 || decisionState->decision >= 0);
+    } else {
+      checkCondition(state->transitions.size() <= 1 || is<RuleStopState *>(state));
+    }
+  }
+}
+
+// Same as the two-argument overload, with an empty message.
+void ATNDeserializer::checkCondition(bool condition) {
+  checkCondition(condition, "");
+}
+
+// Throws IllegalStateException(message) when `condition` is false.
+void ATNDeserializer::checkCondition(bool condition, const std::string &message) {
+  if (!condition) {
+    throw IllegalStateException(message);
+  }
+}
+
+// Builds a Guid from the words starting at data[offset].
+Guid ATNDeserializer::toUUID(const unsigned short *data, size_t offset) {
+  return Guid((uint16_t *)data + offset, true);
+}
+
+/* mem check: all created instances are freed in the d-tor of the ATNState they are added to.
*/
+// Creates the transition described by one serialized edge record.
+// `type` selects the transition class, `trg` is the index of the target state
+// in atn.states, and arg1..arg3 are interpreted per transition type. For RANGE
+// and ATOM a non-zero arg3 replaces arg1 with Token::EOF.
+// Throws IllegalArgumentException for an unknown `type`.
+Transition *ATNDeserializer::edgeFactory(const ATN &atn, size_t type, size_t /*src*/, size_t trg, size_t arg1,
+                                         size_t arg2, size_t arg3,
+                                         const std::vector<misc::IntervalSet> &sets) {
+
+  ATNState *const destination = atn.states[trg];
+  const bool flagSet = arg3 != 0;
+
+  switch (type) {
+    case Transition::EPSILON:
+      return new EpsilonTransition(destination);
+
+    case Transition::RANGE: {
+      const size_t lowerBound = flagSet ? static_cast<size_t>(Token::EOF) : arg1;
+      return new RangeTransition(destination, lowerBound, arg2);
+    }
+
+    case Transition::RULE: {
+      RuleStartState *const ruleStart = static_cast<RuleStartState*>(atn.states[arg1]);
+      return new RuleTransition(ruleStart, arg2, (int)arg3, destination);
+    }
+
+    case Transition::PREDICATE:
+      return new PredicateTransition(destination, arg1, arg2, flagSet);
+
+    case Transition::PRECEDENCE:
+      return new PrecedencePredicateTransition(destination, (int)arg1);
+
+    case Transition::ATOM: {
+      const size_t label = flagSet ? static_cast<size_t>(Token::EOF) : arg1;
+      return new AtomTransition(destination, label);
+    }
+
+    case Transition::ACTION:
+      return new ActionTransition(destination, arg1, arg2, flagSet);
+
+    case Transition::SET:
+      return new SetTransition(destination, sets[arg1]);
+
+    case Transition::NOT_SET:
+      return new NotSetTransition(destination, sets[arg1]);
+
+    case Transition::WILDCARD:
+      return new WildcardTransition(destination);
+  }
+
+  throw IllegalArgumentException("The specified transition type is not valid.");
+}
+
+/* mem check: all created instances are freed in the d-tor of the ATN.
*/
+// Creates the ATNState subclass matching the serialized state `type` and
+// stores `ruleIndex` in it. ATN_INVALID_TYPE yields nullptr; any other
+// unknown type throws IllegalArgumentException.
+ATNState* ATNDeserializer::stateFactory(size_t type, size_t ruleIndex) {
+  ATNState *s;
+  switch (type) {
+    case ATNState::ATN_INVALID_TYPE:
+      return nullptr;
+    case ATNState::BASIC :
+      s = new BasicState();
+      break;
+    case ATNState::RULE_START :
+      s = new RuleStartState();
+      break;
+    case ATNState::BLOCK_START :
+      s = new BasicBlockStartState();
+      break;
+    case ATNState::PLUS_BLOCK_START :
+      s = new PlusBlockStartState();
+      break;
+    case ATNState::STAR_BLOCK_START :
+      s = new StarBlockStartState();
+      break;
+    case ATNState::TOKEN_START :
+      s = new TokensStartState();
+      break;
+    case ATNState::RULE_STOP :
+      s = new RuleStopState();
+      break;
+    case ATNState::BLOCK_END :
+      s = new BlockEndState();
+      break;
+    case ATNState::STAR_LOOP_BACK :
+      s = new StarLoopbackState();
+      break;
+    case ATNState::STAR_LOOP_ENTRY :
+      s = new StarLoopEntryState();
+      break;
+    case ATNState::PLUS_LOOP_BACK :
+      s = new PlusLoopbackState();
+      break;
+    case ATNState::LOOP_END :
+      s = new LoopEndState();
+      break;
+    default :
+      std::string message = "The specified state type " + std::to_string(type) + " is not valid.";
+      throw IllegalArgumentException(message);
+  }
+
+  s->ruleIndex = ruleIndex;
+  return s;
+}
+
+// Creates the lexer action for a serialized action record. MORE, POP_MODE and
+// SKIP return the object from their getInstance(); CUSTOM uses data1 and
+// data2; the remaining types use data1 only. An unknown type throws
+// IllegalArgumentException.
+Ref<LexerAction> ATNDeserializer::lexerActionFactory(LexerActionType type, int data1, int data2) {
+  switch (type) {
+    case LexerActionType::CHANNEL:
+      return std::make_shared<LexerChannelAction>(data1);
+
+    case LexerActionType::CUSTOM:
+      return std::make_shared<LexerCustomAction>(data1, data2);
+
+    case LexerActionType::MODE:
+      return std::make_shared< LexerModeAction>(data1);
+
+    case LexerActionType::MORE:
+      return LexerMoreAction::getInstance();
+
+    case LexerActionType::POP_MODE:
+      return LexerPopModeAction::getInstance();
+
+    case LexerActionType::PUSH_MODE:
+      return std::make_shared<LexerPushModeAction>(data1);
+
+    case LexerActionType::SKIP:
+      return LexerSkipAction::getInstance();
+
+    case LexerActionType::TYPE:
+      return std::make_shared<LexerTypeAction>(data1);
+
+    default:
+      throw IllegalArgumentException("The specified lexer action type " + std::to_string(static_cast<size_t>(type)) +
+                                     " is not valid.");
+  }
+}
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNDeserializer.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNDeserializer.h
new file mode 100644
index 0000000..12fd11d
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNDeserializer.h
@@ -0,0 +1,91 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/LexerAction.h"
+#include "atn/ATNDeserializationOptions.h"
+
+namespace antlr4 {
+namespace atn {
+
+  /// Rebuilds an ATN from its serialized 16-bit word form.
+  class ANTLR4CPP_PUBLIC ATNDeserializer {
+  public:
+    // Serialized format version accepted by deserialize(). Declared as an enum
+    // constant instead of a constexpr member when compiling before C++17.
+#if __cplusplus >= 201703L
+    static constexpr size_t SERIALIZED_VERSION = 3;
+#else
+    enum : size_t {
+      SERIALIZED_VERSION = 3,
+    };
+#endif
+
+    /// This is the current serialized UUID.
+    // ml: defined as function to avoid the “static initialization order fiasco”.
+    static Guid SERIALIZED_UUID();
+
+    /// Uses ATNDeserializationOptions::getDefaultOptions().
+    ATNDeserializer();
+    /// Stores a copy of `dso`.
+    ATNDeserializer(const ATNDeserializationOptions& dso);
+    virtual ~ATNDeserializer();
+
+    /// Builds a Guid from the words starting at data[offset].
+    static Guid toUUID(const unsigned short *data, size_t offset);
+
+    /// Deserializes `input` into an ATN; throws UnsupportedOperationException
+    /// when the version or UUID is not supported.
+    virtual ATN deserialize(const std::vector<uint16_t> &input);
+    /// Checks structural assumptions about `atn`; throws IllegalStateException
+    /// on the first violation.
+    virtual void verifyATN(const ATN &atn);
+
+    /// Throws IllegalStateException (with `message`, or an empty message) when
+    /// `condition` is false.
+    static void checkCondition(bool condition);
+    static void checkCondition(bool condition, const std::string &message);
+
+    /// Creates the transition for one serialized edge record; `src` is unused
+    /// by the implementation.
+    static Transition *edgeFactory(const ATN &atn, size_t type, size_t src, size_t trg, size_t arg1, size_t arg2,
+                                   size_t arg3, const std::vector<misc::IntervalSet> &sets);
+
+    /// Creates the state for a serialized state type; returns nullptr for
+    /// ATNState::ATN_INVALID_TYPE.
+    static ATNState *stateFactory(size_t type, size_t ruleIndex);
+
+  protected:
+    /// Determines if a particular serialized representation of an ATN supports
+    /// a particular feature, identified by the <seealso cref="UUID"/> used for serializing
+    /// the ATN at the time the feature was first introduced.
+    ///
+    /// <param name="feature"> The <seealso cref="UUID"/> marking the first time the feature was
+    /// supported in the serialized ATN. </param>
+    /// <param name="actualUuid"> The <seealso cref="UUID"/> of the actual serialized ATN which is
+    /// currently being deserialized. </param>
+    /// <returns> {@code true} if the {@code actualUuid} value represents a
+    /// serialized ATN at or after the feature identified by {@code feature} was
+    /// introduced; otherwise, {@code false}. </returns>
+    virtual bool isFeatureSupported(const Guid &feature, const Guid &actualUuid);
+    /// Sets isPrecedenceDecision on the matching StarLoopEntryState instances.
+    void markPrecedenceDecisions(const ATN &atn);
+    /// Creates the lexer action for a serialized action record.
+    Ref<LexerAction> lexerActionFactory(LexerActionType type, int data1, int data2);
+
+  private:
+    /// This is the earliest supported serialized UUID.
+    static Guid BASE_SERIALIZED_UUID();
+
+    /// This UUID indicates an extension of <seealso cref="BASE_SERIALIZED_UUID"/> for the
+    /// addition of precedence predicates.
+    static Guid ADDED_PRECEDENCE_TRANSITIONS();
+
+    /**
+     * This UUID indicates an extension of ADDED_PRECEDENCE_TRANSITIONS
+     * for the addition of lexer actions encoded as a sequence of
+     * LexerAction instances.
+     */
+    static Guid ADDED_LEXER_ACTIONS();
+
+    /**
+     * This UUID indicates the serialized ATN contains two sets of
+     * IntervalSets, where the second set's values are encoded as
+     * 32-bit integers to support the full Unicode SMP range up to U+10FFFF.
+     */
+    static Guid ADDED_UNICODE_SMP();
+
+    /// This list contains all of the currently supported UUIDs, ordered by when
+    /// the feature first appeared in this branch.
+    static std::vector<Guid>& SUPPORTED_UUIDS();
+
+    // Copy of the options passed to the constructor; read by deserialize().
+    ATNDeserializationOptions deserializationOptions;
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNSerializer.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNSerializer.cpp
new file mode 100644
index 0000000..293bee5
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNSerializer.cpp
@@ -0,0 +1,621 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */ + +#include "misc/IntervalSet.h" +#include "atn/ATNType.h" +#include "atn/ATNState.h" +#include "atn/BlockEndState.h" + +#include "atn/DecisionState.h" +#include "atn/RuleStartState.h" +#include "atn/LoopEndState.h" +#include "atn/BlockStartState.h" +#include "atn/Transition.h" +#include "atn/SetTransition.h" +#include "Token.h" +#include "misc/Interval.h" +#include "atn/ATN.h" + +#include "atn/RuleTransition.h" +#include "atn/PrecedencePredicateTransition.h" +#include "atn/PredicateTransition.h" +#include "atn/RangeTransition.h" +#include "atn/AtomTransition.h" +#include "atn/ActionTransition.h" +#include "atn/ATNDeserializer.h" + +#include "atn/TokensStartState.h" +#include "Exceptions.h" +#include "support/CPPUtils.h" + +#include "atn/LexerChannelAction.h" +#include "atn/LexerCustomAction.h" +#include "atn/LexerModeAction.h" +#include "atn/LexerPushModeAction.h" +#include "atn/LexerTypeAction.h" + +#include "Exceptions.h" + +#include "atn/ATNSerializer.h" + +using namespace antlrcpp; +using namespace antlr4::atn; + +ATNSerializer::ATNSerializer(ATN *atn) { this->atn = atn; } + +ATNSerializer::ATNSerializer(ATN *atn, const std::vector<std::string> &tokenNames) { + this->atn = atn; + _tokenNames = tokenNames; +} + +ATNSerializer::~ATNSerializer() { } + +std::vector<size_t> ATNSerializer::serialize() { + std::vector<size_t> data; + data.push_back(ATNDeserializer::SERIALIZED_VERSION); + serializeUUID(data, ATNDeserializer::SERIALIZED_UUID()); + + // convert grammar type to ATN const to avoid dependence on ANTLRParser + data.push_back(static_cast<size_t>(atn->grammarType)); + data.push_back(atn->maxTokenType); + size_t nedges = 0; + + std::unordered_map<misc::IntervalSet, int> setIndices; + std::vector<misc::IntervalSet> sets; + + // dump states, count edges and collect sets while doing so + std::vector<size_t> nonGreedyStates; + std::vector<size_t> precedenceStates; + data.push_back(atn->states.size()); + for (ATNState *s : atn->states) { + if (s == nullptr) { 
// might be optimized away + data.push_back(ATNState::ATN_INVALID_TYPE); + continue; + } + + size_t stateType = s->getStateType(); + if (is<DecisionState *>(s) && (static_cast<DecisionState *>(s))->nonGreedy) { + nonGreedyStates.push_back(s->stateNumber); + } + + if (is<RuleStartState *>(s) && (static_cast<RuleStartState *>(s))->isLeftRecursiveRule) { + precedenceStates.push_back(s->stateNumber); + } + + data.push_back(stateType); + + if (s->ruleIndex == INVALID_INDEX) { + data.push_back(0xFFFF); + } + else { + data.push_back(s->ruleIndex); + } + + if (s->getStateType() == ATNState::LOOP_END) { + data.push_back((static_cast<LoopEndState *>(s))->loopBackState->stateNumber); + } + else if (is<BlockStartState *>(s)) { + data.push_back((static_cast<BlockStartState *>(s))->endState->stateNumber); + } + + if (s->getStateType() != ATNState::RULE_STOP) { + // the deserializer can trivially derive these edges, so there's no need + // to serialize them + nedges += s->transitions.size(); + } + + for (size_t i = 0; i < s->transitions.size(); i++) { + Transition *t = s->transitions[i]; + Transition::SerializationType edgeType = t->getSerializationType(); + if (edgeType == Transition::SET || edgeType == Transition::NOT_SET) { + SetTransition *st = static_cast<SetTransition *>(t); + if (setIndices.find(st->set) == setIndices.end()) { + sets.push_back(st->set); + setIndices.insert({ st->set, (int)sets.size() - 1 }); + } + } + } + } + + // non-greedy states + data.push_back(nonGreedyStates.size()); + for (size_t i = 0; i < nonGreedyStates.size(); i++) { + data.push_back(nonGreedyStates.at(i)); + } + + // precedence states + data.push_back(precedenceStates.size()); + for (size_t i = 0; i < precedenceStates.size(); i++) { + data.push_back(precedenceStates.at(i)); + } + + size_t nrules = atn->ruleToStartState.size(); + data.push_back(nrules); + for (size_t r = 0; r < nrules; r++) { + ATNState *ruleStartState = atn->ruleToStartState[r]; + data.push_back(ruleStartState->stateNumber); + 
if (atn->grammarType == ATNType::LEXER) { + if (atn->ruleToTokenType[r] == Token::EOF) { + data.push_back(0xFFFF); + } + else { + data.push_back(atn->ruleToTokenType[r]); + } + } + } + + size_t nmodes = atn->modeToStartState.size(); + data.push_back(nmodes); + if (nmodes > 0) { + for (const auto &modeStartState : atn->modeToStartState) { + data.push_back(modeStartState->stateNumber); + } + } + + size_t nsets = sets.size(); + data.push_back(nsets); + for (auto set : sets) { + bool containsEof = set.contains(Token::EOF); + if (containsEof && set.getIntervals().at(0).b == -1) { + data.push_back(set.getIntervals().size() - 1); + } + else { + data.push_back(set.getIntervals().size()); + } + + data.push_back(containsEof ? 1 : 0); + for (const auto &interval : set.getIntervals()) { + if (interval.a == -1) { + if (interval.b == -1) { + continue; + } else { + data.push_back(0); + } + } + else { + data.push_back(interval.a); + } + + data.push_back(interval.b); + } + } + + data.push_back(nedges); + for (ATNState *s : atn->states) { + if (s == nullptr) { + // might be optimized away + continue; + } + + if (s->getStateType() == ATNState::RULE_STOP) { + continue; + } + + for (size_t i = 0; i < s->transitions.size(); i++) { + Transition *t = s->transitions[i]; + + if (atn->states[t->target->stateNumber] == nullptr) { + throw IllegalStateException("Cannot serialize a transition to a removed state."); + } + + size_t src = s->stateNumber; + size_t trg = t->target->stateNumber; + Transition::SerializationType edgeType = t->getSerializationType(); + size_t arg1 = 0; + size_t arg2 = 0; + size_t arg3 = 0; + switch (edgeType) { + case Transition::RULE: + trg = (static_cast<RuleTransition *>(t))->followState->stateNumber; + arg1 = (static_cast<RuleTransition *>(t))->target->stateNumber; + arg2 = (static_cast<RuleTransition *>(t))->ruleIndex; + arg3 = (static_cast<RuleTransition *>(t))->precedence; + break; + case Transition::PRECEDENCE: + { + PrecedencePredicateTransition *ppt = + 
static_cast<PrecedencePredicateTransition *>(t); + arg1 = ppt->precedence; + } + break; + case Transition::PREDICATE: + { + PredicateTransition *pt = static_cast<PredicateTransition *>(t); + arg1 = pt->ruleIndex; + arg2 = pt->predIndex; + arg3 = pt->isCtxDependent ? 1 : 0; + } + break; + case Transition::RANGE: + arg1 = (static_cast<RangeTransition *>(t))->from; + arg2 = (static_cast<RangeTransition *>(t))->to; + if (arg1 == Token::EOF) { + arg1 = 0; + arg3 = 1; + } + + break; + case Transition::ATOM: + arg1 = (static_cast<AtomTransition *>(t))->_label; + if (arg1 == Token::EOF) { + arg1 = 0; + arg3 = 1; + } + + break; + case Transition::ACTION: + { + ActionTransition *at = static_cast<ActionTransition *>(t); + arg1 = at->ruleIndex; + arg2 = at->actionIndex; + if (arg2 == INVALID_INDEX) { + arg2 = 0xFFFF; + } + + arg3 = at->isCtxDependent ? 1 : 0; + } + break; + case Transition::SET: + arg1 = setIndices[(static_cast<SetTransition *>(t))->set]; + break; + + case Transition::NOT_SET: + arg1 = setIndices[(static_cast<SetTransition *>(t))->set]; + break; + + default: + break; + } + + data.push_back(src); + data.push_back(trg); + data.push_back(edgeType); + data.push_back(arg1); + data.push_back(arg2); + data.push_back(arg3); + } + } + + size_t ndecisions = atn->decisionToState.size(); + data.push_back(ndecisions); + for (DecisionState *decStartState : atn->decisionToState) { + data.push_back(decStartState->stateNumber); + } + + // LEXER ACTIONS + if (atn->grammarType == ATNType::LEXER) { + data.push_back(atn->lexerActions.size()); + for (Ref<LexerAction> &action : atn->lexerActions) { + data.push_back(static_cast<size_t>(action->getActionType())); + switch (action->getActionType()) { + case LexerActionType::CHANNEL: + { + int channel = std::dynamic_pointer_cast<LexerChannelAction>(action)->getChannel(); + data.push_back(channel != -1 ? 
channel : 0xFFFF); + data.push_back(0); + break; + } + + case LexerActionType::CUSTOM: + { + size_t ruleIndex = std::dynamic_pointer_cast<LexerCustomAction>(action)->getRuleIndex(); + size_t actionIndex = std::dynamic_pointer_cast<LexerCustomAction>(action)->getActionIndex(); + data.push_back(ruleIndex != INVALID_INDEX ? ruleIndex : 0xFFFF); + data.push_back(actionIndex != INVALID_INDEX ? actionIndex : 0xFFFF); + break; + } + + case LexerActionType::MODE: + { + int mode = std::dynamic_pointer_cast<LexerModeAction>(action)->getMode(); + data.push_back(mode != -1 ? mode : 0xFFFF); + data.push_back(0); + break; + } + + case LexerActionType::MORE: + data.push_back(0); + data.push_back(0); + break; + + case LexerActionType::POP_MODE: + data.push_back(0); + data.push_back(0); + break; + + case LexerActionType::PUSH_MODE: + { + int mode = std::dynamic_pointer_cast<LexerPushModeAction>(action)->getMode(); + data.push_back(mode != -1 ? mode : 0xFFFF); + data.push_back(0); + break; + } + + case LexerActionType::SKIP: + data.push_back(0); + data.push_back(0); + break; + + case LexerActionType::TYPE: + { + int type = std::dynamic_pointer_cast<LexerTypeAction>(action)->getType(); + data.push_back(type != -1 ? 
type : 0xFFFF); + data.push_back(0); + break; + } + + default: + throw IllegalArgumentException("The specified lexer action type " + + std::to_string(static_cast<size_t>(action->getActionType())) + + " is not valid."); + } + } + } + + // don't adjust the first value since that's the version number + for (size_t i = 1; i < data.size(); i++) { + if (data.at(i) > 0xFFFF) { + throw UnsupportedOperationException("Serialized ATN data element out of range."); + } + + size_t value = (data.at(i) + 2) & 0xFFFF; + data.at(i) = value; + } + + return data; +} + +//------------------------------------------------------------------------------------------------------------ + +std::string ATNSerializer::decode(const std::wstring &inpdata) { + if (inpdata.size() < 10) + throw IllegalArgumentException("Not enough data to decode"); + + std::vector<uint16_t> data(inpdata.size()); + data[0] = (uint16_t)inpdata[0]; + + // Don't adjust the first value since that's the version number. + for (size_t i = 1; i < inpdata.size(); ++i) { + data[i] = (uint16_t)inpdata[i] - 2; + } + + std::string buf; + size_t p = 0; + size_t version = data[p++]; + if (version != ATNDeserializer::SERIALIZED_VERSION) { + std::string reason = "Could not deserialize ATN with version " + std::to_string(version) + "(expected " + + std::to_string(ATNDeserializer::SERIALIZED_VERSION) + ")."; + throw UnsupportedOperationException("ATN Serializer" + reason); + } + + Guid uuid = ATNDeserializer::toUUID(data.data(), p); + p += 8; + if (uuid != ATNDeserializer::SERIALIZED_UUID()) { + std::string reason = "Could not deserialize ATN with UUID " + uuid.toString() + " (expected " + + ATNDeserializer::SERIALIZED_UUID().toString() + ")."; + throw UnsupportedOperationException("ATN Serializer" + reason); + } + + p++; // skip grammarType + size_t maxType = data[p++]; + buf.append("max type ").append(std::to_string(maxType)).append("\n"); + size_t nstates = data[p++]; + for (size_t i = 0; i < nstates; i++) { + size_t stype = 
data[p++]; + if (stype == ATNState::ATN_INVALID_TYPE) { // ignore bad type of states + continue; + } + size_t ruleIndex = data[p++]; + if (ruleIndex == 0xFFFF) { + ruleIndex = INVALID_INDEX; + } + + std::string arg = ""; + if (stype == ATNState::LOOP_END) { + int loopBackStateNumber = data[p++]; + arg = std::string(" ") + std::to_string(loopBackStateNumber); + } + else if (stype == ATNState::PLUS_BLOCK_START || + stype == ATNState::STAR_BLOCK_START || + stype == ATNState::BLOCK_START) { + int endStateNumber = data[p++]; + arg = std::string(" ") + std::to_string(endStateNumber); + } + buf.append(std::to_string(i)) + .append(":") + .append(ATNState::serializationNames[stype]) + .append(" ") + .append(std::to_string(ruleIndex)) + .append(arg) + .append("\n"); + } + size_t numNonGreedyStates = data[p++]; + p += numNonGreedyStates; // Instead of that useless loop below. + /* + for (int i = 0; i < numNonGreedyStates; i++) { + int stateNumber = data[p++]; + } + */ + + size_t numPrecedenceStates = data[p++]; + p += numPrecedenceStates; + /* + for (int i = 0; i < numPrecedenceStates; i++) { + int stateNumber = data[p++]; + } + */ + + size_t nrules = data[p++]; + for (size_t i = 0; i < nrules; i++) { + size_t s = data[p++]; + if (atn->grammarType == ATNType::LEXER) { + size_t arg1 = data[p++]; + buf.append("rule ") + .append(std::to_string(i)) + .append(":") + .append(std::to_string(s)) + .append(" ") + .append(std::to_string(arg1)) + .append("\n"); + } + else { + buf.append("rule ") + .append(std::to_string(i)) + .append(":") + .append(std::to_string(s)) + .append("\n"); + } + } + size_t nmodes = data[p++]; + for (size_t i = 0; i < nmodes; i++) { + size_t s = data[p++]; + buf.append("mode ") + .append(std::to_string(i)) + .append(":") + .append(std::to_string(s)) + .append("\n"); + } + size_t nsets = data[p++]; + for (size_t i = 0; i < nsets; i++) { + size_t nintervals = data[p++]; + buf.append(std::to_string(i)).append(":"); + bool containsEof = data[p++] != 0; + if 
(containsEof) { + buf.append(getTokenName(Token::EOF)); + } + + for (size_t j = 0; j < nintervals; j++) { + if (containsEof || j > 0) { + buf.append(", "); + } + + buf.append(getTokenName(data[p])) + .append("..") + .append(getTokenName(data[p + 1])); + p += 2; + } + buf.append("\n"); + } + size_t nedges = data[p++]; + for (size_t i = 0; i < nedges; i++) { + size_t src = data[p]; + size_t trg = data[p + 1]; + size_t ttype = data[p + 2]; + size_t arg1 = data[p + 3]; + size_t arg2 = data[p + 4]; + size_t arg3 = data[p + 5]; + buf.append(std::to_string(src)) + .append("->") + .append(std::to_string(trg)) + .append(" ") + .append(Transition::serializationNames[ttype]) + .append(" ") + .append(std::to_string(arg1)) + .append(",") + .append(std::to_string(arg2)) + .append(",") + .append(std::to_string(arg3)) + .append("\n"); + p += 6; + } + size_t ndecisions = data[p++]; + for (size_t i = 0; i < ndecisions; i++) { + size_t s = data[p++]; + buf += std::to_string(i) + ":" + std::to_string(s) + "\n"; + } + + if (atn->grammarType == ATNType::LEXER) { + //int lexerActionCount = data[p++]; + + //p += lexerActionCount * 3; // Instead of useless loop below. 
+ /* + for (int i = 0; i < lexerActionCount; i++) { + LexerActionType actionType = (LexerActionType)data[p++]; + int data1 = data[p++]; + int data2 = data[p++]; + } + */ + } + + return buf; +} + +std::string ATNSerializer::getTokenName(size_t t) { + if (t == Token::EOF) { + return "EOF"; + } + + if (atn->grammarType == ATNType::LEXER && t <= 0x10FFFF) { + switch (t) { + case '\n': + return "'\\n'"; + case '\r': + return "'\\r'"; + case '\t': + return "'\\t'"; + case '\b': + return "'\\b'"; + case '\f': + return "'\\f'"; + case '\\': + return "'\\\\'"; + case '\'': + return "'\\''"; + default: + std::string s_hex = antlrcpp::toHexString((int)t); + if (s_hex >= "0" && s_hex <= "7F" && !iscntrl((int)t)) { + return "'" + std::to_string(t) + "'"; + } + + // turn on the bit above max "\u10FFFF" value so that we pad with zeros + // then only take last 6 digits + std::string hex = antlrcpp::toHexString((int)t | 0x1000000).substr(1, 6); + std::string unicodeStr = std::string("'\\u") + hex + std::string("'"); + return unicodeStr; + } + } + + if (_tokenNames.size() > 0 && t < _tokenNames.size()) { + return _tokenNames[t]; + } + + return std::to_string(t); +} + +std::wstring ATNSerializer::getSerializedAsString(ATN *atn) { + std::vector<size_t> data = getSerialized(atn); + std::wstring result; + for (size_t entry : data) + result.push_back((wchar_t)entry); + + return result; +} + +std::vector<size_t> ATNSerializer::getSerialized(ATN *atn) { + return ATNSerializer(atn).serialize(); +} + +std::string ATNSerializer::getDecoded(ATN *atn, std::vector<std::string> &tokenNames) { + std::wstring serialized = getSerializedAsString(atn); + return ATNSerializer(atn, tokenNames).decode(serialized); +} + +void ATNSerializer::serializeUUID(std::vector<size_t> &data, Guid uuid) { + unsigned int twoBytes = 0; + bool firstByte = true; + for( std::vector<unsigned char>::const_reverse_iterator rit = uuid.rbegin(); rit != uuid.rend(); ++rit ) + { + if (firstByte) { + twoBytes = *rit; + firstByte 
= false; + } else { + twoBytes |= (*rit << 8); + data.push_back(twoBytes); + firstByte = true; + } + } + if (!firstByte) + throw IllegalArgumentException( "The UUID provided is not valid (odd number of bytes)." ); +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNSerializer.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNSerializer.h new file mode 100644 index 0000000..a6d1d69 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNSerializer.h @@ -0,0 +1,61 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ATNSerializer { + public: + ATN *atn; + + ATNSerializer(ATN *atn); + ATNSerializer(ATN *atn, const std::vector<std::string> &tokenNames); + virtual ~ATNSerializer(); + + /// <summary> + /// Serialize state descriptors, edge descriptors, and decision->state map + /// into list of ints: + /// + /// grammar-type, (ANTLRParser.LEXER, ...) + /// max token type, + /// num states, + /// state-0-type ruleIndex, state-1-type ruleIndex, ... state-i-type + /// ruleIndex optional-arg ... + /// num rules, + /// rule-1-start-state rule-1-args, rule-2-start-state rule-2-args, ... + /// (args are token type,actionIndex in lexer else 0,0) + /// num modes, + /// mode-0-start-state, mode-1-start-state, ... (parser has 0 modes) + /// num sets + /// set-0-interval-count intervals, set-1-interval-count intervals, ... + /// num total edges, + /// src, trg, edge-type, edge arg1, optional edge arg2 (present always), + /// ... + /// num decisions, + /// decision-0-start-state, decision-1-start-state, ... + /// + /// Convenient to pack into unsigned shorts to make as Java string. 
+ /// </summary> + virtual std::vector<size_t> serialize(); + + virtual std::string decode(const std::wstring& data); + virtual std::string getTokenName(size_t t); + + /// Used by Java target to encode short/int array as chars in string. + static std::wstring getSerializedAsString(ATN *atn); + static std::vector<size_t> getSerialized(ATN *atn); + + static std::string getDecoded(ATN *atn, std::vector<std::string> &tokenNames); + + private: + std::vector<std::string> _tokenNames; + + void serializeUUID(std::vector<size_t> &data, Guid uuid); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNSimulator.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNSimulator.cpp new file mode 100644 index 0000000..29570b9 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNSimulator.cpp @@ -0,0 +1,63 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ATNType.h" +#include "atn/ATNConfigSet.h" +#include "dfa/DFAState.h" +#include "atn/ATNDeserializer.h" +#include "atn/EmptyPredictionContext.h" + +#include "atn/ATNSimulator.h" + +using namespace antlr4; +using namespace antlr4::dfa; +using namespace antlr4::atn; + +const Ref<DFAState> ATNSimulator::ERROR = std::make_shared<DFAState>(INT32_MAX); +antlrcpp::SingleWriteMultipleReadLock ATNSimulator::_stateLock; +antlrcpp::SingleWriteMultipleReadLock ATNSimulator::_edgeLock; + +ATNSimulator::ATNSimulator(const ATN &atn, PredictionContextCache &sharedContextCache) +: atn(atn), _sharedContextCache(sharedContextCache) { +} + +ATNSimulator::~ATNSimulator() { +} + +void ATNSimulator::clearDFA() { + throw UnsupportedOperationException("This ATN simulator does not support clearing the DFA."); +} + +PredictionContextCache& ATNSimulator::getSharedContextCache() { + return _sharedContextCache; +} + +Ref<PredictionContext> ATNSimulator::getCachedContext(Ref<PredictionContext> const& context) { + // This function must only be called with an active state lock, as we are going to change a shared structure. 
+ std::map<Ref<PredictionContext>, Ref<PredictionContext>> visited; + return PredictionContext::getCachedContext(context, _sharedContextCache, visited); +} + +ATN ATNSimulator::deserialize(const std::vector<uint16_t> &data) { + ATNDeserializer deserializer; + return deserializer.deserialize(data); +} + +void ATNSimulator::checkCondition(bool condition) { + ATNDeserializer::checkCondition(condition); +} + +void ATNSimulator::checkCondition(bool condition, const std::string &message) { + ATNDeserializer::checkCondition(condition, message); +} + +Transition *ATNSimulator::edgeFactory(const ATN &atn, int type, int src, int trg, int arg1, int arg2, int arg3, + const std::vector<misc::IntervalSet> &sets) { + return ATNDeserializer::edgeFactory(atn, type, src, trg, arg1, arg2, arg3, sets); +} + +ATNState *ATNSimulator::stateFactory(int type, int ruleIndex) { + return ATNDeserializer::stateFactory(type, ruleIndex); +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNSimulator.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNSimulator.h new file mode 100644 index 0000000..f702c97 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNSimulator.h @@ -0,0 +1,87 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATN.h" +#include "misc/IntervalSet.h" +#include "support/CPPUtils.h" +#include "atn/PredictionContext.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ATNSimulator { + public: + /// Must distinguish between missing edge and edge we know leads nowhere. + static const Ref<dfa::DFAState> ERROR; + const ATN &atn; + + ATNSimulator(const ATN &atn, PredictionContextCache &sharedContextCache); + virtual ~ATNSimulator(); + + virtual void reset() = 0; + + /** + * Clear the DFA cache used by the current instance. 
Since the DFA cache may + * be shared by multiple ATN simulators, this method may affect the + * performance (but not accuracy) of other parsers which are being used + * concurrently. + * + * @throws UnsupportedOperationException if the current instance does not + * support clearing the DFA. + * + * @since 4.3 + */ + virtual void clearDFA(); + virtual PredictionContextCache& getSharedContextCache(); + virtual Ref<PredictionContext> getCachedContext(Ref<PredictionContext> const& context); + + /// @deprecated Use <seealso cref="ATNDeserializer#deserialize"/> instead. + static ATN deserialize(const std::vector<uint16_t> &data); + + /// @deprecated Use <seealso cref="ATNDeserializer#checkCondition(boolean)"/> instead. + static void checkCondition(bool condition); + + /// @deprecated Use <seealso cref="ATNDeserializer#checkCondition(boolean, String)"/> instead. + static void checkCondition(bool condition, const std::string &message); + + /// @deprecated Use <seealso cref="ATNDeserializer#edgeFactory"/> instead. + static Transition *edgeFactory(const ATN &atn, int type, int src, int trg, int arg1, int arg2, int arg3, + const std::vector<misc::IntervalSet> &sets); + + /// @deprecated Use <seealso cref="ATNDeserializer#stateFactory"/> instead. + static ATNState *stateFactory(int type, int ruleIndex); + + protected: + static antlrcpp::SingleWriteMultipleReadLock _stateLock; // Lock for DFA states. + static antlrcpp::SingleWriteMultipleReadLock _edgeLock; // Lock for the sparse edge map in DFA states. + + /// <summary> + /// The context cache maps all PredictionContext objects that are equals() + /// to a single cached copy. This cache is shared across all contexts + /// in all ATNConfigs in all DFA states. We rebuild each ATNConfigSet + /// to use only cached nodes/graphs in addDFAState(). We don't want to + /// fill this during closure() since there are lots of contexts that + /// pop up but are not used ever again. It also greatly slows down closure(). 
+ /// <p/> + /// This cache makes a huge difference in memory and a little bit in speed. + /// For the Java grammar on java.*, it dropped the memory requirements + /// at the end from 25M to 16M. We don't store any of the full context + /// graphs in the DFA because they are limited to local context only, + /// but apparently there's a lot of repetition there as well. We optimize + /// the config contexts before storing the config set in the DFA states + /// by literally rebuilding them with cached subgraphs only. + /// <p/> + /// I tried a cache for use during closure operations, that was + /// whacked after each adaptivePredict(). It cost a little bit + /// more time I think and doesn't save on the overall footprint + /// so it's not worth the complexity. + /// </summary> + PredictionContextCache &_sharedContextCache; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNState.cpp new file mode 100644 index 0000000..5dcb85d --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNState.cpp @@ -0,0 +1,72 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ATN.h" +#include "atn/Transition.h" +#include "misc/IntervalSet.h" +#include "support/CPPUtils.h" + +#include "atn/ATNState.h" + +using namespace antlr4::atn; +using namespace antlrcpp; + +ATNState::ATNState() { +} + +ATNState::~ATNState() { + for (auto *transition : transitions) { + delete transition; + } +} + +const std::vector<std::string> ATNState::serializationNames = { + "INVALID", "BASIC", "RULE_START", "BLOCK_START", + "PLUS_BLOCK_START", "STAR_BLOCK_START", "TOKEN_START", "RULE_STOP", + "BLOCK_END", "STAR_LOOP_BACK", "STAR_LOOP_ENTRY", "PLUS_LOOP_BACK", "LOOP_END" +}; + +size_t ATNState::hashCode() { + return stateNumber; +} + +bool ATNState::operator == (const ATNState &other) { + return stateNumber == other.stateNumber; +} + +bool ATNState::isNonGreedyExitState() { + return false; +} + +std::string ATNState::toString() const { + return std::to_string(stateNumber); +} + +void ATNState::addTransition(Transition *e) { + addTransition(transitions.size(), e); +} + +void ATNState::addTransition(size_t index, Transition *e) { + for (Transition *transition : transitions) + if (transition->target->stateNumber == e->target->stateNumber) { + delete e; + return; + } + + if (transitions.empty()) { + epsilonOnlyTransitions = e->isEpsilon(); + } else if (epsilonOnlyTransitions != e->isEpsilon()) { + std::cerr << "ATN state %d has both epsilon and non-epsilon transitions.\n" << stateNumber; + epsilonOnlyTransitions = false; + } + + transitions.insert(transitions.begin() + index, e); +} + +Transition *ATNState::removeTransition(size_t index) { + Transition *result = transitions[index]; + transitions.erase(transitions.begin() + index); + return result; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNState.h new file mode 100644 index 0000000..8ed6137 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNState.h @@ -0,0 +1,140 @@ +/* Copyright (c) 
2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "misc/IntervalSet.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// The following images show the relation of states and + /// <seealso cref="ATNState#transitions"/> for various grammar constructs. + /// + /// <ul> + /// + /// <li>Solid edges marked with an ε indicate a required + /// <seealso cref="EpsilonTransition"/>.</li> + /// + /// <li>Dashed edges indicate locations where any transition derived from + /// <seealso cref="Transition"/> might appear.</li> + /// + /// <li>Dashed nodes are place holders for either a sequence of linked + /// <seealso cref="BasicState"/> states or the inclusion of a block representing a nested + /// construct in one of the forms below.</li> + /// + /// <li>Nodes showing multiple outgoing alternatives with a {@code ...} support + /// any number of alternatives (one or more). 
Nodes without the {@code ...} only + /// support the exact number of alternatives shown in the diagram.</li> + /// + /// </ul> + /// + /// <h2>Basic Blocks</h2> + /// + /// <h3>Rule</h3> + /// + /// <embed src="images/Rule.svg" type="image/svg+xml"/> + /// + /// <h3>Block of 1 or more alternatives</h3> + /// + /// <embed src="images/Block.svg" type="image/svg+xml"/> + /// + /// <h2>Greedy Loops</h2> + /// + /// <h3>Greedy Closure: {@code (...)*}</h3> + /// + /// <embed src="images/ClosureGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Greedy Positive Closure: {@code (...)+}</h3> + /// + /// <embed src="images/PositiveClosureGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Greedy Optional: {@code (...)?}</h3> + /// + /// <embed src="images/OptionalGreedy.svg" type="image/svg+xml"/> + /// + /// <h2>Non-Greedy Loops</h2> + /// + /// <h3>Non-Greedy Closure: {@code (...)*?}</h3> + /// + /// <embed src="images/ClosureNonGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Non-Greedy Positive Closure: {@code (...)+?}</h3> + /// + /// <embed src="images/PositiveClosureNonGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Non-Greedy Optional: {@code (...)??}</h3> + /// + /// <embed src="images/OptionalNonGreedy.svg" type="image/svg+xml"/> + /// </summary> + class ANTLR4CPP_PUBLIC ATN; + + class ANTLR4CPP_PUBLIC ATNState { + public: + ATNState(); + ATNState(ATNState const&) = delete; + + virtual ~ATNState(); + + ATNState& operator=(ATNState const&) = delete; + +#if __cplusplus >= 201703L + static constexpr size_t INITIAL_NUM_TRANSITIONS = 4; + static constexpr size_t INVALID_STATE_NUMBER = std::numeric_limits<size_t>::max(); +#else + enum : size_t { + INITIAL_NUM_TRANSITIONS = 4, + INVALID_STATE_NUMBER = static_cast<size_t>(-1), // std::numeric_limits<size_t>::max(); doesn't work in VS 2013 + }; +#endif + + enum { + ATN_INVALID_TYPE = 0, + BASIC = 1, + RULE_START = 2, + BLOCK_START = 3, + PLUS_BLOCK_START = 4, + STAR_BLOCK_START = 5, + TOKEN_START = 6, + RULE_STOP = 7, + 
BLOCK_END = 8, + STAR_LOOP_BACK = 9, + STAR_LOOP_ENTRY = 10, + PLUS_LOOP_BACK = 11, + LOOP_END = 12 + }; + + static const std::vector<std::string> serializationNames; + + size_t stateNumber = INVALID_STATE_NUMBER; + size_t ruleIndex = 0; // at runtime, we don't have Rule objects + bool epsilonOnlyTransitions = false; + + public: + virtual size_t hashCode(); + bool operator == (const ATNState &other); + + /// Track the transitions emanating from this ATN state. + std::vector<Transition*> transitions; + + virtual bool isNonGreedyExitState(); + virtual std::string toString() const; + virtual void addTransition(Transition *e); + virtual void addTransition(size_t index, Transition *e); + virtual Transition* removeTransition(size_t index); + virtual size_t getStateType() = 0; + + private: + /// Used to cache lookahead during parsing, not used during construction. + + misc::IntervalSet _nextTokenWithinRule; + std::atomic<bool> _nextTokenUpdated { false }; + + friend class ATN; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNType.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNType.h new file mode 100644 index 0000000..19ed7a6 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ATNType.h @@ -0,0 +1,20 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// Represents the type of recognizer an ATN applies to. 
+ enum class ATNType { + LEXER = 0, + PARSER = 1, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AbstractPredicateTransition.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AbstractPredicateTransition.cpp new file mode 100644 index 0000000..ef8afc2 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AbstractPredicateTransition.cpp @@ -0,0 +1,14 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/AbstractPredicateTransition.h" + +using namespace antlr4::atn; + +AbstractPredicateTransition::AbstractPredicateTransition(ATNState *target) : Transition(target) { +} + +AbstractPredicateTransition::~AbstractPredicateTransition() { +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AbstractPredicateTransition.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AbstractPredicateTransition.h new file mode 100644 index 0000000..4865cb1 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AbstractPredicateTransition.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTState; + + class ANTLR4CPP_PUBLIC AbstractPredicateTransition : public Transition { + + public: + AbstractPredicateTransition(ATNState *target); + ~AbstractPredicateTransition(); + + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ActionTransition.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ActionTransition.cpp new file mode 100644 index 0000000..fa11c44 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ActionTransition.cpp @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ActionTransition.h" + +using namespace antlr4::atn; + +ActionTransition::ActionTransition(ATNState *target, size_t ruleIndex) + : Transition(target), ruleIndex(ruleIndex), actionIndex(INVALID_INDEX), isCtxDependent(false) { +} + +ActionTransition::ActionTransition(ATNState *target, size_t ruleIndex, size_t actionIndex, bool isCtxDependent) + : Transition(target), ruleIndex(ruleIndex), actionIndex(actionIndex), isCtxDependent(isCtxDependent) { +} + +Transition::SerializationType ActionTransition::getSerializationType() const { + return ACTION; +} + +bool ActionTransition::isEpsilon() const { + return true; // we are to be ignored by analysis 'cept for predicates +} + +bool ActionTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +std::string ActionTransition::toString() const { + return " ACTION " + Transition::toString() + " { ruleIndex: " + std::to_string(ruleIndex) + ", actionIndex: " + + std::to_string(actionIndex) + ", isCtxDependent: " + std::to_string(isCtxDependent) + " }"; +} diff --git 
a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ActionTransition.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ActionTransition.h new file mode 100644 index 0000000..652e75f --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ActionTransition.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ActionTransition final : public Transition { + public: + const size_t ruleIndex; + const size_t actionIndex; + const bool isCtxDependent; // e.g., $i ref in action + + ActionTransition(ATNState *target, size_t ruleIndex); + + ActionTransition(ATNState *target, size_t ruleIndex, size_t actionIndex, bool isCtxDependent); + + virtual SerializationType getSerializationType() const override; + + virtual bool isEpsilon() const override; + + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AmbiguityInfo.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AmbiguityInfo.cpp new file mode 100644 index 0000000..72ce922 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AmbiguityInfo.cpp @@ -0,0 +1,16 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/AmbiguityInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +AmbiguityInfo::AmbiguityInfo(size_t decision, ATNConfigSet *configs, const antlrcpp::BitSet &ambigAlts, + TokenStream *input, size_t startIndex, size_t stopIndex, bool fullCtx) + : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, fullCtx) { + + this->ambigAlts = ambigAlts; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AmbiguityInfo.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AmbiguityInfo.h new file mode 100644 index 0000000..db594a1 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AmbiguityInfo.h @@ -0,0 +1,68 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" +#include "support/BitSet.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This class represents profiling event information for an ambiguity. + /// Ambiguities are decisions where a particular input resulted in an SLL + /// conflict, followed by LL prediction also reaching a conflict state + /// (indicating a true ambiguity in the grammar). + /// + /// <para> + /// This event may be reported during SLL prediction in cases where the + /// conflicting SLL configuration set provides sufficient information to + /// determine that the SLL conflict is truly an ambiguity. For example, if none + /// of the ATN configurations in the conflicting SLL configuration set have + /// traversed a global follow transition (i.e. 
+ /// <seealso cref="ATNConfig#reachesIntoOuterContext"/> is 0 for all configurations), then + /// the result of SLL prediction for that input is known to be equivalent to the + /// result of LL prediction for that input.</para> + /// + /// <para> + /// In some cases, the minimum represented alternative in the conflicting LL + /// configuration set is not equal to the minimum represented alternative in the + /// conflicting SLL configuration set. Grammars and inputs which result in this + /// scenario are unable to use <seealso cref="PredictionMode#SLL"/>, which in turn means + /// they cannot use the two-stage parsing strategy to improve parsing performance + /// for that input.</para> + /// </summary> + /// <seealso cref= ParserATNSimulator#reportAmbiguity </seealso> + /// <seealso cref= ANTLRErrorListener#reportAmbiguity + /// + /// @since 4.3 </seealso> + class ANTLR4CPP_PUBLIC AmbiguityInfo : public DecisionEventInfo { + public: + /// The set of alternative numbers for this decision event that lead to a valid parse. + antlrcpp::BitSet ambigAlts; + + /// <summary> + /// Constructs a new instance of the <seealso cref="AmbiguityInfo"/> class with the + /// specified detailed ambiguity information. + /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="configs"> The final configuration set identifying the ambiguous + /// alternatives for the current input </param> + /// <param name="ambigAlts"> The set of alternatives in the decision that lead to a valid parse. 
+ /// The predicted alt is the min(ambigAlts) </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the ambiguity was identified during + /// prediction </param> + /// <param name="fullCtx"> {@code true} if the ambiguity was identified during LL + /// prediction; otherwise, {@code false} if the ambiguity was identified + /// during SLL prediction </param> + AmbiguityInfo(size_t decision, ATNConfigSet *configs, const antlrcpp::BitSet &ambigAlts, TokenStream *input, + size_t startIndex, size_t stopIndex, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ArrayPredictionContext.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ArrayPredictionContext.cpp new file mode 100644 index 0000000..b69d30d --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ArrayPredictionContext.cpp @@ -0,0 +1,82 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "support/Arrays.h" +#include "atn/SingletonPredictionContext.h" + +#include "atn/ArrayPredictionContext.h" + +using namespace antlr4::atn; + +ArrayPredictionContext::ArrayPredictionContext(Ref<SingletonPredictionContext> const& a) + : ArrayPredictionContext({ a->parent }, { a->returnState }) { +} + +ArrayPredictionContext::ArrayPredictionContext(std::vector<Ref<PredictionContext>> const& parents_, + std::vector<size_t> const& returnStates) + : PredictionContext(calculateHashCode(parents_, returnStates)), parents(parents_), returnStates(returnStates) { + assert(parents.size() > 0); + assert(returnStates.size() > 0); +} + +ArrayPredictionContext::~ArrayPredictionContext() { +} + +bool ArrayPredictionContext::isEmpty() const { + // Since EMPTY_RETURN_STATE can only appear in the last position, we don't need to verify that size == 1. + return returnStates[0] == EMPTY_RETURN_STATE; +} + +size_t ArrayPredictionContext::size() const { + return returnStates.size(); +} + +Ref<PredictionContext> ArrayPredictionContext::getParent(size_t index) const { + return parents[index]; +} + +size_t ArrayPredictionContext::getReturnState(size_t index) const { + return returnStates[index]; +} + +bool ArrayPredictionContext::operator == (PredictionContext const& o) const { + if (this == &o) { + return true; + } + + const ArrayPredictionContext *other = dynamic_cast<const ArrayPredictionContext*>(&o); + if (other == nullptr || hashCode() != other->hashCode()) { + return false; // can't be same if hash is different + } + + return antlrcpp::Arrays::equals(returnStates, other->returnStates) && + antlrcpp::Arrays::equals(parents, other->parents); +} + +std::string ArrayPredictionContext::toString() const { + if (isEmpty()) { + return "[]"; + } + + std::stringstream ss; + ss << "["; + for (size_t i = 0; i < returnStates.size(); i++) { + if (i > 0) { + ss << ", "; + } + if (returnStates[i] == EMPTY_RETURN_STATE) { + ss << "$"; + continue; + } + ss << returnStates[i]; + if 
(parents[i] != nullptr) { + ss << " " << parents[i]->toString(); + } else { + ss << "nul"; + } + } + ss << "]"; + return ss.str(); +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ArrayPredictionContext.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ArrayPredictionContext.h new file mode 100644 index 0000000..53a5b17 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ArrayPredictionContext.h @@ -0,0 +1,43 @@ + +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/PredictionContext.h" + +namespace antlr4 { +namespace atn { + + class SingletonPredictionContext; + + class ANTLR4CPP_PUBLIC ArrayPredictionContext : public PredictionContext { + public: + /// Parent can be empty only if full ctx mode and we make an array + /// from EMPTY and non-empty. We merge EMPTY by using null parent and + /// returnState == EMPTY_RETURN_STATE. + // Also here: we use a strong reference to our parents to avoid having them freed prematurely. + // See also SinglePredictionContext. + const std::vector<Ref<PredictionContext>> parents; + + /// Sorted for merge, no duplicates; if present, EMPTY_RETURN_STATE is always last. 
+ const std::vector<size_t> returnStates; + + ArrayPredictionContext(Ref<SingletonPredictionContext> const& a); + ArrayPredictionContext(std::vector<Ref<PredictionContext>> const& parents_, std::vector<size_t> const& returnStates); + virtual ~ArrayPredictionContext(); + + virtual bool isEmpty() const override; + virtual size_t size() const override; + virtual Ref<PredictionContext> getParent(size_t index) const override; + virtual size_t getReturnState(size_t index) const override; + bool operator == (const PredictionContext &o) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AtomTransition.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AtomTransition.cpp new file mode 100644 index 0000000..af956c2 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AtomTransition.cpp @@ -0,0 +1,31 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/IntervalSet.h" +#include "atn/Transition.h" + +#include "atn/AtomTransition.h" + +using namespace antlr4::misc; +using namespace antlr4::atn; + +AtomTransition::AtomTransition(ATNState *target, size_t label) : Transition(target), _label(label) { +} + +Transition::SerializationType AtomTransition::getSerializationType() const { + return ATOM; +} + +IntervalSet AtomTransition::label() const { + return IntervalSet::of((int)_label); +} + +bool AtomTransition::matches(size_t symbol, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return _label == symbol; +} + +std::string AtomTransition::toString() const { + return "ATOM " + Transition::toString() + " { label: " + std::to_string(_label) + " }"; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AtomTransition.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AtomTransition.h new file mode 100644 index 0000000..cc22e5a --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/AtomTransition.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + /// TODO: make all transitions sets? no, should remove set edges. + class ANTLR4CPP_PUBLIC AtomTransition final : public Transition { + public: + /// The token type or character value; or, signifies special label. 
+ const size_t _label; + + AtomTransition(ATNState *target, size_t label); + + virtual SerializationType getSerializationType() const override; + + virtual misc::IntervalSet label() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BasicBlockStartState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BasicBlockStartState.cpp new file mode 100644 index 0000000..dc36737 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BasicBlockStartState.cpp @@ -0,0 +1,12 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/BasicBlockStartState.h" + +using namespace antlr4::atn; + +size_t BasicBlockStartState::getStateType() { + return BLOCK_START; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BasicBlockStartState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BasicBlockStartState.h new file mode 100644 index 0000000..471fbc7 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BasicBlockStartState.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "antlr4-common.h" +#include "atn/BlockStartState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC BasicBlockStartState final : public BlockStartState { + + public: + virtual size_t getStateType() override; + + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BasicState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BasicState.cpp new file mode 100644 index 0000000..c8cda80 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BasicState.cpp @@ -0,0 +1,12 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/BasicState.h" + +using namespace antlr4::atn; + +size_t BasicState::getStateType() { + return BASIC; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BasicState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BasicState.h new file mode 100644 index 0000000..b650dc2 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BasicState.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC BasicState final : public ATNState { + + public: + virtual size_t getStateType() override; + + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BlockEndState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BlockEndState.cpp new file mode 100644 index 0000000..098d52a --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BlockEndState.cpp @@ -0,0 +1,15 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. 
All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/BlockEndState.h" + +using namespace antlr4::atn; + +BlockEndState::BlockEndState() : startState(nullptr) { +} + +size_t BlockEndState::getStateType() { + return BLOCK_END; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BlockEndState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BlockEndState.h new file mode 100644 index 0000000..b24bee1 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BlockEndState.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + /// Terminal node of a simple {@code (a|b|c)} block. + class ANTLR4CPP_PUBLIC BlockEndState final : public ATNState { + public: + BlockStartState *startState = nullptr; + + BlockEndState(); + + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BlockStartState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BlockStartState.cpp new file mode 100644 index 0000000..44cca8f --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BlockStartState.cpp @@ -0,0 +1,9 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "BlockStartState.h" + +antlr4::atn::BlockStartState::~BlockStartState() { +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BlockStartState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BlockStartState.h new file mode 100644 index 0000000..725c700 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/BlockStartState.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + /// The start of a regular {@code (...)} block. + class ANTLR4CPP_PUBLIC BlockStartState : public DecisionState { + public: + ~BlockStartState(); + BlockEndState *endState = nullptr; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ContextSensitivityInfo.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ContextSensitivityInfo.cpp new file mode 100644 index 0000000..12442a9 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ContextSensitivityInfo.cpp @@ -0,0 +1,14 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ContextSensitivityInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +ContextSensitivityInfo::ContextSensitivityInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, + size_t startIndex, size_t stopIndex) + : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, true) { +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ContextSensitivityInfo.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ContextSensitivityInfo.h new file mode 100644 index 0000000..430ce3b --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ContextSensitivityInfo.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This class represents profiling event information for a context sensitivity. + /// Context sensitivities are decisions where a particular input resulted in an + /// SLL conflict, but LL prediction produced a single unique alternative. + /// + /// <para> + /// In some cases, the unique alternative identified by LL prediction is not + /// equal to the minimum represented alternative in the conflicting SLL + /// configuration set. 
Grammars and inputs which result in this scenario are + /// unable to use <seealso cref="PredictionMode#SLL"/>, which in turn means they cannot use + /// the two-stage parsing strategy to improve parsing performance for that + /// input.</para> + /// </summary> + /// <seealso cref= ParserATNSimulator#reportContextSensitivity </seealso> + /// <seealso cref= ANTLRErrorListener#reportContextSensitivity + /// + /// @since 4.3 </seealso> + class ANTLR4CPP_PUBLIC ContextSensitivityInfo : public DecisionEventInfo { + public: + /// <summary> + /// Constructs a new instance of the <seealso cref="ContextSensitivityInfo"/> class + /// with the specified detailed context sensitivity information. + /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="configs"> The final configuration set containing the unique + /// alternative identified by full-context prediction </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the context sensitivity was + /// identified during full-context prediction </param> + ContextSensitivityInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, size_t stopIndex); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionEventInfo.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionEventInfo.cpp new file mode 100644 index 0000000..bca6c77 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionEventInfo.cpp @@ -0,0 +1,14 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/DecisionEventInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +DecisionEventInfo::DecisionEventInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, + size_t stopIndex, bool fullCtx) + : decision(decision), configs(configs), input(input), startIndex(startIndex), stopIndex(stopIndex), fullCtx(fullCtx) { +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionEventInfo.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionEventInfo.h new file mode 100644 index 0000000..af7f5f4 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionEventInfo.h @@ -0,0 +1,70 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This is the base class for gathering detailed information about prediction + /// events which occur during parsing. + /// + /// Note that we could record the parser call stack at the time this event + /// occurred but in the presence of left recursive rules, the stack is kind of + /// meaningless. It's better to look at the individual configurations for their + /// individual stacks. Of course that is a <seealso cref="PredictionContext"/> object + /// not a parse tree node and so it does not have information about the extent + /// (start...stop) of the various subtrees. Examining the stack tops of all + /// configurations provide the return states for the rule invocations. + /// From there you can get the enclosing rule. + /// + /// @since 4.3 + /// </summary> + class ANTLR4CPP_PUBLIC DecisionEventInfo { + public: + /// <summary> + /// The invoked decision number which this event is related to. 
+ /// </summary> + /// <seealso cref= ATN#decisionToState </seealso> + const size_t decision; + + /// <summary> + /// The configuration set containing additional information relevant to the + /// prediction state when the current event occurred, or {@code null} if no + /// additional information is relevant or available. + /// </summary> + const ATNConfigSet *configs; + + /// <summary> + /// The input token stream which is being parsed. + /// </summary> + const TokenStream *input; + + /// <summary> + /// The token index in the input stream at which the current prediction was + /// originally invoked. + /// </summary> + const size_t startIndex; + + /// <summary> + /// The token index in the input stream at which the current event occurred. + /// </summary> + const size_t stopIndex; + + /// <summary> + /// {@code true} if the current event occurred during LL prediction; + /// otherwise, {@code false} if the input occurred during SLL prediction. + /// </summary> + const bool fullCtx; + + DecisionEventInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, + size_t stopIndex, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionInfo.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionInfo.cpp new file mode 100644 index 0000000..ee9b1aa --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionInfo.cpp @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ErrorInfo.h" +#include "atn/LookaheadEventInfo.h" + +#include "atn/DecisionInfo.h" + +using namespace antlr4::atn; + +DecisionInfo::DecisionInfo(size_t decision) : decision(decision) { +} + +std::string DecisionInfo::toString() const { + std::stringstream ss; + + ss << "{decision=" << decision << ", contextSensitivities=" << contextSensitivities.size() << ", errors="; + ss << errors.size() << ", ambiguities=" << ambiguities.size() << ", SLL_lookahead=" << SLL_TotalLook; + ss << ", SLL_ATNTransitions=" << SLL_ATNTransitions << ", SLL_DFATransitions=" << SLL_DFATransitions; + ss << ", LL_Fallback=" << LL_Fallback << ", LL_lookahead=" << LL_TotalLook << ", LL_ATNTransitions=" << LL_ATNTransitions << '}'; + + return ss.str(); +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionInfo.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionInfo.h new file mode 100644 index 0000000..cfbb2e9 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionInfo.h @@ -0,0 +1,227 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ContextSensitivityInfo.h" +#include "atn/AmbiguityInfo.h" +#include "atn/PredicateEvalInfo.h" +#include "atn/ErrorInfo.h" + +namespace antlr4 { +namespace atn { + + class LookaheadEventInfo; + + /// <summary> + /// This class contains profiling gathered for a particular decision. + /// + /// <para> + /// Parsing performance in ANTLR 4 is heavily influenced by both static factors + /// (e.g. the form of the rules in the grammar) and dynamic factors (e.g. the + /// choice of input and the state of the DFA cache at the time profiling + /// operations are started). 
For best results, gather and use aggregate + /// statistics from a large sample of inputs representing the inputs expected in + /// production before using the results to make changes in the grammar.</para> + /// + /// @since 4.3 + /// </summary> + class ANTLR4CPP_PUBLIC DecisionInfo { + public: + /// <summary> + /// The decision number, which is an index into <seealso cref="ATN#decisionToState"/>. + /// </summary> + const size_t decision; + + /// <summary> + /// The total number of times <seealso cref="ParserATNSimulator#adaptivePredict"/> was + /// invoked for this decision. + /// </summary> + long long invocations = 0; + + /// <summary> + /// The total time spent in <seealso cref="ParserATNSimulator#adaptivePredict"/> for + /// this decision, in nanoseconds. + /// + /// <para> + /// The value of this field contains the sum of differential results obtained + /// by <seealso cref="System#nanoTime()"/>, and is not adjusted to compensate for JIT + /// and/or garbage collection overhead. For best accuracy, use a modern JVM + /// implementation that provides precise results from + /// <seealso cref="System#nanoTime()"/>, and perform profiling in a separate process + /// which is warmed up by parsing the input prior to profiling. If desired, + /// call <seealso cref="ATNSimulator#clearDFA"/> to reset the DFA cache to its initial + /// state before starting the profiling measurement pass.</para> + /// </summary> + long long timeInPrediction = 0; + + /// <summary> + /// The sum of the lookahead required for SLL prediction for this decision. + /// Note that SLL prediction is used before LL prediction for performance + /// reasons even when <seealso cref="PredictionMode#LL"/> or + /// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/> is used. 
+ /// </summary> + long long SLL_TotalLook = 0; + + /// <summary> + /// Gets the minimum lookahead required for any single SLL prediction to + /// complete for this decision, by reaching a unique prediction, reaching an + /// SLL conflict state, or encountering a syntax error. + /// </summary> + long long SLL_MinLook = 0; + + /// <summary> + /// Gets the maximum lookahead required for any single SLL prediction to + /// complete for this decision, by reaching a unique prediction, reaching an + /// SLL conflict state, or encountering a syntax error. + /// </summary> + long long SLL_MaxLook = 0; + + /// Gets the <seealso cref="LookaheadEventInfo"/> associated with the event where the + /// <seealso cref="#SLL_MaxLook"/> value was set. + Ref<LookaheadEventInfo> SLL_MaxLookEvent; + + /// <summary> + /// The sum of the lookahead required for LL prediction for this decision. + /// Note that LL prediction is only used when SLL prediction reaches a + /// conflict state. + /// </summary> + long long LL_TotalLook = 0; + + /// <summary> + /// Gets the minimum lookahead required for any single LL prediction to + /// complete for this decision. An LL prediction completes when the algorithm + /// reaches a unique prediction, a conflict state (for + /// <seealso cref="PredictionMode#LL"/>), an ambiguity state (for + /// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/>), or a syntax error. + /// </summary> + long long LL_MinLook = 0; + + /// <summary> + /// Gets the maximum lookahead required for any single LL prediction to + /// complete for this decision. An LL prediction completes when the algorithm + /// reaches a unique prediction, a conflict state (for + /// <seealso cref="PredictionMode#LL"/>), an ambiguity state (for + /// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/>), or a syntax error.
+ /// </summary> + long long LL_MaxLook = 0; + + /// <summary> + /// Gets the <seealso cref="LookaheadEventInfo"/> associated with the event where the + /// <seealso cref="#LL_MaxLook"/> value was set. + /// </summary> + Ref<LookaheadEventInfo> LL_MaxLookEvent; + + /// <summary> + /// A collection of <seealso cref="ContextSensitivityInfo"/> instances describing the + /// context sensitivities encountered during LL prediction for this decision. + /// </summary> + /// <seealso cref= ContextSensitivityInfo </seealso> + std::vector<ContextSensitivityInfo> contextSensitivities; + + /// <summary> + /// A collection of <seealso cref="ErrorInfo"/> instances describing the parse errors + /// identified during calls to <seealso cref="ParserATNSimulator#adaptivePredict"/> for + /// this decision. + /// </summary> + /// <seealso cref= ErrorInfo </seealso> + std::vector<ErrorInfo> errors; + + /// <summary> + /// A collection of <seealso cref="AmbiguityInfo"/> instances describing the + /// ambiguities encountered during LL prediction for this decision. + /// </summary> + /// <seealso cref= AmbiguityInfo </seealso> + std::vector<AmbiguityInfo> ambiguities; + + /// <summary> + /// A collection of <seealso cref="PredicateEvalInfo"/> instances describing the + /// results of evaluating individual predicates during prediction for this + /// decision. + /// </summary> + /// <seealso cref= PredicateEvalInfo </seealso> + std::vector<PredicateEvalInfo> predicateEvals; + + /// <summary> + /// The total number of ATN transitions required during SLL prediction for + /// this decision. An ATN transition is determined by the number of times the + /// DFA does not contain an edge that is required for prediction, resulting + /// in on-the-fly computation of that edge. + /// + /// <para> + /// If DFA caching of SLL transitions is employed by the implementation, ATN + /// computation may cache the computed edge for efficient lookup during + /// future parsing of this decision. 
Otherwise, the SLL parsing algorithm + /// will use ATN transitions exclusively.</para> + /// </summary> + /// <seealso cref= #SLL_DFATransitions </seealso> + /// <seealso cref= ParserATNSimulator#computeTargetState </seealso> + /// <seealso cref= LexerATNSimulator#computeTargetState </seealso> + long long SLL_ATNTransitions = 0; + + /// <summary> + /// The total number of DFA transitions required during SLL prediction for + /// this decision. + /// + /// <para>If the ATN simulator implementation does not use DFA caching for SLL + /// transitions, this value will be 0.</para> + /// </summary> + /// <seealso cref= ParserATNSimulator#getExistingTargetState </seealso> + /// <seealso cref= LexerATNSimulator#getExistingTargetState </seealso> + long long SLL_DFATransitions = 0; + + /// <summary> + /// Gets the total number of times SLL prediction completed in a conflict + /// state, resulting in fallback to LL prediction. + /// + /// <para>Note that this value is not related to whether or not + /// <seealso cref="PredictionMode#SLL"/> may be used successfully with a particular + /// grammar. If the ambiguity resolution algorithm applied to the SLL + /// conflicts for this decision produces the same result as LL prediction for + /// this decision, <seealso cref="PredictionMode#SLL"/> would produce the same overall + /// parsing result as <seealso cref="PredictionMode#LL"/>.</para> + /// </summary> + long long LL_Fallback = 0; + + /// <summary> + /// The total number of ATN transitions required during LL prediction for + /// this decision. An ATN transition is determined by the number of times the + /// DFA does not contain an edge that is required for prediction, resulting + /// in on-the-fly computation of that edge. + /// + /// <para> + /// If DFA caching of LL transitions is employed by the implementation, ATN + /// computation may cache the computed edge for efficient lookup during + /// future parsing of this decision. 
Otherwise, the LL parsing algorithm will + /// use ATN transitions exclusively.</para> + /// </summary> + /// <seealso cref= #LL_DFATransitions </seealso> + /// <seealso cref= ParserATNSimulator#computeTargetState </seealso> + /// <seealso cref= LexerATNSimulator#computeTargetState </seealso> + long long LL_ATNTransitions = 0; + + /// <summary> + /// The total number of DFA transitions required during LL prediction for + /// this decision. + /// + /// <para>If the ATN simulator implementation does not use DFA caching for LL + /// transitions, this value will be 0.</para> + /// </summary> + /// <seealso cref= ParserATNSimulator#getExistingTargetState </seealso> + /// <seealso cref= LexerATNSimulator#getExistingTargetState </seealso> + long long LL_DFATransitions = 0; + + /// <summary> + /// Constructs a new instance of the <seealso cref="DecisionInfo"/> class to contain + /// statistics for a particular decision. + /// </summary> + /// <param name="decision"> The decision number </param> + DecisionInfo(size_t decision); + + std::string toString() const; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionState.cpp new file mode 100644 index 0000000..924f814 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionState.cpp @@ -0,0 +1,17 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */
+
+#include "atn/DecisionState.h"
+
+using namespace antlr4::atn;
+
+// Assigns the initial field values: nonGreedy = false, decision = -1.
+void DecisionState::InitializeInstanceFields() {
+  nonGreedy = false;
+  decision = -1;
+}
+
+// Returns the ATNState description prefixed with "DECISION ".
+std::string DecisionState::toString() const {
+  std::string description = "DECISION ";
+  description += ATNState::toString();
+  return description;
+}
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionState.h
new file mode 100644
index 0000000..005de25
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/DecisionState.h
@@ -0,0 +1,30 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/ATNState.h"
+
+namespace antlr4 {
+namespace atn {
+
+  /// An ATN state that additionally carries a decision number and a
+  /// non-greedy flag.
+  class ANTLR4CPP_PUBLIC DecisionState : public ATNState {
+  public:
+    /// Decision number; set to -1 by the constructor.
+    int decision;
+    /// Non-greedy flag; set to false by the constructor.
+    bool nonGreedy;
+
+    /// Creates a state with decision == -1 and nonGreedy == false.
+    DecisionState() {
+      InitializeInstanceFields();
+    }
+
+    /// Textual form of this state: "DECISION " followed by ATNState::toString().
+    std::string toString() const override;
+
+  private:
+    /// Gives both public fields their initial values.
+    void InitializeInstanceFields();
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/EmptyPredictionContext.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/EmptyPredictionContext.cpp
new file mode 100644
index 0000000..17223bf
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/EmptyPredictionContext.cpp
@@ -0,0 +1,35 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "atn/EmptyPredictionContext.h"
+
+using namespace antlr4::atn;
+
+// Builds the context with no parent and EMPTY_RETURN_STATE as its return state.
+EmptyPredictionContext::EmptyPredictionContext()
+  : SingletonPredictionContext(nullptr, EMPTY_RETURN_STATE) {
+}
+
+// This context type always reports itself as empty.
+bool EmptyPredictionContext::isEmpty() const {
+  return true;
+}
+
+// Always 1.
+size_t EmptyPredictionContext::size() const {
+  return 1;
+}
+
+// There is no parent, whatever index is requested.
+Ref<PredictionContext> EmptyPredictionContext::getParent(size_t /*index*/) const {
+  return Ref<PredictionContext>();
+}
+
+// The index is ignored; the stored return state is handed back.
+size_t EmptyPredictionContext::getReturnState(size_t /*index*/) const {
+  return returnState;
+}
+
+// Equality is identity: only the very same object compares equal.
+bool EmptyPredictionContext::operator == (const PredictionContext &o) const {
+  return &o == this;
+}
+
+// Rendered as a single dollar sign.
+std::string EmptyPredictionContext::toString() const {
+  return std::string("$");
+}
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/EmptyPredictionContext.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/EmptyPredictionContext.h
new file mode 100644
index 0000000..93c036c
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/EmptyPredictionContext.h
@@ -0,0 +1,27 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/SingletonPredictionContext.h"
+
+namespace antlr4 {
+namespace atn {
+
+  /// A SingletonPredictionContext specialisation that overrides every query
+  /// below; see the individual members for what each one reports.
+  class ANTLR4CPP_PUBLIC EmptyPredictionContext : public SingletonPredictionContext {
+  public:
+    EmptyPredictionContext();
+
+    /// Emptiness query inherited from the prediction-context interface.
+    bool isEmpty() const override;
+    /// Number of entries in this context.
+    size_t size() const override;
+    /// Parent context for the given index.
+    Ref<PredictionContext> getParent(size_t index) const override;
+    /// Return state for the given index.
+    size_t getReturnState(size_t index) const override;
+    /// Textual form of this context.
+    std::string toString() const override;
+
+    /// Equality comparison against another prediction context.
+    bool operator == (const PredictionContext &o) const override;
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/EpsilonTransition.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/EpsilonTransition.cpp
new file mode 100644
index 0000000..550605d
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/EpsilonTransition.cpp
@@ -0,0 +1,35 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "atn/EpsilonTransition.h"
+
+using namespace antlr4::atn;
+
+// Convenience constructor: no outermost precedence return (INVALID_INDEX).
+EpsilonTransition::EpsilonTransition(ATNState *target) : EpsilonTransition(target, INVALID_INDEX) {
+}
+
+// Stores the target in the Transition base and remembers the rule index given
+// as outermostPrecedenceReturn.
+EpsilonTransition::EpsilonTransition(ATNState *target, size_t outermostPrecedenceReturn)
+  : Transition(target), _outermostPrecedenceReturn(outermostPrecedenceReturn) {
+}
+
+// Pure accessor for a const member, hence const-qualified so that it can be
+// called through const references and pointers as well.
+size_t EpsilonTransition::outermostPrecedenceReturn() const {
+  return _outermostPrecedenceReturn;
+}
+
+Transition::SerializationType EpsilonTransition::getSerializationType() const {
+  return EPSILON;
+}
+
+// Always true for this transition type.
+bool EpsilonTransition::isEpsilon() const {
+  return true;
+}
+
+// Never matches a symbol; all three arguments are ignored.
+bool EpsilonTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const {
+  return false;
+}
+
+// "EPSILON ", the base-class description, then " {}".
+std::string EpsilonTransition::toString() const {
+  return "EPSILON " + Transition::toString() + " {}";
+}
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/EpsilonTransition.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/EpsilonTransition.h
new file mode 100644
index 0000000..41fb0fb
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/EpsilonTransition.h
@@ -0,0 +1,39 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/Transition.h"
+
+namespace antlr4 {
+namespace atn {
+
+  class ANTLR4CPP_PUBLIC EpsilonTransition final : public Transition {
+  public:
+    /// Creates an epsilon transition to {@code target} whose
+    /// outermostPrecedenceReturn() is INVALID_INDEX.
+    EpsilonTransition(ATNState *target);
+    /// Creates an epsilon transition to {@code target} and records
+    /// {@code outermostPrecedenceReturn} (a rule index).
+    EpsilonTransition(ATNState *target, size_t outermostPrecedenceReturn);
+
+    /**
+     * @return the rule index of a precedence rule for which this transition is
+     * returning from, where the precedence value is 0; otherwise, INVALID_INDEX.
+     *
+     * @see ATNConfig#isPrecedenceFilterSuppressed()
+     * @see ParserATNSimulator#applyPrecedenceFilter(ATNConfigSet)
+     * @since 4.4.1
+     */
+    size_t outermostPrecedenceReturn() const;
+    virtual SerializationType getSerializationType() const override;
+
+    virtual bool isEpsilon() const override;
+    virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override;
+
+    virtual std::string toString() const override;
+
+  private:
+    const size_t _outermostPrecedenceReturn; // A rule index.
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ErrorInfo.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ErrorInfo.cpp
new file mode 100644
index 0000000..efe8507
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ErrorInfo.cpp
@@ -0,0 +1,15 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "atn/ATNConfigSet.h"
+
+#include "atn/ErrorInfo.h"
+
+using namespace antlr4;
+using namespace antlr4::atn;
+
+// Adds no state of its own: every argument is forwarded unchanged to the
+// DecisionEventInfo base class.
+ErrorInfo::ErrorInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, size_t stopIndex, bool fullCtx)
+  : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, fullCtx) {
+}
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ErrorInfo.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ErrorInfo.h
new file mode 100644
index 0000000..d34642a
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ErrorInfo.h
@@ -0,0 +1,43 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/DecisionEventInfo.h"
+
+namespace antlr4 {
+namespace atn {
+
+  /// <summary>
+  /// This class represents profiling event information for a syntax error
+  /// identified during prediction. Syntax errors occur when the prediction
+  /// algorithm is unable to identify an alternative which would lead to a
+  /// successful parse.
+  ///
+  /// @since 4.3
+  /// </summary>
+  /// <seealso cref= Parser#notifyErrorListeners(Token, String, RecognitionException) </seealso>
+  /// <seealso cref= ANTLRErrorListener#syntaxError </seealso>
+  class ANTLR4CPP_PUBLIC ErrorInfo : public DecisionEventInfo {
+  public:
+    /// <summary>
+    /// Constructs a new instance of the <seealso cref="ErrorInfo"/> class with the
+    /// specified detailed syntax error information.
+    /// </summary>
+    /// <param name="decision"> The decision number </param>
+    /// <param name="configs"> The final configuration set reached during prediction
+    /// prior to reaching the <seealso cref="ATNSimulator#ERROR"/> state </param>
+    /// <param name="input"> The input token stream </param>
+    /// <param name="startIndex"> The start index for the current prediction </param>
+    /// <param name="stopIndex"> The index at which the syntax error was identified </param>
+    /// <param name="fullCtx"> {@code true} if the syntax error was identified during LL
+    /// prediction; otherwise, {@code false} if the syntax error was identified
+    /// during SLL prediction </param>
+    ErrorInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, size_t stopIndex,
+              bool fullCtx);
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LL1Analyzer.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LL1Analyzer.cpp
new file mode 100644
index 0000000..ddca800
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LL1Analyzer.cpp
@@ -0,0 +1,156 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "atn/RuleStopState.h"
+#include "atn/Transition.h"
+#include "atn/RuleTransition.h"
+#include "atn/SingletonPredictionContext.h"
+#include "atn/AbstractPredicateTransition.h"
+#include "atn/WildcardTransition.h"
+#include "atn/NotSetTransition.h"
+#include "misc/IntervalSet.h"
+#include "atn/ATNConfig.h"
+#include "atn/EmptyPredictionContext.h"
+
+#include "support/CPPUtils.h"
+
+#include "atn/LL1Analyzer.h"
+
+using namespace antlr4;
+using namespace antlr4::atn;
+using namespace antlrcpp;
+
+// Only a reference to the ATN is kept; the analyzer does not copy it.
+LL1Analyzer::LL1Analyzer(const ATN &atn) : _atn(atn) {
+}
+
+LL1Analyzer::~LL1Analyzer() {
+}
+
+// One lookahead set per outgoing transition of s. An entry is left empty when
+// nothing was found for that alternative or when a predicate was hit.
+std::vector<misc::IntervalSet> LL1Analyzer::getDecisionLookahead(ATNState *s) const {
+  std::vector<misc::IntervalSet> look;
+  if (s == nullptr) {
+    return look;
+  }
+
+  const size_t altCount = s->transitions.size();
+  look.resize(altCount); // Every alternative starts out with a default (empty) set.
+
+  for (size_t alt = 0; alt < altCount; alt++) {
+    misc::IntervalSet &altLook = look[alt];
+
+    // Fresh bookkeeping for each alternative.
+    ATNConfig::Set lookBusy;
+    antlrcpp::BitSet callRuleStack;
+
+    // seeThruPreds == false: a predicate stops the analysis and leaves HIT_PRED
+    // in the set. addEOF == false as well.
+    _LOOK(s->transitions[alt]->target, nullptr, PredictionContext::EMPTY,
+          altLook, lookBusy, callRuleStack, false, false);
+
+    // Nothing found, or a predicate got in the way: report no lookahead at all.
+    if (altLook.size() == 0 || altLook.contains(HIT_PRED)) {
+      altLook.clear();
+    }
+  }
+  return look;
+}
+
+// Same as the three-argument overload with no stop state.
+misc::IntervalSet LL1Analyzer::LOOK(ATNState *s, RuleContext *ctx) const {
+  return LOOK(s, nullptr, ctx);
+}
+
+// Collects the tokens that can follow s, looking through predicates and
+// adding EOF when the end of the outermost context is reached.
+misc::IntervalSet LL1Analyzer::LOOK(ATNState *s, ATNState *stopState, RuleContext *ctx) const {
+  // A null ctx means "ignore the context"; it stays a null prediction context.
+  Ref<PredictionContext> lookContext;
+  if (ctx != nullptr) {
+    lookContext = PredictionContext::fromRuleContext(_atn, ctx);
+  }
+
+  misc::IntervalSet result;
+  ATNConfig::Set lookBusy;
+  antlrcpp::BitSet callRuleStack;
+  _LOOK(s, stopState, lookContext, result, lookBusy, callRuleStack, /* seeThruPreds */ true, /* addEOF */ true);
+
+  return result;
+}
+
+// Recursive worker behind getDecisionLookahead() and LOOK().
+void LL1Analyzer::_LOOK(ATNState *s, ATNState *stopState, Ref<PredictionContext> const& ctx, misc::IntervalSet &look,
+  ATNConfig::Set &lookBusy, antlrcpp::BitSet &calledRuleStack, bool seeThruPreds, bool addEOF) const {
+
+  // Each (state, context) pair is visited once. Keep in mind the lookup compares
+  // the members of the config, not the identity of the freshly made instance.
+  Ref<ATNConfig> config = std::make_shared<ATNConfig>(s, 0, ctx);
+  if (lookBusy.find(config) != lookBusy.end()) {
+    return;
+  }
+  lookBusy.insert(config);
+
+  // Records EPSILON (no context) or EOF (empty context and addEOF) in the
+  // result; returns true when one of them was added.
+  auto recordEndMarker = [&]() -> bool {
+    if (ctx == nullptr) {
+      look.add(Token::EPSILON);
+      return true;
+    }
+    if (ctx->isEmpty() && addEOF) {
+      look.add(Token::EOF);
+      return true;
+    }
+    return false;
+  };
+
+  // ml: s can never be null, hence no need to check if stopState is != null.
+  if (s == stopState && recordEndMarker()) {
+    return;
+  }
+
+  if (s->getStateType() == ATNState::RULE_STOP) {
+    if (recordEndMarker()) {
+      return;
+    }
+
+    if (ctx != PredictionContext::EMPTY) {
+      // Take this rule off the called-rule set while following the return
+      // states, and put it back afterwards if it had been on it.
+      bool wasMarked = calledRuleStack.test(s->ruleIndex);
+      calledRuleStack[s->ruleIndex] = false;
+      auto restoreMark = finally([wasMarked, &calledRuleStack, s] {
+        if (wasMarked) {
+          calledRuleStack.set(s->ruleIndex);
+        }
+      });
+
+      // Continue from every return state recorded in ctx.
+      for (size_t i = 0; i < ctx->size(); i++) {
+        ATNState *returnState = _atn.states[ctx->getReturnState(i)];
+        _LOOK(returnState, stopState, ctx->getParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF);
+      }
+      return;
+    }
+  }
+
+  // The complete user token range, used for wildcards and for complementing sets.
+  auto allUserTokens = [this] {
+    return misc::IntervalSet::of(Token::MIN_USER_TOKEN_TYPE, static_cast<ssize_t>(_atn.maxTokenType));
+  };
+
+  for (Transition *t : s->transitions) {
+    if (t->getSerializationType() == Transition::RULE) {
+      RuleTransition *ruleTransition = static_cast<RuleTransition*>(t);
+      const size_t calleeRule = ruleTransition->target->ruleIndex;
+
+      // Already inside this rule: do not enter it again.
+      if (calledRuleStack[calleeRule]) {
+        continue;
+      }
+
+      Ref<PredictionContext> newContext = SingletonPredictionContext::create(ctx, ruleTransition->followState->stateNumber);
+      auto unmarkCallee = finally([calleeRule, &calledRuleStack] {
+        calledRuleStack[calleeRule] = false;
+      });
+
+      calledRuleStack.set(calleeRule);
+      _LOOK(t->target, stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF);
+      continue;
+    }
+
+    if (is<AbstractPredicateTransition *>(t)) {
+      if (seeThruPreds) {
+        _LOOK(t->target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF);
+      } else {
+        look.add(HIT_PRED);
+      }
+      continue;
+    }
+
+    if (t->isEpsilon()) {
+      _LOOK(t->target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF);
+      continue;
+    }
+
+    if (t->getSerializationType() == Transition::WILDCARD) {
+      look.addAll(allUserTokens());
+      continue;
+    }
+
+    // Any other transition contributes its label; a not-set transition
+    // contributes the complement of its label within the user token range.
+    misc::IntervalSet labels = t->label();
+    if (!labels.isEmpty()) {
+      if (is<NotSetTransition*>(t)) {
+        labels = labels.complement(allUserTokens());
+      }
+      look.addAll(labels);
+    }
+  }
+}
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LL1Analyzer.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LL1Analyzer.h
new file mode 100644
index 0000000..e297bc9
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LL1Analyzer.h
@@ -0,0 +1,115 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "Token.h"
+#include "support/BitSet.h"
+#include "atn/PredictionContext.h"
+#include "atn/ATNConfig.h"
+
+namespace antlr4 {
+namespace atn {
+
+  class ANTLR4CPP_PUBLIC LL1Analyzer {
+  public:
+    /// Marker added to a lookahead set when the analysis runs into a
+    /// predicate while {@code seeThruPreds==false}.
+#if __cplusplus >= 201703L
+    static constexpr size_t HIT_PRED = Token::INVALID_TYPE;
+#else
+    enum : size_t {
+      HIT_PRED = Token::INVALID_TYPE,
+    };
+#endif
+
+    const atn::ATN &_atn;
+
+    LL1Analyzer(const atn::ATN &atn);
+    virtual ~LL1Analyzer();
+
+    /// <summary>
+    /// Calculates the SLL(1) expected lookahead set for each outgoing transition
+    /// of an <seealso cref="ATNState"/>. The returned vector has one element for
+    /// each outgoing transition in {@code s}. If the closure from transition
+    /// <em>i</em> leads to a semantic predicate before matching a symbol, or
+    /// finds no lookahead at all, the element at index <em>i</em> of the result
+    /// is an empty set. A null {@code s} yields an empty vector.
+    /// </summary>
+    /// <param name="s"> the ATN state </param>
+    /// <returns> the expected symbols for each outgoing transition of {@code s}. </returns>
+    virtual std::vector<misc::IntervalSet> getDecisionLookahead(ATNState *s) const;
+
+    /// <summary>
+    /// Compute set of tokens that can follow {@code s} in the ATN in the
+    /// specified {@code ctx}.
+    /// <p/>
+    /// If {@code ctx} is {@code null} and the end of the rule containing
+    /// {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to the result set.
+    /// If {@code ctx} is not {@code null} and the end of the outermost rule is
+    /// reached, <seealso cref="Token#EOF"/> is added to the result set.
+    /// </summary>
+    /// <param name="s"> the ATN state </param>
+    /// <param name="ctx"> the complete parser context, or {@code null} if the context
+    /// should be ignored
+    /// </param>
+    /// <returns> The set of tokens that can follow {@code s} in the ATN in the
+    /// specified {@code ctx}. </returns>
+    virtual misc::IntervalSet LOOK(ATNState *s, RuleContext *ctx) const;
+
+    /// <summary>
+    /// Compute set of tokens that can follow {@code s} in the ATN in the
+    /// specified {@code ctx}, optionally stopping at {@code stopState}.
+    /// <p/>
+    /// If {@code ctx} is {@code null} and the end of the rule containing
+    /// {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to the result set.
+    /// If {@code ctx} is not {@code null} and the end of the outermost rule is
+    /// reached, <seealso cref="Token#EOF"/> is added to the result set.
+    /// </summary>
+    /// <param name="s"> the ATN state </param>
+    /// <param name="stopState"> the ATN state to stop at. This can be a
+    /// <seealso cref="BlockEndState"/> to detect epsilon paths through a closure. </param>
+    /// <param name="ctx"> the complete parser context, or {@code null} if the context
+    /// should be ignored
+    /// </param>
+    /// <returns> The set of tokens that can follow {@code s} in the ATN in the
+    /// specified {@code ctx}. </returns>
+    virtual misc::IntervalSet LOOK(ATNState *s, ATNState *stopState, RuleContext *ctx) const;
+
+  protected:
+    /// <summary>
+    /// Compute set of tokens that can follow {@code s} in the ATN in the
+    /// specified {@code ctx}.
+    /// <p/>
+    /// If {@code ctx} is {@code null} and {@code stopState} or the end of the
+    /// rule containing {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to
+    /// the result set. If {@code ctx} is not {@code null} and {@code addEOF} is
+    /// {@code true} and {@code stopState} or the end of the outermost rule is
+    /// reached, <seealso cref="Token#EOF"/> is added to the result set.
+    /// </summary>
+    /// <param name="s"> the ATN state. </param>
+    /// <param name="stopState"> the ATN state to stop at. This can be a
+    /// <seealso cref="BlockEndState"/> to detect epsilon paths through a closure. </param>
+    /// <param name="ctx"> The outer context, or {@code null} if the outer context should
+    /// not be used. </param>
+    /// <param name="look"> The result lookahead set. </param>
+    /// <param name="lookBusy"> A set used for preventing epsilon closures in the ATN
+    /// from causing a stack overflow. Outside code should pass an empty
+    /// {@code ATNConfig::Set} for this argument. </param>
+    /// <param name="calledRuleStack"> A set used for preventing left recursion in the
+    /// ATN from causing a stack overflow. Outside code should pass an empty
+    /// {@code antlrcpp::BitSet} for this argument. </param>
+    /// <param name="seeThruPreds"> {@code true} to treat semantic predicates as
+    /// implicitly {@code true} and "see through them", otherwise {@code false}
+    /// to treat semantic predicates as opaque and add <seealso cref="#HIT_PRED"/> to the
+    /// result if one is encountered. </param>
+    /// <param name="addEOF"> Add <seealso cref="Token#EOF"/> to the result if the end of the
+    /// outermost context is reached. This parameter has no effect if {@code ctx}
+    /// is {@code null}. </param>
+    virtual void _LOOK(ATNState *s, ATNState *stopState, Ref<PredictionContext> const& ctx, misc::IntervalSet &look,
+      ATNConfig::Set &lookBusy, antlrcpp::BitSet &calledRuleStack, bool seeThruPreds, bool addEOF) const;
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerATNConfig.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerATNConfig.cpp
new file mode 100644
index 0000000..db91c3f
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerATNConfig.cpp
@@ -0,0 +1,84 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "misc/MurmurHash.h"
+#include "atn/DecisionState.h"
+#include "atn/PredictionContext.h"
+#include "SemanticContext.h"
+#include "atn/LexerActionExecutor.h"
+
+#include "support/CPPUtils.h"
+
+#include "atn/LexerATNConfig.h"
+
+using namespace antlr4::atn;
+using namespace antlrcpp;
+
+// Fresh configuration without an action executor; it has not passed through a
+// non-greedy decision yet.
+LexerATNConfig::LexerATNConfig(ATNState *state, int alt, Ref<PredictionContext> const& context)
+  : ATNConfig(state, alt, context, SemanticContext::NONE), _passedThroughNonGreedyDecision(false) {
+}
+
+// Fresh configuration with the given action executor.
+LexerATNConfig::LexerATNConfig(ATNState *state, int alt, Ref<PredictionContext> const& context,
+                               Ref<LexerActionExecutor> const& lexerActionExecutor)
+  : ATNConfig(state, alt, context, SemanticContext::NONE), _lexerActionExecutor(lexerActionExecutor),
+    _passedThroughNonGreedyDecision(false) {
+}
+
+// Derives a configuration from c for a new state; context, semantic context and
+// executor are taken over from c.
+LexerATNConfig::LexerATNConfig(Ref<LexerATNConfig> const& c, ATNState *state)
+  : ATNConfig(c, state, c->context, c->semanticContext), _lexerActionExecutor(c->_lexerActionExecutor),
+    _passedThroughNonGreedyDecision(checkNonGreedyDecision(c, state)) {
+}
+
+// Derives a configuration from c for a new state, replacing the executor.
+LexerATNConfig::LexerATNConfig(Ref<LexerATNConfig> const& c, ATNState *state, Ref<LexerActionExecutor> const& lexerActionExecutor)
+  : ATNConfig(c, state, c->context, c->semanticContext), _lexerActionExecutor(lexerActionExecutor),
+    _passedThroughNonGreedyDecision(checkNonGreedyDecision(c, state)) {
+}
+
+// Derives a configuration from c for a new state, replacing the prediction context.
+LexerATNConfig::LexerATNConfig(Ref<LexerATNConfig> const& c, ATNState *state, Ref<PredictionContext> const& context)
+  : ATNConfig(c, state, context, c->semanticContext), _lexerActionExecutor(c->_lexerActionExecutor),
+    _passedThroughNonGreedyDecision(checkNonGreedyDecision(c, state)) {
+}
+
+Ref<LexerActionExecutor> LexerATNConfig::getLexerActionExecutor() const {
+  return _lexerActionExecutor;
+}
+
+// Read-only accessor, const-qualified so it is usable on const configurations.
+bool LexerATNConfig::hasPassedThroughNonGreedyDecision() const {
+  return _passedThroughNonGreedyDecision;
+}
+
+// Hash over the six values that identify a lexer configuration: state number,
+// alternative, context, semantic context, non-greedy flag and action executor.
+size_t LexerATNConfig::hashCode() const {
+  size_t hashCode = misc::MurmurHash::initialize(7);
+  hashCode = misc::MurmurHash::update(hashCode, state->stateNumber);
+  hashCode = misc::MurmurHash::update(hashCode, alt);
+  hashCode = misc::MurmurHash::update(hashCode, context);
+  hashCode = misc::MurmurHash::update(hashCode, semanticContext);
+  hashCode = misc::MurmurHash::update(hashCode, _passedThroughNonGreedyDecision ? 1 : 0);
+  hashCode = misc::MurmurHash::update(hashCode, _lexerActionExecutor);
+  hashCode = misc::MurmurHash::finish(hashCode, 6);
+  return hashCode;
+}
+
+// Two lexer configurations are equal when the non-greedy flags agree, the
+// action executors agree (both absent, or both present and equal) and the
+// ATNConfig parts are equal.
+bool LexerATNConfig::operator == (const LexerATNConfig& other) const
+{
+  if (this == &other)
+    return true;
+
+  if (_passedThroughNonGreedyDecision != other._passedThroughNonGreedyDecision)
+    return false;
+
+  // An executor on one side only means "different". Checking this first also
+  // guarantees that neither pointer is null when both are dereferenced below.
+  const bool hasExecutor = _lexerActionExecutor != nullptr;
+  const bool otherHasExecutor = other._lexerActionExecutor != nullptr;
+  if (hasExecutor != otherHasExecutor)
+    return false;
+
+  if (hasExecutor && *_lexerActionExecutor != *(other._lexerActionExecutor)) {
+    return false;
+  }
+
+  // Matching executors (or none on either side) are not sufficient on their
+  // own: the base-class fields have to match as well.
+  return ATNConfig::operator == (other);
+}
+
+// True when the source configuration already passed through a non-greedy
+// decision, or when the target is a DecisionState flagged as non-greedy.
+bool LexerATNConfig::checkNonGreedyDecision(Ref<LexerATNConfig> const& source, ATNState *target) {
+  return source->_passedThroughNonGreedyDecision ||
+    (is<DecisionState*>(target) && (static_cast<DecisionState*>(target))->nonGreedy);
+}
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerATNConfig.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerATNConfig.h
new file mode 100644
index 0000000..e25d3d1
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerATNConfig.h
@@ -0,0 +1,44 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/ATNConfig.h"
+
+namespace antlr4 {
+namespace atn {
+
+  class ANTLR4CPP_PUBLIC LexerATNConfig : public ATNConfig {
+  public:
+    LexerATNConfig(ATNState *state, int alt, Ref<PredictionContext> const& context);
+    LexerATNConfig(ATNState *state, int alt, Ref<PredictionContext> const& context, Ref<LexerActionExecutor> const& lexerActionExecutor);
+
+    LexerATNConfig(Ref<LexerATNConfig> const& c, ATNState *state);
+    LexerATNConfig(Ref<LexerATNConfig> const& c, ATNState *state, Ref<LexerActionExecutor> const& lexerActionExecutor);
+    LexerATNConfig(Ref<LexerATNConfig> const& c, ATNState *state, Ref<PredictionContext> const& context);
+
+    /**
+     * Gets the {@link LexerActionExecutor} capable of executing the embedded
+     * action(s) for the current configuration.
+     */
+    Ref<LexerActionExecutor> getLexerActionExecutor() const;
+
+    /**
+     * @return {@code true} if this configuration, or one it was derived from,
+     * reached a {@link DecisionState} marked as non-greedy.
+     */
+    bool hasPassedThroughNonGreedyDecision() const;
+
+    virtual size_t hashCode() const override;
+
+    /**
+     * Compares the non-greedy flag, the action executors and the
+     * {@link ATNConfig} base part of both configurations.
+     */
+    bool operator == (const LexerATNConfig& other) const;
+
+  private:
+    /**
+     * This is the backing field for {@link #getLexerActionExecutor}.
+     */
+    const Ref<LexerActionExecutor> _lexerActionExecutor;
+    const bool _passedThroughNonGreedyDecision;
+
+    static bool checkNonGreedyDecision(Ref<LexerATNConfig> const& source, ATNState *target);
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerATNSimulator.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerATNSimulator.cpp
new file mode 100644
index 0000000..827c3d5
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerATNSimulator.cpp
@@ -0,0 +1,628 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */ + +#include "IntStream.h" +#include "atn/OrderedATNConfigSet.h" +#include "Token.h" +#include "LexerNoViableAltException.h" +#include "atn/RuleStopState.h" +#include "atn/RuleTransition.h" +#include "atn/SingletonPredictionContext.h" +#include "atn/PredicateTransition.h" +#include "atn/ActionTransition.h" +#include "atn/TokensStartState.h" +#include "misc/Interval.h" +#include "dfa/DFA.h" +#include "Lexer.h" + +#include "dfa/DFAState.h" +#include "atn/LexerATNConfig.h" +#include "atn/LexerActionExecutor.h" +#include "atn/EmptyPredictionContext.h" + +#include "atn/LexerATNSimulator.h" + +#define DEBUG_ATN 0 +#define DEBUG_DFA 0 + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + +LexerATNSimulator::SimState::~SimState() { +} + +void LexerATNSimulator::SimState::reset() { + index = INVALID_INDEX; + line = 0; + charPos = INVALID_INDEX; + dfaState = nullptr; // Don't delete. It's just a reference. +} + +void LexerATNSimulator::SimState::InitializeInstanceFields() { + index = INVALID_INDEX; + line = 0; + charPos = INVALID_INDEX; +} + +int LexerATNSimulator::match_calls = 0; + + +LexerATNSimulator::LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) + : LexerATNSimulator(nullptr, atn, decisionToDFA, sharedContextCache) { +} + +LexerATNSimulator::LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) + : ATNSimulator(atn, sharedContextCache), _recog(recog), _decisionToDFA(decisionToDFA) { + InitializeInstanceFields(); +} + +void LexerATNSimulator::copyState(LexerATNSimulator *simulator) { + _charPositionInLine = simulator->_charPositionInLine; + _line = simulator->_line; + _mode = simulator->_mode; + _startIndex = simulator->_startIndex; +} + +size_t LexerATNSimulator::match(CharStream *input, size_t mode) { + match_calls++; + _mode = mode; + ssize_t mark = input->mark(); + + auto onExit = 
finally([input, mark] { + input->release(mark); + }); + + _startIndex = input->index(); + _prevAccept.reset(); + const dfa::DFA &dfa = _decisionToDFA[mode]; + if (dfa.s0 == nullptr) { + return matchATN(input); + } else { + return execATN(input, dfa.s0); + } +} + +void LexerATNSimulator::reset() { + _prevAccept.reset(); + _startIndex = 0; + _line = 1; + _charPositionInLine = 0; + _mode = Lexer::DEFAULT_MODE; +} + +void LexerATNSimulator::clearDFA() { + size_t size = _decisionToDFA.size(); + _decisionToDFA.clear(); + for (size_t d = 0; d < size; ++d) { + _decisionToDFA.emplace_back(atn.getDecisionState(d), d); + } +} + +size_t LexerATNSimulator::matchATN(CharStream *input) { + ATNState *startState = atn.modeToStartState[_mode]; + + std::unique_ptr<ATNConfigSet> s0_closure = computeStartState(input, startState); + + bool suppressEdge = s0_closure->hasSemanticContext; + s0_closure->hasSemanticContext = false; + + dfa::DFAState *next = addDFAState(s0_closure.release()); + if (!suppressEdge) { + _decisionToDFA[_mode].s0 = next; + } + + size_t predict = execATN(input, next); + + return predict; +} + +size_t LexerATNSimulator::execATN(CharStream *input, dfa::DFAState *ds0) { + if (ds0->isAcceptState) { + // allow zero-length tokens + // ml: in Java code this method uses 3 params. The first is a member var of the class anyway (_prevAccept), so why pass it here? + captureSimState(input, ds0); + } + + size_t t = input->LA(1); + dfa::DFAState *s = ds0; // s is current/from DFA state + + while (true) { // while more work + // As we move src->trg, src->trg, we keep track of the previous trg to + // avoid looking up the DFA state again, which is expensive. + // If the previous target was already part of the DFA, we might + // be able to avoid doing a reach operation upon t. If s!=null, + // it means that semantic predicates didn't prevent us from + // creating a DFA state. Once we know s!=null, we check to see if + // the DFA state has an edge already for t. 
If so, we can just reuse + // it's configuration set; there's no point in re-computing it. + // This is kind of like doing DFA simulation within the ATN + // simulation because DFA simulation is really just a way to avoid + // computing reach/closure sets. Technically, once we know that + // we have a previously added DFA state, we could jump over to + // the DFA simulator. But, that would mean popping back and forth + // a lot and making things more complicated algorithmically. + // This optimization makes a lot of sense for loops within DFA. + // A character will take us back to an existing DFA state + // that already has lots of edges out of it. e.g., .* in comments. + dfa::DFAState *target = getExistingTargetState(s, t); + if (target == nullptr) { + target = computeTargetState(input, s, t); + } + + if (target == ERROR.get()) { + break; + } + + // If this is a consumable input element, make sure to consume before + // capturing the accept state so the input index, line, and char + // position accurately reflect the state of the interpreter at the + // end of the token. 
+ if (t != Token::EOF) { + consume(input); + } + + if (target->isAcceptState) { + captureSimState(input, target); + if (t == Token::EOF) { + break; + } + } + + t = input->LA(1); + s = target; // flip; current DFA target becomes new src/from state + } + + return failOrAccept(input, s->configs.get(), t); +} + +dfa::DFAState *LexerATNSimulator::getExistingTargetState(dfa::DFAState *s, size_t t) { + dfa::DFAState* retval = nullptr; + _edgeLock.readLock(); + if (t <= MAX_DFA_EDGE) { + auto iterator = s->edges.find(t - MIN_DFA_EDGE); +#if DEBUG_ATN == 1 + if (iterator != s->edges.end()) { + std::cout << std::string("reuse state ") << s->stateNumber << std::string(" edge to ") << iterator->second->stateNumber << std::endl; + } +#endif + + if (iterator != s->edges.end()) + retval = iterator->second; + } + _edgeLock.readUnlock(); + return retval; +} + +dfa::DFAState *LexerATNSimulator::computeTargetState(CharStream *input, dfa::DFAState *s, size_t t) { + OrderedATNConfigSet *reach = new OrderedATNConfigSet(); /* mem-check: deleted on error or managed by new DFA state. */ + + // if we don't find an existing DFA state + // Fill reach starting from closure, following t transitions + getReachableConfigSet(input, s->configs.get(), reach, t); + + if (reach->isEmpty()) { // we got nowhere on t from s + if (!reach->hasSemanticContext) { + // we got nowhere on t, don't throw out this knowledge; it'd + // cause a failover from DFA later. 
+ delete reach; + addDFAEdge(s, t, ERROR.get()); + } + + // stop when we can't match any more char + return ERROR.get(); + } + + // Add an edge from s to target DFA found/created for reach + return addDFAEdge(s, t, reach); +} + +size_t LexerATNSimulator::failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t) { + if (_prevAccept.dfaState != nullptr) { + Ref<LexerActionExecutor> lexerActionExecutor = _prevAccept.dfaState->lexerActionExecutor; + accept(input, lexerActionExecutor, _startIndex, _prevAccept.index, _prevAccept.line, _prevAccept.charPos); + return _prevAccept.dfaState->prediction; + } else { + // if no accept and EOF is first char, return EOF + if (t == Token::EOF && input->index() == _startIndex) { + return Token::EOF; + } + + throw LexerNoViableAltException(_recog, input, _startIndex, reach); + } +} + +void LexerATNSimulator::getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, ATNConfigSet *reach, size_t t) { + // this is used to skip processing for configs which have a lower priority + // than a config that already reached an accept state for the same rule + size_t skipAlt = ATN::INVALID_ALT_NUMBER; + + for (auto c : closure_->configs) { + bool currentAltReachedAcceptState = c->alt == skipAlt; + if (currentAltReachedAcceptState && (std::static_pointer_cast<LexerATNConfig>(c))->hasPassedThroughNonGreedyDecision()) { + continue; + } + +#if DEBUG_ATN == 1 + std::cout << "testing " << getTokenName((int)t) << " at " << c->toString(true) << std::endl; +#endif + + size_t n = c->state->transitions.size(); + for (size_t ti = 0; ti < n; ti++) { // for each transition + Transition *trans = c->state->transitions[ti]; + ATNState *target = getReachableTarget(trans, (int)t); + if (target != nullptr) { + Ref<LexerActionExecutor> lexerActionExecutor = std::static_pointer_cast<LexerATNConfig>(c)->getLexerActionExecutor(); + if (lexerActionExecutor != nullptr) { + lexerActionExecutor = lexerActionExecutor->fixOffsetBeforeMatch((int)input->index() - 
(int)_startIndex); + } + + bool treatEofAsEpsilon = t == Token::EOF; + Ref<LexerATNConfig> config = std::make_shared<LexerATNConfig>(std::static_pointer_cast<LexerATNConfig>(c), + target, lexerActionExecutor); + + if (closure(input, config, reach, currentAltReachedAcceptState, true, treatEofAsEpsilon)) { + // any remaining configs for this alt have a lower priority than + // the one that just reached an accept state. + skipAlt = c->alt; + break; + } + } + } + } +} + +void LexerATNSimulator::accept(CharStream *input, const Ref<LexerActionExecutor> &lexerActionExecutor, size_t /*startIndex*/, + size_t index, size_t line, size_t charPos) { +#if DEBUG_ATN == 1 + std::cout << "ACTION "; + std::cout << toString(lexerActionExecutor) << std::endl; +#endif + + // seek to after last char in token + input->seek(index); + _line = line; + _charPositionInLine = (int)charPos; + + if (lexerActionExecutor != nullptr && _recog != nullptr) { + lexerActionExecutor->execute(_recog, input, _startIndex); + } +} + +atn::ATNState *LexerATNSimulator::getReachableTarget(Transition *trans, size_t t) { + if (trans->matches(t, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE)) { + return trans->target; + } + + return nullptr; +} + +std::unique_ptr<ATNConfigSet> LexerATNSimulator::computeStartState(CharStream *input, ATNState *p) { + Ref<PredictionContext> initialContext = PredictionContext::EMPTY; // ml: the purpose of this assignment is unclear + std::unique_ptr<ATNConfigSet> configs(new OrderedATNConfigSet()); + for (size_t i = 0; i < p->transitions.size(); i++) { + ATNState *target = p->transitions[i]->target; + Ref<LexerATNConfig> c = std::make_shared<LexerATNConfig>(target, (int)(i + 1), initialContext); + closure(input, c, configs.get(), false, false, false); + } + + return configs; +} + +bool LexerATNSimulator::closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs, + bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon) { +#if DEBUG_ATN == 
1 + std::cout << "closure(" << config->toString(true) << ")" << std::endl; +#endif + + if (is<RuleStopState *>(config->state)) { +#if DEBUG_ATN == 1 + if (_recog != nullptr) { + std::cout << "closure at " << _recog->getRuleNames()[config->state->ruleIndex] << " rule stop " << config << std::endl; + } else { + std::cout << "closure at rule stop " << config << std::endl; + } +#endif + + if (config->context == nullptr || config->context->hasEmptyPath()) { + if (config->context == nullptr || config->context->isEmpty()) { + configs->add(config); + return true; + } else { + configs->add(std::make_shared<LexerATNConfig>(config, config->state, PredictionContext::EMPTY)); + currentAltReachedAcceptState = true; + } + } + + if (config->context != nullptr && !config->context->isEmpty()) { + for (size_t i = 0; i < config->context->size(); i++) { + if (config->context->getReturnState(i) != PredictionContext::EMPTY_RETURN_STATE) { + std::weak_ptr<PredictionContext> newContext = config->context->getParent(i); // "pop" return state + ATNState *returnState = atn.states[config->context->getReturnState(i)]; + Ref<LexerATNConfig> c = std::make_shared<LexerATNConfig>(config, returnState, newContext.lock()); + currentAltReachedAcceptState = closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon); + } + } + } + + return currentAltReachedAcceptState; + } + + // optimization + if (!config->state->epsilonOnlyTransitions) { + if (!currentAltReachedAcceptState || !config->hasPassedThroughNonGreedyDecision()) { + configs->add(config); + } + } + + ATNState *p = config->state; + for (size_t i = 0; i < p->transitions.size(); i++) { + Transition *t = p->transitions[i]; + Ref<LexerATNConfig> c = getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon); + if (c != nullptr) { + currentAltReachedAcceptState = closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon); + } + } + + return currentAltReachedAcceptState; +} + 
+Ref<LexerATNConfig> LexerATNSimulator::getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, Transition *t, + ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon) { + + Ref<LexerATNConfig> c = nullptr; + switch (t->getSerializationType()) { + case Transition::RULE: { + RuleTransition *ruleTransition = static_cast<RuleTransition*>(t); + Ref<PredictionContext> newContext = SingletonPredictionContext::create(config->context, ruleTransition->followState->stateNumber); + c = std::make_shared<LexerATNConfig>(config, t->target, newContext); + break; + } + + case Transition::PRECEDENCE: + throw UnsupportedOperationException("Precedence predicates are not supported in lexers."); + + case Transition::PREDICATE: { + /* Track traversing semantic predicates. If we traverse, + we cannot add a DFA state for this "reach" computation + because the DFA would not test the predicate again in the + future. Rather than creating collections of semantic predicates + like v3 and testing them on prediction, v4 will test them on the + fly all the time using the ATN not the DFA. This is slower but + semantically it's not used that often. One of the key elements to + this predicate mechanism is not adding DFA states that see + predicates immediately afterwards in the ATN. For example, + + a : ID {p1}? | ID {p2}? ; + + should create the start state for rule 'a' (to save start state + competition), but should not create target of ID state. The + collection of ATN states the following ID references includes + states reached by traversing predicates. Since this is when we + test them, we cannot cash the DFA state target of ID. 
+ */ + PredicateTransition *pt = static_cast<PredicateTransition*>(t); + +#if DEBUG_ATN == 1 + std::cout << "EVAL rule " << pt->ruleIndex << ":" << pt->predIndex << std::endl; +#endif + + configs->hasSemanticContext = true; + if (evaluatePredicate(input, pt->ruleIndex, pt->predIndex, speculative)) { + c = std::make_shared<LexerATNConfig>(config, t->target); + } + break; + } + + case Transition::ACTION: + if (config->context == nullptr|| config->context->hasEmptyPath()) { + // execute actions anywhere in the start rule for a token. + // + // TODO: if the entry rule is invoked recursively, some + // actions may be executed during the recursive call. The + // problem can appear when hasEmptyPath() is true but + // isEmpty() is false. In this case, the config needs to be + // split into two contexts - one with just the empty path + // and another with everything but the empty path. + // Unfortunately, the current algorithm does not allow + // getEpsilonTarget to return two configurations, so + // additional modifications are needed before we can support + // the split operation. + Ref<LexerActionExecutor> lexerActionExecutor = LexerActionExecutor::append(config->getLexerActionExecutor(), + atn.lexerActions[static_cast<ActionTransition *>(t)->actionIndex]); + c = std::make_shared<LexerATNConfig>(config, t->target, lexerActionExecutor); + break; + } + else { + // ignore actions in referenced rules + c = std::make_shared<LexerATNConfig>(config, t->target); + break; + } + + case Transition::EPSILON: + c = std::make_shared<LexerATNConfig>(config, t->target); + break; + + case Transition::ATOM: + case Transition::RANGE: + case Transition::SET: + if (treatEofAsEpsilon) { + if (t->matches(Token::EOF, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE)) { + c = std::make_shared<LexerATNConfig>(config, t->target); + break; + } + } + + break; + + default: // To silence the compiler. Other transition types are not used here. 
+ break; + } + + return c; +} + +bool LexerATNSimulator::evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative) { + // assume true if no recognizer was provided + if (_recog == nullptr) { + return true; + } + + if (!speculative) { + return _recog->sempred(nullptr, ruleIndex, predIndex); + } + + size_t savedCharPositionInLine = _charPositionInLine; + size_t savedLine = _line; + size_t index = input->index(); + ssize_t marker = input->mark(); + + auto onExit = finally([this, input, savedCharPositionInLine, savedLine, index, marker] { + _charPositionInLine = savedCharPositionInLine; + _line = savedLine; + input->seek(index); + input->release(marker); + }); + + consume(input); + return _recog->sempred(nullptr, ruleIndex, predIndex); +} + +void LexerATNSimulator::captureSimState(CharStream *input, dfa::DFAState *dfaState) { + _prevAccept.index = input->index(); + _prevAccept.line = _line; + _prevAccept.charPos = _charPositionInLine; + _prevAccept.dfaState = dfaState; +} + +dfa::DFAState *LexerATNSimulator::addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q) { + /* leading to this call, ATNConfigSet.hasSemanticContext is used as a + * marker indicating dynamic predicate evaluation makes this edge + * dependent on the specific input sequence, so the static edge in the + * DFA should be omitted. The target DFAState is still created since + * execATN has the ability to resynchronize with the DFA state cache + * following the predicate evaluation step. + * + * TJP notes: next time through the DFA, we see a pred again and eval. + * If that gets us to a previously created (but dangling) DFA + * state, we can continue in pure DFA mode from there. 
+ */ + bool suppressEdge = q->hasSemanticContext; + q->hasSemanticContext = false; + + dfa::DFAState *to = addDFAState(q); + + if (suppressEdge) { + return to; + } + + addDFAEdge(from, t, to); + return to; +} + +void LexerATNSimulator::addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q) { + if (/*t < MIN_DFA_EDGE ||*/ t > MAX_DFA_EDGE) { // MIN_DFA_EDGE is 0 + // Only track edges within the DFA bounds + return; + } + + _edgeLock.writeLock(); + p->edges[t - MIN_DFA_EDGE] = q; // connect + _edgeLock.writeUnlock(); +} + +dfa::DFAState *LexerATNSimulator::addDFAState(ATNConfigSet *configs) { + /* the lexer evaluates predicates on-the-fly; by this point configs + * should not contain any configurations with unevaluated predicates. + */ + assert(!configs->hasSemanticContext); + + dfa::DFAState *proposed = new dfa::DFAState(std::unique_ptr<ATNConfigSet>(configs)); /* mem-check: managed by the DFA or deleted below */ + Ref<ATNConfig> firstConfigWithRuleStopState = nullptr; + for (auto &c : configs->configs) { + if (is<RuleStopState *>(c->state)) { + firstConfigWithRuleStopState = c; + break; + } + } + + if (firstConfigWithRuleStopState != nullptr) { + proposed->isAcceptState = true; + proposed->lexerActionExecutor = std::dynamic_pointer_cast<LexerATNConfig>(firstConfigWithRuleStopState)->getLexerActionExecutor(); + proposed->prediction = atn.ruleToTokenType[firstConfigWithRuleStopState->state->ruleIndex]; + } + + dfa::DFA &dfa = _decisionToDFA[_mode]; + + _stateLock.writeLock(); + if (!dfa.states.empty()) { + auto iterator = dfa.states.find(proposed); + if (iterator != dfa.states.end()) { + delete proposed; + _stateLock.writeUnlock(); + return *iterator; + } + } + + proposed->stateNumber = (int)dfa.states.size(); + proposed->configs->setReadonly(true); + + dfa.states.insert(proposed); + _stateLock.writeUnlock(); + + return proposed; +} + +dfa::DFA& LexerATNSimulator::getDFA(size_t mode) { + return _decisionToDFA[mode]; +} + +std::string 
LexerATNSimulator::getText(CharStream *input) { + // index is first lookahead char, don't include. + return input->getText(misc::Interval(_startIndex, input->index() - 1)); +} + +size_t LexerATNSimulator::getLine() const { + return _line; +} + +void LexerATNSimulator::setLine(size_t line) { + _line = line; +} + +size_t LexerATNSimulator::getCharPositionInLine() { + return _charPositionInLine; +} + +void LexerATNSimulator::setCharPositionInLine(size_t charPositionInLine) { + _charPositionInLine = charPositionInLine; +} + +void LexerATNSimulator::consume(CharStream *input) { + size_t curChar = input->LA(1); + if (curChar == '\n') { + _line++; + _charPositionInLine = 0; + } else { + _charPositionInLine++; + } + input->consume(); +} + +std::string LexerATNSimulator::getTokenName(size_t t) { + if (t == Token::EOF) { + return "EOF"; + } + return std::string("'") + static_cast<char>(t) + std::string("'"); +} + +void LexerATNSimulator::InitializeInstanceFields() { + _startIndex = 0; + _line = 1; + _charPositionInLine = 0; + _mode = antlr4::Lexer::DEFAULT_MODE; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerATNSimulator.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerATNSimulator.h new file mode 100644 index 0000000..c050d51 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerATNSimulator.h @@ -0,0 +1,217 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
 */

#pragma once

#include "atn/ATNSimulator.h"
#include "atn/LexerATNConfig.h"
#include "atn/ATNConfigSet.h"

namespace antlr4 {
namespace atn {

  /// "dup" of ParserInterpreter
  ///
  /// ATN/DFA simulator used by lexers: match() recognises one token at the current
  /// input position, caching DFA states and edges in the shared _decisionToDFA vector.
  class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator {
  protected:
    /// Snapshot of the simulator position taken when an accept state is reached
    /// (filled in by captureSimState()).
    class SimState {
    public:
      virtual ~SimState();

    protected:
      size_t index;             ///< Input index at the accept point.
      size_t line;              ///< Line number at the accept point.
      size_t charPos;           ///< Column within the line at the accept point.
      dfa::DFAState *dfaState;  ///< Accepting DFA state; referenced, not owned.
      /// Clear the snapshot (dfaState becomes nullptr).
      virtual void reset();
      friend class LexerATNSimulator;

    private:
      void InitializeInstanceFields();

    public:
      SimState() {
        InitializeInstanceFields();
      }
    };


  public:
    // Inclusive bounds of the symbols for which DFA edges are cached.
#if __cplusplus >= 201703L
    static constexpr size_t MIN_DFA_EDGE = 0;
    static constexpr size_t MAX_DFA_EDGE = 127; // forces unicode to stay in ATN
#else
    enum : size_t {
      MIN_DFA_EDGE = 0,
      MAX_DFA_EDGE = 127, // forces unicode to stay in ATN
    };
#endif

  protected:
    /// <summary>
    /// When we hit an accept state in either the DFA or the ATN, we
    /// have to notify the character stream to start buffering characters
    /// via <seealso cref="IntStream#mark"/> and record the current state. The current sim state
    /// includes the current index into the input, the current line,
    /// and current character position in that line. Note that the Lexer is
    /// tracking the starting line and characterization of the token. These
    /// variables track the "state" of the simulator when it hits an accept state.
    /// <p/>
    /// We track these variables separately for the DFA and ATN simulation
    /// because the DFA simulation often has to fail over to the ATN
    /// simulation. If the ATN simulation fails, we need the DFA to fall
    /// back to its previously accepted state, if any. If the ATN succeeds,
    /// then the ATN does the accept and the DFA simulator that invoked it
    /// can simply return the predicted token type.
    /// </summary>
    ///
    /// The lexer this simulator works for; may be null, in which case predicates are
    /// assumed true and lexer actions are not executed.
    Lexer *const _recog;

    /// The current token's starting index into the character stream.
    /// Shared across DFA to ATN simulation in case the ATN fails and the
    /// DFA did not have a previous accept state. In this case, we use the
    /// ATN-generated exception object.
    size_t _startIndex;

    /// line number 1..n within the input.
    size_t _line;

    /// The index of the character relative to the beginning of the line 0..n-1.
    size_t _charPositionInLine;

  public:
    /// One DFA per lexer mode; held by reference, so the vector must outlive the simulator.
    std::vector<dfa::DFA> &_decisionToDFA;

  protected:
    /// Lexer mode used for the match in progress.
    size_t _mode;

    /// Used during DFA/ATN exec to record the most recent accept configuration info.
    SimState _prevAccept;

  public:
    /// Number of match() calls made so far, across all simulator instances.
    static int match_calls;

    /// Creates a simulator without an attached lexer.
    LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
    LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
    virtual ~LexerATNSimulator () {}

    /// Copies column, line, mode and token start index from another simulator.
    virtual void copyState(LexerATNSimulator *simulator);
    /// Matches one token at the current input position using lexer mode {@code mode}.
    virtual size_t match(CharStream *input, size_t mode);
    /// Resets to start of input: line 1, column 0, default mode.
    virtual void reset() override;

    /// Replaces every cached DFA with a fresh, empty one.
    virtual void clearDFA() override;

  protected:
    /// Builds the start state for the current mode from the ATN, then runs execATN().
    virtual size_t matchATN(CharStream *input);
    /// Runs the simulation loop starting from DFA state {@code ds0}.
    virtual size_t execATN(CharStream *input, dfa::DFAState *ds0);

    /// <summary>
    /// Get an existing target state for an edge in the DFA. If the target state
    /// for the edge has not yet been computed or is otherwise not available,
    /// this method returns {@code null}.
    /// </summary>
    /// <param name="s"> The current DFA state </param>
    /// <param name="t"> The next input symbol </param>
    /// <returns> The existing target DFA state for the given input symbol
    /// {@code t}, or {@code null} if the target state for this edge is not
    /// already cached </returns>
    virtual dfa::DFAState *getExistingTargetState(dfa::DFAState *s, size_t t);

    /// <summary>
    /// Compute a target state for an edge in the DFA, and attempt to add the
    /// computed state and corresponding edge to the DFA.
    /// </summary>
    /// <param name="input"> The input stream </param>
    /// <param name="s"> The current DFA state </param>
    /// <param name="t"> The next input symbol
    /// </param>
    /// <returns> The computed target DFA state for the given input symbol
    /// {@code t}. If {@code t} does not lead to a valid DFA state, this method
    /// returns <seealso cref="#ERROR"/>. </returns>
    virtual dfa::DFAState *computeTargetState(CharStream *input, dfa::DFAState *s, size_t t);

    /// Accepts the most recently recorded accept state if there is one; otherwise returns
    /// EOF when EOF was the first symbol of the token, or throws LexerNoViableAltException.
    virtual size_t failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t);

    /// <summary>
    /// Given a starting configuration set, figure out all ATN configurations
    /// we can reach upon input {@code t}. Parameter {@code reach} is a return
    /// parameter.
    /// </summary>
    void getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, // closure_ as we have a closure() already
                               ATNConfigSet *reach, size_t t);

    /// Seeks the input to {@code index}, restores line/column, and executes the lexer
    /// actions when both an executor and a lexer are present.
    virtual void accept(CharStream *input, const Ref<LexerActionExecutor> &lexerActionExecutor, size_t startIndex, size_t index,
                        size_t line, size_t charPos);

    /// Returns the transition's target if it matches {@code t}, otherwise {@code null}.
    virtual ATNState *getReachableTarget(Transition *trans, size_t t);

    /// Builds the initial configuration set for start state {@code p}.
    virtual std::unique_ptr<ATNConfigSet> computeStartState(CharStream *input, ATNState *p);

    /// <summary>
    /// Since the alternatives within any lexer decision are ordered by
    /// preference, this method stops pursuing the closure as soon as an accept
    /// state is reached. After the first accept state is reached by depth-first
    /// search from {@code config}, all other (potentially reachable) states for
    /// this rule would have a lower priority.
    /// </summary>
    /// <returns> {@code true} if an accept state is reached, otherwise
    /// {@code false}. </returns>
    virtual bool closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs,
                         bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon);

    // side-effect: can alter configs.hasSemanticContext
    /// Returns the configuration reached by taking {@code t} without consuming input,
    /// or {@code null} if the transition cannot be taken that way.
    virtual Ref<LexerATNConfig> getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, Transition *t,
                                                 ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon);

    /// <summary>
    /// Evaluate a predicate specified in the lexer.
    /// <p/>
    /// If {@code speculative} is {@code true}, this method was called before
    /// <seealso cref="#consume"/> for the matched character. This method should call
    /// <seealso cref="#consume"/> before evaluating the predicate to ensure position
    /// sensitive values, including <seealso cref="Lexer#getText"/>, <seealso cref="Lexer#getLine"/>,
    /// and <seealso cref="Lexer#getCharPositionInLine"/>, properly reflect the current
    /// lexer state. This method should restore {@code input} and the simulator
    /// to the original state before returning (i.e. undo the actions made by the
    /// call to <seealso cref="#consume"/>).
    /// </summary>
    /// <param name="input"> The input stream. </param>
    /// <param name="ruleIndex"> The rule containing the predicate. </param>
    /// <param name="predIndex"> The index of the predicate within the rule. </param>
    /// <param name="speculative"> {@code true} if the current index in {@code input} is
    /// one character before the predicate's location.
    /// </param>
    /// <returns> {@code true} if the specified predicate evaluates to
    /// {@code true}. </returns>
    virtual bool evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative);

    /// Records {@code dfaState} and the current input index, line and column in _prevAccept.
    virtual void captureSimState(CharStream *input, dfa::DFAState *dfaState);
    /// Creates (or finds) the DFA state for {@code q} and links it from {@code from} on
    /// {@code t}, unless {@code q} was marked as having semantic context.
    virtual dfa::DFAState* addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q);
    /// Links {@code p} to {@code q} on {@code t}; ignored when {@code t} exceeds MAX_DFA_EDGE.
    virtual void addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q);

    /// <summary>
    /// Add a new DFA state if there isn't one with this set of
    /// configurations already. This method also detects the first
    /// configuration containing an ATN rule stop state. Later, when
    /// traversing the DFA, we will know which rule to accept.
    /// </summary>
    virtual dfa::DFAState *addDFAState(ATNConfigSet *configs);

  public:
    /// Returns the DFA for lexer mode {@code mode}.
    dfa::DFA& getDFA(size_t mode);

    /// Get the text matched so far for the current token.
    virtual std::string getText(CharStream *input);
    virtual size_t getLine() const;
    virtual void setLine(size_t line);
    virtual size_t getCharPositionInLine();
    virtual void setCharPositionInLine(size_t charPositionInLine);
    /// Consumes one character and updates the line/column counters.
    virtual void consume(CharStream *input);
    /// Returns "EOF" for Token::EOF, otherwise the symbol as a single-quoted character.
    virtual std::string getTokenName(size_t t);

  private:
    void InitializeInstanceFields();
  };

} // namespace atn
} // namespace antlr4
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerAction.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerAction.cpp
new file mode 100644
index 0000000..983ba6d
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerAction.cpp
@@ -0,0 +1,9 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
+ */ + +#include "LexerAction.h" + +antlr4::atn::LexerAction::~LexerAction() { +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerAction.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerAction.h new file mode 100644 index 0000000..8e833b6 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerAction.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerActionType.h" +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Represents a single action which can be executed following the successful + /// match of a lexer rule. Lexer actions are used for both embedded action syntax + /// and ANTLR 4's new lexer command syntax. + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerAction { + public: + virtual ~LexerAction(); + + /// <summary> + /// Gets the serialization type of the lexer action. + /// </summary> + /// <returns> The serialization type of the lexer action. </returns> + virtual LexerActionType getActionType() const = 0; + + /// <summary> + /// Gets whether the lexer action is position-dependent. Position-dependent + /// actions may have different semantics depending on the <seealso cref="CharStream"/> + /// index at the time the action is executed. + /// + /// <para>Many lexer commands, including {@code type}, {@code skip}, and + /// {@code more}, do not check the input index during their execution. 
+ /// Actions like this are position-independent, and may be stored more + /// efficiently as part of the <seealso cref="LexerATNConfig#lexerActionExecutor"/>.</para> + /// </summary> + /// <returns> {@code true} if the lexer action semantics can be affected by the + /// position of the input <seealso cref="CharStream"/> at the time it is executed; + /// otherwise, {@code false}. </returns> + virtual bool isPositionDependent() const = 0; + + /// <summary> + /// Execute the lexer action in the context of the specified <seealso cref="Lexer"/>. + /// + /// <para>For position-dependent actions, the input stream must already be + /// positioned correctly prior to calling this method.</para> + /// </summary> + /// <param name="lexer"> The lexer instance. </param> + virtual void execute(Lexer *lexer) = 0; + + virtual size_t hashCode() const = 0; + virtual bool operator == (const LexerAction &obj) const = 0; + virtual bool operator != (const LexerAction &obj) const { + return !(*this == obj); + } + + virtual std::string toString() const = 0; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerActionExecutor.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerActionExecutor.cpp new file mode 100644 index 0000000..1ae510f --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerActionExecutor.cpp @@ -0,0 +1,107 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "atn/LexerIndexedCustomAction.h" +#include "support/CPPUtils.h" +#include "support/Arrays.h" + +#include "atn/LexerActionExecutor.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +LexerActionExecutor::LexerActionExecutor(const std::vector<Ref<LexerAction>> &lexerActions) + : _lexerActions(lexerActions), _hashCode(generateHashCode()) { +} + +LexerActionExecutor::~LexerActionExecutor() { +} + +Ref<LexerActionExecutor> LexerActionExecutor::append(Ref<LexerActionExecutor> const& lexerActionExecutor, + Ref<LexerAction> const& lexerAction) { + if (lexerActionExecutor == nullptr) { + return std::make_shared<LexerActionExecutor>(std::vector<Ref<LexerAction>> { lexerAction }); + } + + std::vector<Ref<LexerAction>> lexerActions = lexerActionExecutor->_lexerActions; // Make a copy. + lexerActions.push_back(lexerAction); + return std::make_shared<LexerActionExecutor>(lexerActions); +} + +Ref<LexerActionExecutor> LexerActionExecutor::fixOffsetBeforeMatch(int offset) { + std::vector<Ref<LexerAction>> updatedLexerActions; + for (size_t i = 0; i < _lexerActions.size(); i++) { + if (_lexerActions[i]->isPositionDependent() && !is<LexerIndexedCustomAction>(_lexerActions[i])) { + if (updatedLexerActions.empty()) { + updatedLexerActions = _lexerActions; // Make a copy. 
+ } + + updatedLexerActions[i] = std::make_shared<LexerIndexedCustomAction>(offset, _lexerActions[i]); + } + } + + if (updatedLexerActions.empty()) { + return shared_from_this(); + } + + return std::make_shared<LexerActionExecutor>(updatedLexerActions); +} + +std::vector<Ref<LexerAction>> LexerActionExecutor::getLexerActions() const { + return _lexerActions; +} + +void LexerActionExecutor::execute(Lexer *lexer, CharStream *input, size_t startIndex) { + bool requiresSeek = false; + size_t stopIndex = input->index(); + + auto onExit = finally([requiresSeek, input, stopIndex]() { + if (requiresSeek) { + input->seek(stopIndex); + } + }); + for (auto lexerAction : _lexerActions) { + if (is<LexerIndexedCustomAction>(lexerAction)) { + int offset = (std::static_pointer_cast<LexerIndexedCustomAction>(lexerAction))->getOffset(); + input->seek(startIndex + offset); + lexerAction = std::static_pointer_cast<LexerIndexedCustomAction>(lexerAction)->getAction(); + requiresSeek = (startIndex + offset) != stopIndex; + } else if (lexerAction->isPositionDependent()) { + input->seek(stopIndex); + requiresSeek = false; + } + + lexerAction->execute(lexer); + } +} + +size_t LexerActionExecutor::hashCode() const { + return _hashCode; +} + +bool LexerActionExecutor::operator == (const LexerActionExecutor &obj) const { + if (&obj == this) { + return true; + } + + return _hashCode == obj._hashCode && Arrays::equals(_lexerActions, obj._lexerActions); +} + +bool LexerActionExecutor::operator != (const LexerActionExecutor &obj) const { + return !operator==(obj); +} + +size_t LexerActionExecutor::generateHashCode() const { + size_t hash = MurmurHash::initialize(); + for (auto lexerAction : _lexerActions) { + hash = MurmurHash::update(hash, lexerAction); + } + hash = MurmurHash::finish(hash, _lexerActions.size()); + + return hash; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerActionExecutor.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerActionExecutor.h new file 
mode 100644 index 0000000..488b54c --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerActionExecutor.h @@ -0,0 +1,115 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "CharStream.h" +#include "atn/LexerAction.h" + +namespace antlr4 { +namespace atn { + + /// Represents an executor for a sequence of lexer actions which are traversed during + /// the matching operation of a lexer rule (token). + /// + /// <para>The executor tracks position information for position-dependent lexer actions + /// efficiently, ensuring that actions appearing only at the end of the rule do + /// not cause bloating of the <seealso cref="DFA"/> created for the lexer.</para> + class ANTLR4CPP_PUBLIC LexerActionExecutor : public std::enable_shared_from_this<LexerActionExecutor> { + public: + /// <summary> + /// Constructs an executor for a sequence of <seealso cref="LexerAction"/> actions. </summary> + /// <param name="lexerActions"> The lexer actions to execute. </param> + LexerActionExecutor(const std::vector<Ref<LexerAction>> &lexerActions); + virtual ~LexerActionExecutor(); + + /// <summary> + /// Creates a <seealso cref="LexerActionExecutor"/> which executes the actions for + /// the input {@code lexerActionExecutor} followed by a specified + /// {@code lexerAction}. + /// </summary> + /// <param name="lexerActionExecutor"> The executor for actions already traversed by + /// the lexer while matching a token within a particular + /// <seealso cref="LexerATNConfig"/>. If this is {@code null}, the method behaves as + /// though it were an empty executor. </param> + /// <param name="lexerAction"> The lexer action to execute after the actions + /// specified in {@code lexerActionExecutor}. 
+ /// </param> + /// <returns> A <seealso cref="LexerActionExecutor"/> for executing the combined actions + /// of {@code lexerActionExecutor} and {@code lexerAction}. </returns> + static Ref<LexerActionExecutor> append(Ref<LexerActionExecutor> const& lexerActionExecutor, + Ref<LexerAction> const& lexerAction); + + /// <summary> + /// Creates a <seealso cref="LexerActionExecutor"/> which encodes the current offset + /// for position-dependent lexer actions. + /// + /// <para>Normally, when the executor encounters lexer actions where + /// <seealso cref="LexerAction#isPositionDependent"/> returns {@code true}, it calls + /// <seealso cref="IntStream#seek"/> on the input <seealso cref="CharStream"/> to set the input + /// position to the <em>end</em> of the current token. This behavior provides + /// for efficient DFA representation of lexer actions which appear at the end + /// of a lexer rule, even when the lexer rule matches a variable number of + /// characters.</para> + /// + /// <para>Prior to traversing a match transition in the ATN, the current offset + /// from the token start index is assigned to all position-dependent lexer + /// actions which have not already been assigned a fixed offset. By storing + /// the offsets relative to the token start index, the DFA representation of + /// lexer actions which appear in the middle of tokens remains efficient due + /// to sharing among tokens of the same length, regardless of their absolute + /// position in the input stream.</para> + /// + /// <para>If the current executor already has offsets assigned to all + /// position-dependent lexer actions, the method returns {@code this}.</para> + /// </summary> + /// <param name="offset"> The current offset to assign to all position-dependent + /// lexer actions which do not already have offsets assigned. + /// </param> + /// <returns> A <seealso cref="LexerActionExecutor"/> which stores input stream offsets + /// for all position-dependent lexer actions. 
</returns> + virtual Ref<LexerActionExecutor> fixOffsetBeforeMatch(int offset); + + /// <summary> + /// Gets the lexer actions to be executed by this executor. </summary> + /// <returns> The lexer actions to be executed by this executor. </returns> + virtual std::vector<Ref<LexerAction>> getLexerActions() const; + + /// <summary> + /// Execute the actions encapsulated by this executor within the context of a + /// particular <seealso cref="Lexer"/>. + /// + /// <para>This method calls <seealso cref="IntStream#seek"/> to set the position of the + /// {@code input} <seealso cref="CharStream"/> prior to calling + /// <seealso cref="LexerAction#execute"/> on a position-dependent action. Before the + /// method returns, the input position will be restored to the same position + /// it was in when the method was invoked.</para> + /// </summary> + /// <param name="lexer"> The lexer instance. </param> + /// <param name="input"> The input stream which is the source for the current token. + /// When this method is called, the current <seealso cref="IntStream#index"/> for + /// {@code input} should be the start of the following token, i.e. 1 + /// character past the end of the current token. </param> + /// <param name="startIndex"> The token start index. This value may be passed to + /// <seealso cref="IntStream#seek"/> to set the {@code input} position to the beginning + /// of the token. </param> + virtual void execute(Lexer *lexer, CharStream *input, size_t startIndex); + + virtual size_t hashCode() const; // Returns the hash code computed once in the constructor. + virtual bool operator == (const LexerActionExecutor &obj) const; // True for the same object, or for equal hash codes plus Arrays::equals on the action lists. + virtual bool operator != (const LexerActionExecutor &obj) const; // Negation of operator ==. + + private: + const std::vector<Ref<LexerAction>> _lexerActions; + + /// Caches the result of <seealso cref="#hashCode"/> since the hash code is an element + /// of the performance-critical <seealso cref="LexerATNConfig#hashCode"/> operation. 
+ const size_t _hashCode; + + size_t generateHashCode() const; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerActionType.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerActionType.h new file mode 100644 index 0000000..a72f15c --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerActionType.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Represents the serialization type of a <seealso cref="LexerAction"/>. + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + enum class LexerActionType : size_t { + /// <summary> + /// The type of a <seealso cref="LexerChannelAction"/> action. + /// </summary> + CHANNEL, + /// <summary> + /// The type of a <seealso cref="LexerCustomAction"/> action. + /// </summary> + CUSTOM, + /// <summary> + /// The type of a <seealso cref="LexerModeAction"/> action. + /// </summary> + MODE, + /// <summary> + /// The type of a <seealso cref="LexerMoreAction"/> action. + /// </summary> + MORE, + /// <summary> + /// The type of a <seealso cref="LexerPopModeAction"/> action. + /// </summary> + POP_MODE, + /// <summary> + /// The type of a <seealso cref="LexerPushModeAction"/> action. + /// </summary> + PUSH_MODE, + /// <summary> + /// The type of a <seealso cref="LexerSkipAction"/> action. + /// </summary> + SKIP, + /// <summary> + /// The type of a <seealso cref="LexerTypeAction"/> action. 
+ /// </summary> + TYPE, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerChannelAction.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerChannelAction.cpp new file mode 100644 index 0000000..959beab --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerChannelAction.cpp @@ -0,0 +1,55 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerChannelAction.h" + +using namespace antlr4::atn; +using namespace antlr4::misc; + +LexerChannelAction::LexerChannelAction(int channel) : _channel(channel) { +} + +int LexerChannelAction::getChannel() const { + return _channel; +} + +LexerActionType LexerChannelAction::getActionType() const { + return LexerActionType::CHANNEL; +} + +bool LexerChannelAction::isPositionDependent() const { + return false; +} + +void LexerChannelAction::execute(Lexer *lexer) { + lexer->setChannel(_channel); +} + +size_t LexerChannelAction::hashCode() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, _channel); + return MurmurHash::finish(hash, 2); +} + +bool LexerChannelAction::operator == (const LexerAction &obj) const { + if (&obj == this) { + return true; + } + + const LexerChannelAction *action = dynamic_cast<const LexerChannelAction *>(&obj); + if (action == nullptr) { + return false; + } + + return _channel == action->_channel; +} + +std::string LexerChannelAction::toString() const { + return "channel(" + std::to_string(_channel) + ")"; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerChannelAction.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerChannelAction.h new file mode 100644 index 
0000000..73e3a26 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerChannelAction.h @@ -0,0 +1,63 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + using antlr4::Lexer; + + /// <summary> + /// Implements the {@code channel} lexer action by calling + /// <seealso cref="Lexer#setChannel"/> with the assigned channel. + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerChannelAction final : public LexerAction { + public: + /// <summary> + /// Constructs a new {@code channel} action with the specified channel value. </summary> + /// <param name="channel"> The channel value to pass to <seealso cref="Lexer#setChannel"/>. </param> + LexerChannelAction(int channel); + + /// <summary> + /// Gets the channel to use for the <seealso cref="Token"/> created by the lexer. + /// </summary> + /// <returns> The channel to use for the <seealso cref="Token"/> created by the lexer. </returns> + int getChannel() const; + + /// <summary> + /// {@inheritDoc} </summary> + /// <returns> This method returns <seealso cref="LexerActionType#CHANNEL"/>. </returns> + virtual LexerActionType getActionType() const override; + + /// <summary> + /// {@inheritDoc} </summary> + /// <returns> This method returns {@code false}. 
</returns> + virtual bool isPositionDependent() const override; + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#setChannel"/> with the + /// value provided by <seealso cref="#getChannel"/>.</para> + /// </summary> + virtual void execute(Lexer *lexer) override; + + virtual size_t hashCode() const override; // Hashes the action type together with the channel value. + virtual bool operator == (const LexerAction &obj) const override; // True when obj is this object or a LexerChannelAction with the same channel. + virtual std::string toString() const override; // Returns "channel(" + the channel number + ")". + + private: + const int _channel; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerCustomAction.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerCustomAction.cpp new file mode 100644 index 0000000..1e977a3 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerCustomAction.cpp @@ -0,0 +1,62 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "support/CPPUtils.h" +#include "Lexer.h" + +#include "atn/LexerCustomAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +LexerCustomAction::LexerCustomAction(size_t ruleIndex, size_t actionIndex) : _ruleIndex(ruleIndex), _actionIndex(actionIndex) { +} + +size_t LexerCustomAction::getRuleIndex() const { + return _ruleIndex; +} + +size_t LexerCustomAction::getActionIndex() const { + return _actionIndex; +} + +LexerActionType LexerCustomAction::getActionType() const { + return LexerActionType::CUSTOM; +} + +bool LexerCustomAction::isPositionDependent() const { + return true; +} + +void LexerCustomAction::execute(Lexer *lexer) { /* Forwards to Lexer::action, passing nullptr first and then the stored rule and action indexes. */ + lexer->action(nullptr, _ruleIndex, _actionIndex); +} + +size_t LexerCustomAction::hashCode() const { /* Mixes the action type, rule index and action index (three values). */ + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, _ruleIndex); + hash = MurmurHash::update(hash, _actionIndex); + return MurmurHash::finish(hash, 3); +} + +bool LexerCustomAction::operator == (const LexerAction &obj) const { /* Equal when obj is this object, or a LexerCustomAction with the same two indexes. */ + if (&obj == this) { + return true; + } + + const LexerCustomAction *action = dynamic_cast<const LexerCustomAction *>(&obj); + if (action == nullptr) { + return false; + } + + return _ruleIndex == action->_ruleIndex && _actionIndex == action->_actionIndex; +} + +std::string LexerCustomAction::toString() const { + return antlrcpp::toString(this); +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerCustomAction.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerCustomAction.h new file mode 100644 index 0000000..bd1c5d3 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerCustomAction.h @@ -0,0 +1,87 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Executes a custom lexer action by calling <seealso cref="Recognizer#action"/> with the + /// rule and action indexes assigned to the custom action. The implementation of + /// a custom action is added to the generated code for the lexer in an override + /// of <seealso cref="Recognizer#action"/> when the grammar is compiled. + /// + /// <para>This class may represent embedded actions created with the <code>{...}</code> + /// syntax in ANTLR 4, as well as actions created for lexer commands where the + /// command argument could not be evaluated when the grammar was compiled.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerCustomAction final : public LexerAction { + public: + /// <summary> + /// Constructs a custom lexer action with the specified rule and action + /// indexes. + /// </summary> + /// <param name="ruleIndex"> The rule index to use for calls to + /// <seealso cref="Recognizer#action"/>. </param> + /// <param name="actionIndex"> The action index to use for calls to + /// <seealso cref="Recognizer#action"/>. </param> + LexerCustomAction(size_t ruleIndex, size_t actionIndex); + + /// <summary> + /// Gets the rule index to use for calls to <seealso cref="Recognizer#action"/>. + /// </summary> + /// <returns> The rule index for the custom action. </returns> + size_t getRuleIndex() const; + + /// <summary> + /// Gets the action index to use for calls to <seealso cref="Recognizer#action"/>. + /// </summary> + /// <returns> The action index for the custom action. 
</returns> + size_t getActionIndex() const; + + /// <summary> + /// {@inheritDoc} + /// </summary> + /// <returns> This method returns <seealso cref="LexerActionType#CUSTOM"/>. </returns> + virtual LexerActionType getActionType() const override; + + /// <summary> + /// Gets whether the lexer action is position-dependent. Position-dependent + /// actions may have different semantics depending on the <seealso cref="CharStream"/> + /// index at the time the action is executed. + /// + /// <para>Custom actions are position-dependent since they may represent a + /// user-defined embedded action which makes calls to methods like + /// <seealso cref="Lexer#getText"/>.</para> + /// </summary> + /// <returns> This method returns {@code true}. </returns> + virtual bool isPositionDependent() const override; + + /// <summary> + /// {@inheritDoc} + /// + /// <para>Custom actions are implemented by calling <seealso cref="Lexer#action"/> with the + /// appropriate rule and action indexes.</para> + /// </summary> + virtual void execute(Lexer *lexer) override; + + virtual size_t hashCode() const override; // Hashes the action type, rule index and action index. + virtual bool operator == (const LexerAction &obj) const override; // True when obj is this object or a LexerCustomAction with the same rule and action indexes. + virtual std::string toString() const override; // Delegates to antlrcpp::toString. + + private: + const size_t _ruleIndex; + const size_t _actionIndex; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerIndexedCustomAction.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerIndexedCustomAction.cpp new file mode 100644 index 0000000..9ea396a --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerIndexedCustomAction.cpp @@ -0,0 +1,63 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/CPPUtils.h" + +#include "atn/LexerIndexedCustomAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +LexerIndexedCustomAction::LexerIndexedCustomAction(int offset, Ref<LexerAction> const& action) + : _offset(offset), _action(action) { +} + +int LexerIndexedCustomAction::getOffset() const { + return _offset; +} + +Ref<LexerAction> LexerIndexedCustomAction::getAction() const { + return _action; +} + +LexerActionType LexerIndexedCustomAction::getActionType() const { /* Reports the wrapped action's type; this wrapper has no type of its own. */ + return _action->getActionType(); +} + +bool LexerIndexedCustomAction::isPositionDependent() const { + return true; +} + +void LexerIndexedCustomAction::execute(Lexer *lexer) { + // assume the input stream position was properly set by the calling code + _action->execute(lexer); +} + +size_t LexerIndexedCustomAction::hashCode() const { // Mixes the offset with the wrapped action (two values). + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, _offset); + hash = MurmurHash::update(hash, _action); + return MurmurHash::finish(hash, 2); +} + +bool LexerIndexedCustomAction::operator == (const LexerAction &obj) const { // Equal when obj is this object, or an indexed action with the same offset whose wrapped action compares equal. + if (&obj == this) { + return true; + } + + const LexerIndexedCustomAction *action = dynamic_cast<const LexerIndexedCustomAction *>(&obj); + if (action == nullptr) { + return false; + } + + return _offset == action->_offset && *_action == *action->_action; +} + +std::string LexerIndexedCustomAction::toString() const { + return antlrcpp::toString(this); +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerIndexedCustomAction.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerIndexedCustomAction.h new file mode 100644 index 0000000..bb371f8 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerIndexedCustomAction.h @@ -0,0 +1,82 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RuleContext.h" +#include "atn/LexerAction.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This implementation of <seealso cref="LexerAction"/> is used for tracking input offsets + /// for position-dependent actions within a <seealso cref="LexerActionExecutor"/>. + /// + /// <para>This action is not serialized as part of the ATN, and is only required for + /// position-dependent lexer actions which appear at a location other than the + /// end of a rule. For more information about DFA optimizations employed for + /// lexer actions, see <seealso cref="LexerActionExecutor#append"/> and + /// <seealso cref="LexerActionExecutor#fixOffsetBeforeMatch"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerIndexedCustomAction final : public LexerAction { + public: + /// <summary> + /// Constructs a new indexed custom action by associating a character offset + /// with a <seealso cref="LexerAction"/>. + /// + /// <para>Note: This class is only required for lexer actions for which + /// <seealso cref="LexerAction#isPositionDependent"/> returns {@code true}.</para> + /// </summary> + /// <param name="offset"> The offset into the input <seealso cref="CharStream"/>, relative to + /// the token start index, at which the specified lexer action should be + /// executed. </param> + /// <param name="action"> The lexer action to execute at a particular offset in the + /// input <seealso cref="CharStream"/>. </param> + LexerIndexedCustomAction(int offset, Ref<LexerAction> const& action); + + /// <summary> + /// Gets the location in the input <seealso cref="CharStream"/> at which the lexer + /// action should be executed. The value is interpreted as an offset relative + /// to the token start index. 
+ /// </summary> + /// <returns> The location in the input <seealso cref="CharStream"/> at which the lexer + /// action should be executed. </returns> + int getOffset() const; + + /// <summary> + /// Gets the lexer action to execute. + /// </summary> + /// <returns> A <seealso cref="LexerAction"/> object which executes the lexer action. </returns> + Ref<LexerAction> getAction() const; + + /// <summary> + /// {@inheritDoc} + /// </summary> + /// <returns> This method returns the result of calling <seealso cref="#getActionType"/> + /// on the <seealso cref="LexerAction"/> returned by <seealso cref="#getAction"/>. </returns> + virtual LexerActionType getActionType() const override; + + /// <summary> + /// {@inheritDoc} </summary> + /// <returns> This method returns {@code true}. </returns> + virtual bool isPositionDependent() const override; + + virtual void execute(Lexer *lexer) override; // Runs the wrapped action; the caller is expected to have positioned the input stream already. + virtual size_t hashCode() const override; // Hashes the offset together with the wrapped action. + virtual bool operator == (const LexerAction &obj) const override; // True when obj is this object or an indexed action with the same offset and an equal wrapped action. + virtual std::string toString() const override; // Delegates to antlrcpp::toString. + + private: + const int _offset; + const Ref<LexerAction> _action; + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerModeAction.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerModeAction.cpp new file mode 100644 index 0000000..0bda8b7 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerModeAction.cpp @@ -0,0 +1,56 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerModeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +LexerModeAction::LexerModeAction(int mode) : _mode(mode) { +} + +int LexerModeAction::getMode() const { /* const-qualified like the other read-only accessors, so it can be called on a const action */ + return _mode; +} + +LexerActionType LexerModeAction::getActionType() const { + return LexerActionType::MODE; +} + +bool LexerModeAction::isPositionDependent() const { + return false; +} + +void LexerModeAction::execute(Lexer *lexer) { + lexer->setMode(_mode); +} + +size_t LexerModeAction::hashCode() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, _mode); + return MurmurHash::finish(hash, 2); +} + +bool LexerModeAction::operator == (const LexerAction &obj) const { + if (&obj == this) { + return true; + } + + const LexerModeAction *action = dynamic_cast<const LexerModeAction *>(&obj); + if (action == nullptr) { + return false; + } + + return _mode == action->_mode; +} + +std::string LexerModeAction::toString() const { + return "mode(" + std::to_string(_mode) + ")"; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerModeAction.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerModeAction.h new file mode 100644 index 0000000..49a858b --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerModeAction.h @@ -0,0 +1,61 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code mode} lexer action by calling <seealso cref="Lexer#setMode"/> with + /// the assigned mode. 
+ /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerModeAction final : public LexerAction { + public: + /// <summary> + /// Constructs a new {@code mode} action with the specified mode value. </summary> + /// <param name="mode"> The mode value to pass to <seealso cref="Lexer#setMode"/>. </param> + LexerModeAction(int mode); + + /// <summary> + /// Get the lexer mode this action should transition the lexer to. + /// </summary> + /// <returns> The lexer mode for this {@code mode} command. </returns> + int getMode() const; + + /// <summary> + /// {@inheritDoc} </summary> + /// <returns> This method returns <seealso cref="LexerActionType#MODE"/>. </returns> + virtual LexerActionType getActionType() const override; + + /// <summary> + /// {@inheritDoc} </summary> + /// <returns> This method returns {@code false}. </returns> + virtual bool isPositionDependent() const override; + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#setMode"/> with the + /// value provided by <seealso cref="#getMode"/>.</para> + /// </summary> + virtual void execute(Lexer *lexer) override; + + virtual size_t hashCode() const override; // Hashes the action type together with the mode value. + virtual bool operator == (const LexerAction &obj) const override; // True when obj is this object or a LexerModeAction with the same mode. + virtual std::string toString() const override; // Returns "mode(" + the mode number + ")". + + private: + const int _mode; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerMoreAction.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerMoreAction.cpp new file mode 100644 index 0000000..99b2dd9 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerMoreAction.cpp @@ -0,0 +1,47 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerMoreAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +const Ref<LexerMoreAction> LexerMoreAction::getInstance() { /* Function-local static: one shared instance, created on the first call. */ + static Ref<LexerMoreAction> instance(new LexerMoreAction()); + return instance; +} + +LexerMoreAction::LexerMoreAction() { +} + +LexerActionType LexerMoreAction::getActionType() const { + return LexerActionType::MORE; +} + +bool LexerMoreAction::isPositionDependent() const { + return false; +} + +void LexerMoreAction::execute(Lexer *lexer) { + lexer->more(); +} + +size_t LexerMoreAction::hashCode() const { /* Only the action type is hashed; the action carries no other state. */ + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + return MurmurHash::finish(hash, 1); +} + +bool LexerMoreAction::operator == (const LexerAction &obj) const { /* Identity comparison: equal only to this very object. */ + return &obj == this; +} + +std::string LexerMoreAction::toString() const { + return "more"; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerMoreAction.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerMoreAction.h new file mode 100644 index 0000000..ee3b2aa --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerMoreAction.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code more} lexer action by calling <seealso cref="Lexer#more"/>. 
+ /// + /// <para>The {@code more} command does not have any parameters, so this action is + /// implemented as a singleton instance exposed by <seealso cref="#getInstance"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerMoreAction final : public LexerAction { + public: + /// <summary> + /// Provides a singleton instance of this parameterless lexer action. + /// </summary> + static const Ref<LexerMoreAction> getInstance(); + + /// <summary> + /// {@inheritDoc} </summary> + /// <returns> This method returns <seealso cref="LexerActionType#MORE"/>. </returns> + virtual LexerActionType getActionType() const override; + + /// <summary> + /// {@inheritDoc} </summary> + /// <returns> This method returns {@code false}. </returns> + virtual bool isPositionDependent() const override; + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#more"/>.</para> + /// </summary> + virtual void execute(Lexer *lexer) override; + + virtual size_t hashCode() const override; // Hashes the action type only. + virtual bool operator == (const LexerAction &obj) const override; // True only when obj is this very object. + virtual std::string toString() const override; // Returns "more". + + private: + /// Constructs the singleton instance of the lexer {@code more} command. + LexerMoreAction(); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerPopModeAction.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerPopModeAction.cpp new file mode 100644 index 0000000..cac0996 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerPopModeAction.cpp @@ -0,0 +1,47 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerPopModeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +const Ref<LexerPopModeAction> LexerPopModeAction::getInstance() { + static Ref<LexerPopModeAction> instance(new LexerPopModeAction()); + return instance; +} + +LexerPopModeAction::LexerPopModeAction() { +} + +LexerActionType LexerPopModeAction::getActionType() const { + return LexerActionType::POP_MODE; +} + +bool LexerPopModeAction::isPositionDependent() const { + return false; +} + +void LexerPopModeAction::execute(Lexer *lexer) { + lexer->popMode(); +} + +size_t LexerPopModeAction::hashCode() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + return MurmurHash::finish(hash, 1); +} + +bool LexerPopModeAction::operator == (const LexerAction &obj) const { + return &obj == this; +} + +std::string LexerPopModeAction::toString() const { + return "popMode"; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerPopModeAction.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerPopModeAction.h new file mode 100644 index 0000000..497305c --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerPopModeAction.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code popMode} lexer action by calling <seealso cref="Lexer#popMode"/>. 
+ /// + /// <para>The {@code popMode} command does not have any parameters, so this action is + /// implemented as a singleton instance exposed by <seealso cref="#getInstance"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerPopModeAction final : public LexerAction { + public: + /// <summary> + /// Provides a singleton instance of this parameterless lexer action. + /// </summary> + static const Ref<LexerPopModeAction> getInstance(); + + /// <summary> + /// {@inheritDoc} </summary> + /// <returns> This method returns <seealso cref="LexerActionType#POP_MODE"/>. </returns> + virtual LexerActionType getActionType() const override; + + /// <summary> + /// {@inheritDoc} </summary> + /// <returns> This method returns {@code false}. </returns> + virtual bool isPositionDependent() const override; + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#popMode"/>.</para> + /// </summary> + virtual void execute(Lexer *lexer) override; + + /// Hash derived from the action type only. + virtual size_t hashCode() const override; + /// Identity comparison: true only if {@code obj} is this same object. + virtual bool operator == (const LexerAction &obj) const override; + /// Returns the fixed text {@code popMode}. + virtual std::string toString() const override; + + private: + /// Constructs the singleton instance of the lexer {@code popMode} command. + LexerPopModeAction(); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerPushModeAction.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerPushModeAction.cpp new file mode 100644 index 0000000..017abed --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerPushModeAction.cpp @@ -0,0 +1,56 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerPushModeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +LexerPushModeAction::LexerPushModeAction(int mode) : _mode(mode) { +} + +int LexerPushModeAction::getMode() const { + return _mode; +} + +LexerActionType LexerPushModeAction::getActionType() const { + return LexerActionType::PUSH_MODE; +} + +bool LexerPushModeAction::isPositionDependent() const { + return false; +} + +void LexerPushModeAction::execute(Lexer *lexer) { + lexer->pushMode(_mode); +} + +/* Mixes two values into the hash: the action type and the mode. */ +size_t LexerPushModeAction::hashCode() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, _mode); + return MurmurHash::finish(hash, 2); +} + +/* Equal when obj is this object, or is another LexerPushModeAction holding the same mode. */ +bool LexerPushModeAction::operator == (const LexerAction &obj) const { + if (&obj == this) { + return true; + } + + const LexerPushModeAction *action = dynamic_cast<const LexerPushModeAction *>(&obj); + if (action == nullptr) { + return false; + } + + return _mode == action->_mode; +} + +std::string LexerPushModeAction::toString() const { + return "pushMode(" + std::to_string(_mode) + ")"; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerPushModeAction.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerPushModeAction.h new file mode 100644 index 0000000..43cb888 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerPushModeAction.h @@ -0,0 +1,61 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code pushMode} lexer action by calling + /// <seealso cref="Lexer#pushMode"/> with the assigned mode. + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerPushModeAction final : public LexerAction { + public: + /// <summary> + /// Constructs a new {@code pushMode} action with the specified mode value. </summary> + /// <param name="mode"> The mode value to pass to <seealso cref="Lexer#pushMode"/>. </param> + LexerPushModeAction(int mode); + + /// <summary> + /// Get the lexer mode this action should transition the lexer to. + /// </summary> + /// <returns> The lexer mode for this {@code pushMode} command. </returns> + int getMode() const; + + /// <summary> + /// {@inheritDoc} </summary> + /// <returns> This method returns <seealso cref="LexerActionType#PUSH_MODE"/>. </returns> + virtual LexerActionType getActionType() const override; + + /// <summary> + /// {@inheritDoc} </summary> + /// <returns> This method returns {@code false}. 
</returns> + virtual bool isPositionDependent() const override; + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#pushMode"/> with the + /// value provided by <seealso cref="#getMode"/>.</para> + /// </summary> + virtual void execute(Lexer *lexer) override; + + /// Hash derived from the action type and the mode value. + virtual size_t hashCode() const override; + /// True if {@code obj} is this object or another {@code LexerPushModeAction} with the same mode. + virtual bool operator == (const LexerAction &obj) const override; + /// Returns {@code pushMode(N)}, where N is the mode value in decimal. + virtual std::string toString() const override; + + private: + /// Mode value handed to the lexer by execute(); set once by the constructor. + const int _mode; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerSkipAction.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerSkipAction.cpp new file mode 100644 index 0000000..01947ce --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerSkipAction.cpp @@ -0,0 +1,47 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerSkipAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +/* Returns the single shared instance, held in a function-local static that is constructed on the first call. */ +const Ref<LexerSkipAction> LexerSkipAction::getInstance() { + static Ref<LexerSkipAction> instance(new LexerSkipAction()); + return instance; +} + +LexerSkipAction::LexerSkipAction() { +} + +LexerActionType LexerSkipAction::getActionType() const { + return LexerActionType::SKIP; +} + +bool LexerSkipAction::isPositionDependent() const { + return false; +} + +void LexerSkipAction::execute(Lexer *lexer) { + lexer->skip(); +} + +/* The hash covers the action type only; this action has no parameters to mix in. */ +size_t LexerSkipAction::hashCode() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + return MurmurHash::finish(hash, 1); +} + +/* Equality is object identity: true only when obj is this very object. */ +bool LexerSkipAction::operator == (const LexerAction &obj) const { + return &obj == this; +} + +std::string LexerSkipAction::toString() const { + return "skip"; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerSkipAction.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerSkipAction.h new file mode 100644 index 0000000..5bd2e1c --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerSkipAction.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code skip} lexer action by calling <seealso cref="Lexer#skip"/>. 
+ /// + /// <para>The {@code skip} command does not have any parameters, so this action is + /// implemented as a singleton instance exposed by <seealso cref="#getInstance"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerSkipAction final : public LexerAction { + public: + /// Provides a singleton instance of this parameterless lexer action. + static const Ref<LexerSkipAction> getInstance(); + + /// <summary> + /// {@inheritDoc} </summary> + /// <returns> This method returns <seealso cref="LexerActionType#SKIP"/>. </returns> + virtual LexerActionType getActionType() const override; + + /// <summary> + /// {@inheritDoc} </summary> + /// <returns> This method returns {@code false}. </returns> + virtual bool isPositionDependent() const override; + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#skip"/>.</para> + /// </summary> + virtual void execute(Lexer *lexer) override; + + /// Hash derived from the action type only. + virtual size_t hashCode() const override; + /// Identity comparison: true only if {@code obj} is this same object. + virtual bool operator == (const LexerAction &obj) const override; + /// Returns the fixed text {@code skip}. + virtual std::string toString() const override; + + private: + /// Constructs the singleton instance of the lexer {@code skip} command. + LexerSkipAction(); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerTypeAction.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerTypeAction.cpp new file mode 100644 index 0000000..006778a --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerTypeAction.cpp @@ -0,0 +1,56 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerTypeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +LexerTypeAction::LexerTypeAction(int type) : _type(type) { +} + +int LexerTypeAction::getType() const { + return _type; +} + +LexerActionType LexerTypeAction::getActionType() const { + return LexerActionType::TYPE; +} + +bool LexerTypeAction::isPositionDependent() const { + return false; +} + +/* Passes the stored _type straight to the lexer; the virtual getType() is not consulted here. */ +void LexerTypeAction::execute(Lexer *lexer) { + lexer->setType(_type); +} + +/* Mixes two values into the hash: the action type and the token type. */ +size_t LexerTypeAction::hashCode() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, _type); + return MurmurHash::finish(hash, 2); +} + +/* Equal when obj is this object, or is another LexerTypeAction holding the same token type. */ +bool LexerTypeAction::operator == (const LexerAction &obj) const { + if (&obj == this) { + return true; + } + + const LexerTypeAction *action = dynamic_cast<const LexerTypeAction *>(&obj); + if (action == nullptr) { + return false; + } + + return _type == action->_type; +} + +std::string LexerTypeAction::toString() const { + return "type(" + std::to_string(_type) + ")"; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerTypeAction.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerTypeAction.h new file mode 100644 index 0000000..1c4a8a1 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerTypeAction.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerActionType.h" +#include "atn/LexerAction.h" + +namespace antlr4 { +namespace atn { + + /// Implements the {@code type} lexer action by calling <seealso cref="Lexer#setType"/> + /// with the assigned type. 
+ class ANTLR4CPP_PUBLIC LexerTypeAction : public LexerAction { + public: + /// <summary> + /// Constructs a new {@code type} action with the specified token type value. </summary> + /// <param name="type"> The type to assign to the token using <seealso cref="Lexer#setType"/>. </param> + LexerTypeAction(int type); + + /// <summary> + /// Gets the type to assign to a token created by the lexer. </summary> + /// <returns> The type to assign to a token created by the lexer. </returns> + virtual int getType() const; + + /// <summary> + /// {@inheritDoc} </summary> + /// <returns> This method returns <seealso cref="LexerActionType#TYPE"/>. </returns> + virtual LexerActionType getActionType() const override; + + /// <summary> + /// {@inheritDoc} </summary> + /// <returns> This method returns {@code false}. </returns> + virtual bool isPositionDependent() const override; + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#setType"/> with the + /// value provided by <seealso cref="#getType"/>.</para> + /// + /// <para>Note: the implementation reads the stored type directly, so an override of + /// the virtual <seealso cref="#getType"/> in a subclass does not change what is passed.</para> + /// </summary> + virtual void execute(Lexer *lexer) override; + + /// Hash derived from the action type and the token type value. + virtual size_t hashCode() const override; + /// True if {@code obj} is this object or another {@code LexerTypeAction} with the same token type. + virtual bool operator == (const LexerAction &obj) const override; + /// Returns {@code type(N)}, where N is the token type in decimal. + virtual std::string toString() const override; + + private: + /// Token type handed to the lexer by execute(); set once by the constructor. + const int _type; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LookaheadEventInfo.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LookaheadEventInfo.cpp new file mode 100644 index 0000000..aa3f912 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LookaheadEventInfo.cpp @@ -0,0 +1,16 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/LookaheadEventInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +/* Forwards the shared event data to DecisionEventInfo and then records the predicted alternative. */ +LookaheadEventInfo::LookaheadEventInfo(size_t decision, ATNConfigSet *configs, size_t predictedAlt, + TokenStream *input, size_t startIndex, size_t stopIndex, bool fullCtx) + : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, fullCtx) { + + this->predictedAlt = predictedAlt; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LookaheadEventInfo.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LookaheadEventInfo.h new file mode 100644 index 0000000..f5fc24f --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LookaheadEventInfo.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// This class represents profiling event information for tracking the lookahead + /// depth required in order to make a prediction. + class ANTLR4CPP_PUBLIC LookaheadEventInfo : public DecisionEventInfo { + public: + /// The alternative chosen by adaptivePredict(), not necessarily + /// the outermost alt shown for a rule; left-recursive rules have + /// user-level alts that differ from the rewritten rule with a (...) block + /// and a (..)* loop. + size_t predictedAlt = 0; + + /// <summary> + /// Constructs a new instance of the <seealso cref="LookaheadEventInfo"/> class with + /// the specified detailed lookahead information. 
+ /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="configs"> The final configuration set containing the necessary + /// information to determine the result of a prediction, or {@code null} if + /// the final configuration set is not available </param> + /// <param name="predictedAlt"> The predicted alternative; stored in + /// <seealso cref="#predictedAlt"/> </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the prediction was finally made </param> + /// <param name="fullCtx"> {@code true} if the current lookahead is part of an LL + /// prediction; otherwise, {@code false} if the current lookahead is part of + /// an SLL prediction </param> + LookaheadEventInfo(size_t decision, ATNConfigSet *configs, size_t predictedAlt, TokenStream *input, size_t startIndex, + size_t stopIndex, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LoopEndState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LoopEndState.cpp new file mode 100644 index 0000000..e008898 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LoopEndState.cpp @@ -0,0 +1,12 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/LoopEndState.h" + +using namespace antlr4::atn; + +/* Identifies this state kind by returning the LOOP_END constant. */ +size_t LoopEndState::getStateType() { + return LOOP_END; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LoopEndState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LoopEndState.h new file mode 100644 index 0000000..c90efa3 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LoopEndState.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + /// Mark the end of a * or + loop. + class ANTLR4CPP_PUBLIC LoopEndState final : public ATNState { + public: + /// NOTE(review): presumably the loop-back state of the loop that this state ends; + /// it is not assigned anywhere in the code shown here - confirm against the ATN deserializer. + ATNState *loopBackState = nullptr; + + /// Returns the LOOP_END state type constant. + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/Makefile b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/Makefile new file mode 100644 index 0000000..480bd85 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/Makefile @@ -0,0 +1,67 @@ + +CXXFLAGS += -g -std=c++0x -Wall #-Wextra +CXXFLAGS += -I. -I../ -I../misc/ -I../tree/ -I../dfa/ \ + -I../../../../../antlrcpp/ + +#TODO LDFLAGS += ? + +ALL_CXXFLAGS = $(CPPFLAGS) $(CXXFLAGS) +ALL_LDFLAGS = $(LDFLAGS) + +# Escote's files +SRCS = \ + AbstractPredicateTransition.cpp \ + ActionTransition.cpp \ + ArrayPredictionContext.cpp \ + ATNDeserializationOptions.cpp \ + ATNDeserializer.cpp \ + ATNState.cpp \ + ATNType.cpp \ + AtomTransition.cpp \ + BasicBlockStartState.cpp \ + BasicState.cpp \ + BlockEndState.cpp \ + BlockStartState.cpp \ + DecisionState.cpp \ + EmptyPredictionContext.cpp \ + EpsilonTransition.cpp \ + LexerATNConfig.cpp \ + LoopEndState.cpp +# Escote's TODO: LL1Analyzer.cpp LexerATNSimulator.cpp ATNSimulator.cpp \ + ATNSerializer.cpp ATNConfigSet.cpp ATNConfig.cpp \ + ATN.cpp + +# Alejandro's files +SRCS += \ + NotSetTransition.cpp \ + OrderedATNConfigSet.cpp \ + PlusBlockStartState.cpp \ + PlusLoopbackState.cpp \ + PredicateTransition.cpp \ + PredictionMode.cpp \ + RangeTransition.cpp \ + RuleStartState.cpp \ + RuleStopState.cpp \ + RuleTransition.cpp \ + SemanticContext.cpp \ + SetTransition.cpp \ + SingletonPredictionContext.cpp \ + StarBlockStartState.cpp \ + StarLoopbackState.cpp \ + StarLoopEntryState.cpp \ + 
TokensStartState.cpp \ + Transition.cpp \ + WildcardTransition.cpp +# Alejandro's TODO: PredictionContext.cpp PredictionContextCache.cpp \ + PrecedencePredicateTransition.cpp ParserATNSimulator.cpp + +OBJS = $(SRCS:.cpp=.o) + +all: $(OBJS) + +%.o: %.cpp + $(CXX) -c $(ALL_CXXFLAGS) $< -o $@ + +clean: + $(RM) $(OBJS) + diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/NotSetTransition.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/NotSetTransition.cpp new file mode 100644 index 0000000..b02910d --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/NotSetTransition.cpp @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/NotSetTransition.h" +#include "atn/ATNState.h" +#include "misc/IntervalSet.h" + +using namespace antlr4; +using namespace antlr4::atn; + +NotSetTransition::NotSetTransition(ATNState *target, const misc::IntervalSet &set) : SetTransition(target, set) { +} + +Transition::SerializationType NotSetTransition::getSerializationType() const { + return NOT_SET; +} + +/* Matches a symbol that lies within [minVocabSymbol, maxVocabSymbol] and is rejected by SetTransition::matches(). */ +bool NotSetTransition::matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const { + return symbol >= minVocabSymbol && symbol <= maxVocabSymbol + && !SetTransition::matches(symbol, minVocabSymbol, maxVocabSymbol); +} + +/* Text form: the NOT_SET tag, then the Transition text, then the SetTransition text inside braces. */ +std::string NotSetTransition::toString() const { + return "NOT_SET " + Transition::toString() + " { " + SetTransition::toString() + " }"; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/NotSetTransition.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/NotSetTransition.h new file mode 100644 index 0000000..214fb06 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/NotSetTransition.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/SetTransition.h" + +namespace antlr4 { +namespace atn { + + /** Set transition with inverted matching: accepts an in-vocabulary symbol that SetTransition::matches() rejects. */ + class ANTLR4CPP_PUBLIC NotSetTransition final : public SetTransition { + public: + NotSetTransition(ATNState *target, const misc::IntervalSet &set); + + virtual SerializationType getSerializationType() const override; + + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/OrderedATNConfigSet.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/OrderedATNConfigSet.cpp new file mode 100644 index 0000000..a731def --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/OrderedATNConfigSet.cpp @@ -0,0 +1,12 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/OrderedATNConfigSet.h" + +using namespace antlr4::atn; + +/* Uses the hashCode() of the configuration itself as its hash; c is dereferenced without a null check. */ +size_t OrderedATNConfigSet::getHash(ATNConfig *c) { + return c->hashCode(); +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/OrderedATNConfigSet.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/OrderedATNConfigSet.h new file mode 100644 index 0000000..4ce43bb --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/OrderedATNConfigSet.h @@ -0,0 +1,20 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/ATNConfigSet.h" +#include "atn/ATNConfig.h" + +namespace antlr4 { +namespace atn { + + /** ATNConfigSet variant whose getHash() returns the hashCode() of the configuration itself. */ + class ANTLR4CPP_PUBLIC OrderedATNConfigSet : public ATNConfigSet { + protected: + virtual size_t getHash(ATNConfig *c) override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ParseInfo.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ParseInfo.cpp new file mode 100644 index 0000000..95a89ac --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ParseInfo.cpp @@ -0,0 +1,102 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ProfilingATNSimulator.h" +#include "dfa/DFA.h" + +#include "atn/ParseInfo.h" + +using namespace antlr4::atn; + +/* Stores the simulator pointer only; the destructor below does not delete it. */ +ParseInfo::ParseInfo(ProfilingATNSimulator *atnSimulator) : _atnSimulator(atnSimulator) { +} + +ParseInfo::~ParseInfo() { +} + +std::vector<DecisionInfo> ParseInfo::getDecisionInfo() { + return _atnSimulator->getDecisionInfo(); +} + +/* Collects the indices of all decisions whose LL_Fallback counter is positive. */ +std::vector<size_t> ParseInfo::getLLDecisions() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + std::vector<size_t> LL; + for (size_t i = 0; i < decisions.size(); ++i) { + long long fallBack = decisions[i].LL_Fallback; + if (fallBack > 0) { + LL.push_back(i); + } + } + return LL; +} + +long long ParseInfo::getTotalTimeInPrediction() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long t = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + t += decisions[i].timeInPrediction; + } + return t; +} + +long long ParseInfo::getTotalSLLLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].SLL_TotalLook; + } + return k; +} + +long long 
ParseInfo::getTotalLLLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); i++) { + k += decisions[i].LL_TotalLook; + } + return k; +} + +long long ParseInfo::getTotalSLLATNLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].SLL_ATNTransitions; + } + return k; +} + +long long ParseInfo::getTotalLLATNLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].LL_ATNTransitions; + } + return k; +} + +/* Adds the SLL and the LL ATN transition counters of every decision in a single pass. */ +long long ParseInfo::getTotalATNLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].SLL_ATNTransitions; + k += decisions[i].LL_ATNTransitions; + } + return k; +} + +/* Sums getDFASize(i) over every entry of the decisionToDFA vector of the simulator. */ +size_t ParseInfo::getDFASize() { + size_t n = 0; + std::vector<dfa::DFA> &decisionToDFA = _atnSimulator->decisionToDFA; + for (size_t i = 0; i < decisionToDFA.size(); ++i) { + n += getDFASize(i); + } + return n; +} + +/* Returns the size of the states container of one DFA. The index is not range-checked, so decision must be valid. */ +size_t ParseInfo::getDFASize(size_t decision) { + dfa::DFA &decisionToDFA = _atnSimulator->decisionToDFA[decision]; + return decisionToDFA.states.size(); +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ParseInfo.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ParseInfo.h new file mode 100644 index 0000000..7ced7de --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ParseInfo.h @@ -0,0 +1,102 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/DecisionInfo.h" + +namespace antlr4 { +namespace atn { + + class ProfilingATNSimulator; + + /// This class provides access to specific and aggregate statistics gathered + /// during profiling of a parser. + class ANTLR4CPP_PUBLIC ParseInfo { + public: + /// Creates a view over the statistics held by {@code atnSimulator}. + /// The pointer is stored but not owned, and every query dereferences it. + ParseInfo(ProfilingATNSimulator *atnSimulator); + ParseInfo(ParseInfo const&) = default; + virtual ~ParseInfo(); + + ParseInfo& operator=(ParseInfo const&) = default; + + /// <summary> + /// Gets an array of <seealso cref="DecisionInfo"/> instances containing the profiling + /// information gathered for each decision in the ATN. + /// </summary> + /// <returns> An array of <seealso cref="DecisionInfo"/> instances, indexed by decision + /// number. </returns> + virtual std::vector<DecisionInfo> getDecisionInfo(); + + /// <summary> + /// Gets the decision numbers for decisions that required one or more + /// full-context predictions during parsing. These are decisions for which + /// <seealso cref="DecisionInfo#LL_Fallback"/> is non-zero. + /// </summary> + /// <returns> A list of decision numbers which required one or more + /// full-context predictions during parsing. </returns> + virtual std::vector<size_t> getLLDecisions(); + + /// <summary> + /// Gets the total time spent during prediction across all decisions made + /// during parsing. This value is the sum of + /// <seealso cref="DecisionInfo#timeInPrediction"/> for all decisions. + /// </summary> + virtual long long getTotalTimeInPrediction(); + + /// <summary> + /// Gets the total number of SLL lookahead operations across all decisions + /// made during parsing. This value is the sum of + /// <seealso cref="DecisionInfo#SLL_TotalLook"/> for all decisions. + /// </summary> + virtual long long getTotalSLLLookaheadOps(); + + /// <summary> + /// Gets the total number of LL lookahead operations across all decisions + /// made during parsing. 
This value is the sum of + /// <seealso cref="DecisionInfo#LL_TotalLook"/> for all decisions. + /// </summary> + virtual long long getTotalLLLookaheadOps(); + + /// <summary> + /// Gets the total number of ATN lookahead operations for SLL prediction + /// across all decisions made during parsing. This value is the sum of + /// <seealso cref="DecisionInfo#SLL_ATNTransitions"/> for all decisions. + /// </summary> + virtual long long getTotalSLLATNLookaheadOps(); + + /// <summary> + /// Gets the total number of ATN lookahead operations for LL prediction + /// across all decisions made during parsing. This value is the sum of + /// <seealso cref="DecisionInfo#LL_ATNTransitions"/> for all decisions. + /// </summary> + virtual long long getTotalLLATNLookaheadOps(); + + /// <summary> + /// Gets the total number of ATN lookahead operations for SLL and LL + /// prediction across all decisions made during parsing. + /// + /// <para> + /// This value is the sum of <seealso cref="#getTotalSLLATNLookaheadOps"/> and + /// <seealso cref="#getTotalLLATNLookaheadOps"/>.</para> + /// </summary> + virtual long long getTotalATNLookaheadOps(); + + /// <summary> + /// Gets the total number of DFA states stored in the DFA cache for all + /// decisions in the ATN. + /// </summary> + virtual size_t getDFASize(); + + /// <summary> + /// Gets the total number of DFA states stored in the DFA cache for a + /// particular decision. + /// </summary> + /// <param name="decision"> The decision number; used as an unchecked index, so it + /// must be a valid decision of the simulator </param> + virtual size_t getDFASize(size_t decision); + + protected: + const ProfilingATNSimulator *_atnSimulator; // non-owning, we are created by this simulator. + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ParserATNSimulator.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ParserATNSimulator.cpp new file mode 100644 index 0000000..01c83c1 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ParserATNSimulator.cpp @@ -0,0 +1,1370 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "dfa/DFA.h" +#include "NoViableAltException.h" +#include "atn/DecisionState.h" +#include "ParserRuleContext.h" +#include "misc/IntervalSet.h" +#include "Parser.h" +#include "CommonTokenStream.h" +#include "atn/EmptyPredictionContext.h" +#include "atn/NotSetTransition.h" +#include "atn/AtomTransition.h" +#include "atn/RuleTransition.h" +#include "atn/PredicateTransition.h" +#include "atn/PrecedencePredicateTransition.h" +#include "atn/ActionTransition.h" +#include "atn/EpsilonTransition.h" +#include "atn/RuleStopState.h" +#include "atn/ATNConfigSet.h" +#include "atn/ATNConfig.h" + +#include "atn/StarLoopEntryState.h" +#include "atn/BlockStartState.h" +#include "atn/BlockEndState.h" + +#include "misc/Interval.h" +#include "ANTLRErrorListener.h" + +#include "Vocabulary.h" +#include "support/Arrays.h" + +#include "atn/ParserATNSimulator.h" + +#define DEBUG_ATN 0 +#define DEBUG_LIST_ATN_DECISIONS 0 +#define DEBUG_DFA 0 +#define RETRY_DEBUG 0 + +using namespace antlr4; +using namespace antlr4::atn; + +using namespace antlrcpp; + +const bool ParserATNSimulator::TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT = ParserATNSimulator::getLrLoopSetting(); + +/* Convenience constructor: delegates to the full constructor with a null parser. */ +ParserATNSimulator::ParserATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) +: ParserATNSimulator(nullptr, atn, decisionToDFA, sharedContextCache) { +} + +/* Full constructor: binds the caller-provided DFA vector by reference and keeps the parser pointer. */ +ParserATNSimulator::ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) +: ATNSimulator(atn, sharedContextCache), decisionToDFA(decisionToDFA), parser(parser) { + InitializeInstanceFields(); +} + +/* Intentionally empty in this implementation. */ +void ParserATNSimulator::reset() { +} + +/* Empties decisionToDFA and refills it with one fresh DFA per decision, keeping the previous number of entries. */ +void ParserATNSimulator::clearDFA() { + int size = (int)decisionToDFA.size(); + decisionToDFA.clear(); + for (int d = 0; d < size; ++d) { + decisionToDFA.push_back(dfa::DFA(atn.getDecisionState(d), d)); + } +} + +size_t ParserATNSimulator::adaptivePredict(TokenStream *input, size_t decision, 
ParserRuleContext *outerContext) { + +#if DEBUG_ATN == 1 || DEBUG_LIST_ATN_DECISIONS == 1 + std::cout << "adaptivePredict decision " << decision << " exec LA(1)==" << getLookaheadName(input) << " line " + << input->LT(1)->getLine() << ":" << input->LT(1)->getCharPositionInLine() << std::endl; +#endif + + _input = input; + _startIndex = input->index(); + _outerContext = outerContext; + dfa::DFA &dfa = decisionToDFA[decision]; + _dfa = &dfa; + + ssize_t m = input->mark(); + size_t index = _startIndex; + + // Now we are certain to have a specific decision's DFA + // But, do we still need an initial state? + auto onExit = finally([this, input, index, m] { + mergeCache.clear(); // wack cache after each prediction + _dfa = nullptr; + input->seek(index); + input->release(m); + }); + + dfa::DFAState *s0; + if (dfa.isPrecedenceDfa()) { + // the start state for a precedence DFA depends on the current + // parser precedence, and is provided by a DFA method. + s0 = dfa.getPrecedenceStartState(parser->getPrecedence()); + } else { + // the start state for a "regular" DFA is just s0 + s0 = dfa.s0; + } + + if (s0 == nullptr) { + bool fullCtx = false; + std::unique_ptr<ATNConfigSet> s0_closure = computeStartState(dynamic_cast<ATNState *>(dfa.atnStartState), + &ParserRuleContext::EMPTY, fullCtx); + + _stateLock.writeLock(); + if (dfa.isPrecedenceDfa()) { + /* If this is a precedence DFA, we use applyPrecedenceFilter + * to convert the computed start state to a precedence start + * state. We then use DFA.setPrecedenceStartState to set the + * appropriate start state for the precedence level rather + * than simply setting DFA.s0. 
+ */ + dfa.s0->configs = std::move(s0_closure); // not used for prediction but useful to know start configs anyway + dfa::DFAState *newState = new dfa::DFAState(applyPrecedenceFilter(dfa.s0->configs.get())); /* mem-check: managed by the DFA or deleted below */ + s0 = addDFAState(dfa, newState); + dfa.setPrecedenceStartState(parser->getPrecedence(), s0, _edgeLock); + if (s0 != newState) { + delete newState; // If there was already a state with this config set we don't need the new one. + } + } else { + dfa::DFAState *newState = new dfa::DFAState(std::move(s0_closure)); /* mem-check: managed by the DFA or deleted below */ + s0 = addDFAState(dfa, newState); + + if (dfa.s0 != s0) { + delete dfa.s0; // Delete existing s0 DFA state, if there's any. + dfa.s0 = s0; + } + if (s0 != newState) { + delete newState; // If there was already a state with this config set we don't need the new one. + } + } + _stateLock.writeUnlock(); + } + + // We can start with an existing DFA. + size_t alt = execATN(dfa, s0, input, index, outerContext != nullptr ? 
outerContext : &ParserRuleContext::EMPTY); + + return alt; +} + +size_t ParserATNSimulator::execATN(dfa::DFA &dfa, dfa::DFAState *s0, TokenStream *input, size_t startIndex, + ParserRuleContext *outerContext) { + +#if DEBUG_ATN == 1 || DEBUG_LIST_ATN_DECISIONS == 1 + std::cout << "execATN decision " << dfa.decision << " exec LA(1)==" << getLookaheadName(input) << + " line " << input->LT(1)->getLine() << ":" << input->LT(1)->getCharPositionInLine() << std::endl; +#endif + + dfa::DFAState *previousD = s0; + +#if DEBUG_ATN == 1 + std::cout << "s0 = " << s0 << std::endl; +#endif + + size_t t = input->LA(1); + + while (true) { // while more work + dfa::DFAState *D = getExistingTargetState(previousD, t); + if (D == nullptr) { + D = computeTargetState(dfa, previousD, t); + } + + if (D == ERROR.get()) { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for SLL decision. Full LL doesn't dip into outer + // so don't need special case. + // We will get an error no matter what so delay until after + // decision; better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. 
+ NoViableAltException e = noViableAlt(input, outerContext, previousD->configs.get(), startIndex, false); + input->seek(startIndex); + size_t alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD->configs.get(), outerContext); + if (alt != ATN::INVALID_ALT_NUMBER) { + return alt; + } + + throw e; + } + + if (D->requiresFullContext && _mode != PredictionMode::SLL) { + // IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) + BitSet conflictingAlts; + if (D->predicates.size() != 0) { +#if DEBUG_ATN == 1 + std::cout << "DFA state has preds in DFA sim LL failover" << std::endl; +#endif + + size_t conflictIndex = input->index(); + if (conflictIndex != startIndex) { + input->seek(startIndex); + } + + conflictingAlts = evalSemanticContext(D->predicates, outerContext, true); + if (conflictingAlts.count() == 1) { +#if DEBUG_ATN == 1 + std::cout << "Full LL avoided" << std::endl; +#endif + + return conflictingAlts.nextSetBit(0); + } + + if (conflictIndex != startIndex) { + // restore the index so reporting the fallback to full + // context occurs with the index at the correct spot + input->seek(conflictIndex); + } + } + +#if DEBUG_DFA == 1 + std::cout << "ctx sensitive state " << outerContext << " in " << D << std::endl; +#endif + + bool fullCtx = true; + Ref<ATNConfigSet> s0_closure = computeStartState(dfa.atnStartState, outerContext, fullCtx); + reportAttemptingFullContext(dfa, conflictingAlts, D->configs.get(), startIndex, input->index()); + size_t alt = execATNWithFullContext(dfa, D, s0_closure.get(), input, startIndex, outerContext); + return alt; + } + + if (D->isAcceptState) { + if (D->predicates.empty()) { + return D->prediction; + } + + size_t stopIndex = input->index(); + input->seek(startIndex); + BitSet alts = evalSemanticContext(D->predicates, outerContext, true); + switch (alts.count()) { + case 0: + throw noViableAlt(input, outerContext, D->configs.get(), startIndex, false); + + case 1: + return alts.nextSetBit(0); + + default: + // 
report ambiguity after predicate evaluation to make sure the correct + // set of ambig alts is reported. + reportAmbiguity(dfa, D, startIndex, stopIndex, false, alts, D->configs.get()); + return alts.nextSetBit(0); + } + } + + previousD = D; + + if (t != Token::EOF) { + input->consume(); + t = input->LA(1); + } + } +} + +dfa::DFAState *ParserATNSimulator::getExistingTargetState(dfa::DFAState *previousD, size_t t) { + dfa::DFAState* retval; + _edgeLock.readLock(); + auto iterator = previousD->edges.find(t); + retval = (iterator == previousD->edges.end()) ? nullptr : iterator->second; + _edgeLock.readUnlock(); + return retval; +} + +dfa::DFAState *ParserATNSimulator::computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t) { + std::unique_ptr<ATNConfigSet> reach = computeReachSet(previousD->configs.get(), t, false); + if (reach == nullptr) { + addDFAEdge(dfa, previousD, t, ERROR.get()); + return ERROR.get(); + } + + // create new target state; we'll add to DFA after it's complete + dfa::DFAState *D = new dfa::DFAState(std::move(reach)); /* mem-check: managed by the DFA or deleted below, "reach" is no longer valid now. 
*/ + size_t predictedAlt = getUniqueAlt(D->configs.get()); + + if (predictedAlt != ATN::INVALID_ALT_NUMBER) { + // NO CONFLICT, UNIQUELY PREDICTED ALT + D->isAcceptState = true; + D->configs->uniqueAlt = predictedAlt; + D->prediction = predictedAlt; + } else if (PredictionModeClass::hasSLLConflictTerminatingPrediction(_mode, D->configs.get())) { + // MORE THAN ONE VIABLE ALTERNATIVE + D->configs->conflictingAlts = getConflictingAlts(D->configs.get()); + D->requiresFullContext = true; + // in SLL-only mode, we will stop at this state and return the minimum alt + D->isAcceptState = true; + D->prediction = D->configs->conflictingAlts.nextSetBit(0); + } + + if (D->isAcceptState && D->configs->hasSemanticContext) { + predicateDFAState(D, atn.getDecisionState(dfa.decision)); + if (D->predicates.size() != 0) { + D->prediction = ATN::INVALID_ALT_NUMBER; + } + } + + // all adds to dfa are done after we've created full D state + dfa::DFAState *state = addDFAEdge(dfa, previousD, t, D); + if (state != D) { + delete D; // If the new state exists already we don't need it and use the existing one instead. + } + return state; +} + +void ParserATNSimulator::predicateDFAState(dfa::DFAState *dfaState, DecisionState *decisionState) { + // We need to test all predicates, even in DFA states that + // uniquely predict alternative. 
+ size_t nalts = decisionState->transitions.size(); + + // Update DFA so reach becomes accept state with (predicate,alt) + // pairs if preds found for conflicting alts + BitSet altsToCollectPredsFrom = getConflictingAltsOrUniqueAlt(dfaState->configs.get()); + std::vector<Ref<SemanticContext>> altToPred = getPredsForAmbigAlts(altsToCollectPredsFrom, dfaState->configs.get(), nalts); + if (!altToPred.empty()) { + dfaState->predicates = getPredicatePredictions(altsToCollectPredsFrom, altToPred); + dfaState->prediction = ATN::INVALID_ALT_NUMBER; // make sure we use preds + } else { + // There are preds in configs but they might go away + // when OR'd together like {p}? || NONE == NONE. If neither + // alt has preds, resolve to min alt + dfaState->prediction = altsToCollectPredsFrom.nextSetBit(0); + } +} + +size_t ParserATNSimulator::execATNWithFullContext(dfa::DFA &dfa, dfa::DFAState *D, ATNConfigSet *s0, + TokenStream *input, size_t startIndex, ParserRuleContext *outerContext) { + + bool fullCtx = true; + bool foundExactAmbig = false; + + std::unique_ptr<ATNConfigSet> reach; + ATNConfigSet *previous = s0; + input->seek(startIndex); + size_t t = input->LA(1); + size_t predictedAlt; + + while (true) { + reach = computeReachSet(previous, t, fullCtx); + if (reach == nullptr) { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for LL decision. Full LL doesn't dip into outer + // so don't need special case. + // We will get an error no matter what so delay until after + // decision; better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. 
+ NoViableAltException e = noViableAlt(input, outerContext, previous, startIndex, previous != s0); + input->seek(startIndex); + size_t alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext); + if (alt != ATN::INVALID_ALT_NUMBER) { + return alt; + } + throw e; + } + if (previous != s0) // Don't delete the start set. + delete previous; + previous = nullptr; + + std::vector<BitSet> altSubSets = PredictionModeClass::getConflictingAltSubsets(reach.get()); + reach->uniqueAlt = getUniqueAlt(reach.get()); + // unique prediction? + if (reach->uniqueAlt != ATN::INVALID_ALT_NUMBER) { + predictedAlt = reach->uniqueAlt; + break; + } + if (_mode != PredictionMode::LL_EXACT_AMBIG_DETECTION) { + predictedAlt = PredictionModeClass::resolvesToJustOneViableAlt(altSubSets); + if (predictedAlt != ATN::INVALID_ALT_NUMBER) { + break; + } + } else { + // In exact ambiguity mode, we never try to terminate early. + // Just keeps scarfing until we know what the conflict is + if (PredictionModeClass::allSubsetsConflict(altSubSets) && PredictionModeClass::allSubsetsEqual(altSubSets)) { + foundExactAmbig = true; + predictedAlt = PredictionModeClass::getSingleViableAlt(altSubSets); + break; + } + // else there are multiple non-conflicting subsets or + // we're not sure what the ambiguity is yet. + // So, keep going. + } + previous = reach.release(); + + if (t != Token::EOF) { + input->consume(); + t = input->LA(1); + } + } + + // If the configuration set uniquely predicts an alternative, + // without conflict, then we know that it's a full LL decision + // not SLL. + if (reach->uniqueAlt != ATN::INVALID_ALT_NUMBER) { + reportContextSensitivity(dfa, predictedAlt, reach.get(), startIndex, input->index()); + return predictedAlt; + } + + // We do not check predicates here because we have checked them + // on-the-fly when doing full context prediction. 
+ + /* + In non-exact ambiguity detection mode, we might actually be able to + detect an exact ambiguity, but I'm not going to spend the cycles + needed to check. We only emit ambiguity warnings in exact ambiguity + mode. + + For example, we might know that we have conflicting configurations. + But, that does not mean that there is no way forward without a + conflict. It's possible to have nonconflicting alt subsets as in: + + LL altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] + + from + + [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), + (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])] + + In this case, (17,1,[5 $]) indicates there is some next sequence that + would resolve this without conflict to alternative 1. Any other viable + next sequence, however, is associated with a conflict. We stop + looking for input because no amount of further lookahead will alter + the fact that we should predict alternative 1. We just can't say for + sure that there is an ambiguity without looking further. + */ + reportAmbiguity(dfa, D, startIndex, input->index(), foundExactAmbig, reach->getAlts(), reach.get()); + + return predictedAlt; +} + +std::unique_ptr<ATNConfigSet> ParserATNSimulator::computeReachSet(ATNConfigSet *closure_, size_t t, bool fullCtx) { + + std::unique_ptr<ATNConfigSet> intermediate(new ATNConfigSet(fullCtx)); + + /* Configurations already in a rule stop state indicate reaching the end + * of the decision rule (local context) or end of the start rule (full + * context). Once reached, these configurations are never updated by a + * closure operation, so they are handled separately for the performance + * advantage of having a smaller intermediate set when calling closure. + * + * For full-context reach operations, separate handling is required to + * ensure that the alternative matching the longest overall sequence is + * chosen when multiple such configurations can match the input. 
+ */ + std::vector<Ref<ATNConfig>> skippedStopStates; + + // First figure out where we can reach on input t + for (auto &c : closure_->configs) { + if (is<RuleStopState *>(c->state)) { + assert(c->context->isEmpty()); + + if (fullCtx || t == Token::EOF) { + skippedStopStates.push_back(c); + } + + continue; + } + + size_t n = c->state->transitions.size(); + for (size_t ti = 0; ti < n; ti++) { // for each transition + Transition *trans = c->state->transitions[ti]; + ATNState *target = getReachableTarget(trans, (int)t); + if (target != nullptr) { + intermediate->add(std::make_shared<ATNConfig>(c, target), &mergeCache); + } + } + } + + // Now figure out where the reach operation can take us... + std::unique_ptr<ATNConfigSet> reach; + + /* This block optimizes the reach operation for intermediate sets which + * trivially indicate a termination state for the overall + * adaptivePredict operation. + * + * The conditions assume that intermediate + * contains all configurations relevant to the reach set, but this + * condition is not true when one or more configurations have been + * withheld in skippedStopStates, or when the current symbol is EOF. + */ + if (skippedStopStates.empty() && t != Token::EOF) { + if (intermediate->size() == 1) { + // Don't pursue the closure if there is just one state. + // It can only have one alternative; just add to result + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = std::move(intermediate); + } else if (getUniqueAlt(intermediate.get()) != ATN::INVALID_ALT_NUMBER) { + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = std::move(intermediate); + } + } + + /* If the reach set could not be trivially determined, perform a closure + * operation on the intermediate set to compute its initial value. 
+ */ + if (reach == nullptr) { + reach.reset(new ATNConfigSet(fullCtx)); + ATNConfig::Set closureBusy; + + bool treatEofAsEpsilon = t == Token::EOF; + for (auto c : intermediate->configs) { + closure(c, reach.get(), closureBusy, false, fullCtx, treatEofAsEpsilon); + } + } + + if (t == IntStream::EOF) { + /* After consuming EOF no additional input is possible, so we are + * only interested in configurations which reached the end of the + * decision rule (local context) or end of the start rule (full + * context). Update reach to contain only these configurations. This + * handles both explicit EOF transitions in the grammar and implicit + * EOF transitions following the end of the decision or start rule. + * + * When reach==intermediate, no closure operation was performed. In + * this case, removeAllConfigsNotInRuleStopState needs to check for + * reachable rule stop states as well as configurations already in + * a rule stop state. + * + * This is handled before the configurations in skippedStopStates, + * because any configurations potentially added from that list are + * already guaranteed to meet this condition whether or not it's + * required. + */ + ATNConfigSet *temp = removeAllConfigsNotInRuleStopState(reach.get(), *reach == *intermediate); + if (temp != reach.get()) + reach.reset(temp); // We got a new set, so use that. + } + + /* If skippedStopStates is not null, then it contains at least one + * configuration. For full-context reach operations, these + * configurations reached the end of the start rule, in which case we + * only add them back to reach if no configuration during the current + * closure operation reached such a state. This ensures adaptivePredict + * chooses an alternative matching the longest overall sequence when + * multiple alternatives are viable. 
+ */ + if (skippedStopStates.size() > 0 && (!fullCtx || !PredictionModeClass::hasConfigInRuleStopState(reach.get()))) { + assert(!skippedStopStates.empty()); + + for (auto c : skippedStopStates) { + reach->add(c, &mergeCache); + } + } + + if (reach->isEmpty()) { + return nullptr; + } + return reach; +} + +ATNConfigSet* ParserATNSimulator::removeAllConfigsNotInRuleStopState(ATNConfigSet *configs, + bool lookToEndOfRule) { + if (PredictionModeClass::allConfigsInRuleStopStates(configs)) { + return configs; + } + + ATNConfigSet *result = new ATNConfigSet(configs->fullCtx); /* mem-check: released by caller */ + + for (auto &config : configs->configs) { + if (is<RuleStopState*>(config->state)) { + result->add(config, &mergeCache); + continue; + } + + if (lookToEndOfRule && config->state->epsilonOnlyTransitions) { + misc::IntervalSet nextTokens = atn.nextTokens(config->state); + if (nextTokens.contains(Token::EPSILON)) { + ATNState *endOfRuleState = atn.ruleToStopState[config->state->ruleIndex]; + result->add(std::make_shared<ATNConfig>(config, endOfRuleState), &mergeCache); + } + } + } + + return result; +} + +std::unique_ptr<ATNConfigSet> ParserATNSimulator::computeStartState(ATNState *p, RuleContext *ctx, bool fullCtx) { + // always at least the implicit call to start rule + Ref<PredictionContext> initialContext = PredictionContext::fromRuleContext(atn, ctx); + std::unique_ptr<ATNConfigSet> configs(new ATNConfigSet(fullCtx)); + + for (size_t i = 0; i < p->transitions.size(); i++) { + ATNState *target = p->transitions[i]->target; + Ref<ATNConfig> c = std::make_shared<ATNConfig>(target, (int)i + 1, initialContext); + ATNConfig::Set closureBusy; + closure(c, configs.get(), closureBusy, true, fullCtx, false); + } + + return configs; +} + +std::unique_ptr<ATNConfigSet> ParserATNSimulator::applyPrecedenceFilter(ATNConfigSet *configs) { + std::map<size_t, Ref<PredictionContext>> statesFromAlt1; + std::unique_ptr<ATNConfigSet> configSet(new ATNConfigSet(configs->fullCtx)); + 
for (Ref<ATNConfig> &config : configs->configs) { + // handle alt 1 first + if (config->alt != 1) { + continue; + } + + Ref<SemanticContext> updatedContext = config->semanticContext->evalPrecedence(parser, _outerContext); + if (updatedContext == nullptr) { + // the configuration was eliminated + continue; + } + + statesFromAlt1[config->state->stateNumber] = config->context; + if (updatedContext != config->semanticContext) { + configSet->add(std::make_shared<ATNConfig>(config, updatedContext), &mergeCache); + } + else { + configSet->add(config, &mergeCache); + } + } + + for (Ref<ATNConfig> &config : configs->configs) { + if (config->alt == 1) { + // already handled + continue; + } + + if (!config->isPrecedenceFilterSuppressed()) { + /* In the future, this elimination step could be updated to also + * filter the prediction context for alternatives predicting alt>1 + * (basically a graph subtraction algorithm). + */ + auto iterator = statesFromAlt1.find(config->state->stateNumber); + if (iterator != statesFromAlt1.end() && *iterator->second == *config->context) { + // eliminated + continue; + } + } + + configSet->add(config, &mergeCache); + } + + return configSet; +} + +atn::ATNState* ParserATNSimulator::getReachableTarget(Transition *trans, size_t ttype) { + if (trans->matches(ttype, 0, atn.maxTokenType)) { + return trans->target; + } + + return nullptr; +} + +// Note that caller must memory manage the returned value from this function +std::vector<Ref<SemanticContext>> ParserATNSimulator::getPredsForAmbigAlts(const BitSet &ambigAlts, + ATNConfigSet *configs, size_t nalts) { + // REACH=[1|1|[]|0:0, 1|2|[]|0:1] + /* altToPred starts as an array of all null contexts. The entry at index i + * corresponds to alternative i. altToPred[i] may have one of three values: + * 1. null: no ATNConfig c is found such that c.alt==i + * 2. SemanticContext.NONE: At least one ATNConfig c exists such that + * c.alt==i and c.semanticContext==SemanticContext.NONE. 
In other words, + * alt i has at least one un-predicated config. + * 3. Non-NONE Semantic Context: There exists at least one, and for all + * ATNConfig c such that c.alt==i, c.semanticContext!=SemanticContext.NONE. + * + * From this, it is clear that NONE||anything==NONE. + */ + std::vector<Ref<SemanticContext>> altToPred(nalts + 1); + + for (auto &c : configs->configs) { + if (ambigAlts.test(c->alt)) { + altToPred[c->alt] = SemanticContext::Or(altToPred[c->alt], c->semanticContext); + } + } + + size_t nPredAlts = 0; + for (size_t i = 1; i <= nalts; i++) { + if (altToPred[i] == nullptr) { + altToPred[i] = SemanticContext::NONE; + } else if (altToPred[i] != SemanticContext::NONE) { + nPredAlts++; + } + } + + // nonambig alts are null in altToPred + if (nPredAlts == 0) { + altToPred.clear(); + } +#if DEBUG_ATN == 1 + std::cout << "getPredsForAmbigAlts result " << Arrays::toString(altToPred) << std::endl; +#endif + + return altToPred; +} + +std::vector<dfa::DFAState::PredPrediction *> ParserATNSimulator::getPredicatePredictions(const antlrcpp::BitSet &ambigAlts, + std::vector<Ref<SemanticContext>> const& altToPred) { + bool containsPredicate = std::find_if(altToPred.begin(), altToPred.end(), [](Ref<SemanticContext> const context) { + return context != SemanticContext::NONE; + }) != altToPred.end(); + if (!containsPredicate) + return {}; + + std::vector<dfa::DFAState::PredPrediction*> pairs; + for (size_t i = 1; i < altToPred.size(); ++i) { + Ref<SemanticContext> const& pred = altToPred[i]; + assert(pred != nullptr); // unpredicted is indicated by SemanticContext.NONE + + if (ambigAlts.test(i)) { + pairs.push_back(new dfa::DFAState::PredPrediction(pred, (int)i)); /* mem-check: managed by the DFAState it will be assigned to after return */ + } + } + return pairs; +} + +size_t ParserATNSimulator::getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(ATNConfigSet *configs, + ParserRuleContext *outerContext) +{ + std::pair<ATNConfigSet *, ATNConfigSet *> sets = 
splitAccordingToSemanticValidity(configs, outerContext); + std::unique_ptr<ATNConfigSet> semValidConfigs(sets.first); + std::unique_ptr<ATNConfigSet> semInvalidConfigs(sets.second); + size_t alt = getAltThatFinishedDecisionEntryRule(semValidConfigs.get()); + if (alt != ATN::INVALID_ALT_NUMBER) { // semantically/syntactically viable path exists + return alt; + } + // Is there a syntactically valid path with a failed pred? + if (!semInvalidConfigs->configs.empty()) { + alt = getAltThatFinishedDecisionEntryRule(semInvalidConfigs.get()); + if (alt != ATN::INVALID_ALT_NUMBER) { // syntactically viable path exists + return alt; + } + } + return ATN::INVALID_ALT_NUMBER; +} + +size_t ParserATNSimulator::getAltThatFinishedDecisionEntryRule(ATNConfigSet *configs) { + misc::IntervalSet alts; + for (auto &c : configs->configs) { + if (c->getOuterContextDepth() > 0 || (is<RuleStopState *>(c->state) && c->context->hasEmptyPath())) { + alts.add(c->alt); + } + } + if (alts.size() == 0) { + return ATN::INVALID_ALT_NUMBER; + } + return alts.getMinElement(); +} + +std::pair<ATNConfigSet *, ATNConfigSet *> ParserATNSimulator::splitAccordingToSemanticValidity(ATNConfigSet *configs, + ParserRuleContext *outerContext) { + + // mem-check: both pointers must be freed by the caller. 
+ ATNConfigSet *succeeded(new ATNConfigSet(configs->fullCtx)); + ATNConfigSet *failed(new ATNConfigSet(configs->fullCtx)); + for (Ref<ATNConfig> &c : configs->configs) { + if (c->semanticContext != SemanticContext::NONE) { + bool predicateEvaluationResult = evalSemanticContext(c->semanticContext, outerContext, c->alt, configs->fullCtx); + if (predicateEvaluationResult) { + succeeded->add(c); + } else { + failed->add(c); + } + } else { + succeeded->add(c); + } + } + return { succeeded, failed }; +} + +BitSet ParserATNSimulator::evalSemanticContext(std::vector<dfa::DFAState::PredPrediction*> predPredictions, + ParserRuleContext *outerContext, bool complete) { + BitSet predictions; + for (auto *prediction : predPredictions) { + if (prediction->pred == SemanticContext::NONE) { + predictions.set(prediction->alt); + if (!complete) { + break; + } + continue; + } + + bool fullCtx = false; // in dfa + bool predicateEvaluationResult = evalSemanticContext(prediction->pred, outerContext, prediction->alt, fullCtx); +#if DEBUG_ATN == 1 || DEBUG_DFA == 1 + std::cout << "eval pred " << prediction->toString() << " = " << predicateEvaluationResult << std::endl; +#endif + + if (predicateEvaluationResult) { +#if DEBUG_ATN == 1 || DEBUG_DFA == 1 + std::cout << "PREDICT " << prediction->alt << std::endl; +#endif + + predictions.set(prediction->alt); + if (!complete) { + break; + } + } + } + + return predictions; +} + +bool ParserATNSimulator::evalSemanticContext(Ref<SemanticContext> const& pred, ParserRuleContext *parserCallStack, + size_t /*alt*/, bool /*fullCtx*/) { + return pred->eval(parser, parserCallStack); +} + +void ParserATNSimulator::closure(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, bool treatEofAsEpsilon) { + const int initialDepth = 0; + closureCheckingStopState(config, configs, closureBusy, collectPredicates, fullCtx, initialDepth, treatEofAsEpsilon); + + assert(!fullCtx || 
!configs->dipsIntoOuterContext); +} + +void ParserATNSimulator::closureCheckingStopState(Ref<ATNConfig> const& config, ATNConfigSet *configs, + ATNConfig::Set &closureBusy, bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) { + +#if DEBUG_ATN == 1 + std::cout << "closure(" << config->toString(true) << ")" << std::endl; +#endif + + if (is<RuleStopState *>(config->state)) { + // We hit rule end. If we have context info, use it + // run thru all possible stack tops in ctx + if (!config->context->isEmpty()) { + for (size_t i = 0; i < config->context->size(); i++) { + if (config->context->getReturnState(i) == PredictionContext::EMPTY_RETURN_STATE) { + if (fullCtx) { + configs->add(std::make_shared<ATNConfig>(config, config->state, PredictionContext::EMPTY), &mergeCache); + continue; + } else { + // we have no context info, just chase follow links (if greedy) +#if DEBUG_ATN == 1 + std::cout << "FALLING off rule " << getRuleName(config->state->ruleIndex) << std::endl; +#endif + closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon); + } + continue; + } + ATNState *returnState = atn.states[config->context->getReturnState(i)]; + std::weak_ptr<PredictionContext> newContext = config->context->getParent(i); // "pop" return state + Ref<ATNConfig> c = std::make_shared<ATNConfig>(returnState, config->alt, newContext.lock(), config->semanticContext); + // While we have context to pop back from, we may have + // gotten that context AFTER having falling off a rule. + // Make sure we track that we are now out of context. + // + // This assignment also propagates the + // isPrecedenceFilterSuppressed() value to the new + // configuration. 
+ c->reachesIntoOuterContext = config->reachesIntoOuterContext; + assert(depth > INT_MIN); + + closureCheckingStopState(c, configs, closureBusy, collectPredicates, fullCtx, depth - 1, treatEofAsEpsilon); + } + return; + } else if (fullCtx) { + // reached end of start rule + configs->add(config, &mergeCache); + return; + } else { + // else if we have no context info, just chase follow links (if greedy) + } + } + + closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon); +} + +void ParserATNSimulator::closure_(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) { + ATNState *p = config->state; + // optimization + if (!p->epsilonOnlyTransitions) { + // make sure to not return here, because EOF transitions can act as + // both epsilon transitions and non-epsilon transitions. + configs->add(config, &mergeCache); + } + + for (size_t i = 0; i < p->transitions.size(); i++) { + if (i == 0 && canDropLoopEntryEdgeInLeftRecursiveRule(config.get())) + continue; + + Transition *t = p->transitions[i]; + bool continueCollecting = !is<ActionTransition*>(t) && collectPredicates; + Ref<ATNConfig> c = getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon); + if (c != nullptr) { + int newDepth = depth; + if (is<RuleStopState*>(config->state)) { + assert(!fullCtx); + + // target fell off end of rule; mark resulting c as having dipped into outer context + // We can't get here if incoming config was rule stop and we had context + // track how far we dip into outer context. Might + // come in handy and we avoid evaluating context dependent + // preds if this is > 0. 
+ + if (closureBusy.count(c) > 0) { + // avoid infinite recursion for right-recursive rules + continue; + } + closureBusy.insert(c); + + if (_dfa != nullptr && _dfa->isPrecedenceDfa()) { + size_t outermostPrecedenceReturn = dynamic_cast<EpsilonTransition *>(t)->outermostPrecedenceReturn(); + if (outermostPrecedenceReturn == _dfa->atnStartState->ruleIndex) { + c->setPrecedenceFilterSuppressed(true); + } + } + + c->reachesIntoOuterContext++; + + if (!t->isEpsilon()) { + // avoid infinite recursion for EOF* and EOF+ + if (closureBusy.count(c) == 0) { + closureBusy.insert(c); + } else { + continue; + } + } + + configs->dipsIntoOuterContext = true; // TODO: can remove? only care when we add to set per middle of this method + assert(newDepth > INT_MIN); + + newDepth--; +#if DEBUG_DFA == 1 + std::cout << "dips into outer ctx: " << c << std::endl; +#endif + + } else if (!t->isEpsilon()) { + // avoid infinite recursion for EOF* and EOF+ + if (closureBusy.count(c) == 0) { + closureBusy.insert(c); + } else { + continue; + } + } + + if (is<RuleTransition*>(t)) { + // latch when newDepth goes negative - once we step out of the entry context we can't return + if (newDepth >= 0) { + newDepth++; + } + } + + closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon); + } + } +} + +bool ParserATNSimulator::canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig *config) const { + if (TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT) + return false; + + ATNState *p = config->state; + + // First check to see if we are in StarLoopEntryState generated during + // left-recursion elimination. For efficiency, also check if + // the context has an empty stack case. If so, it would mean + // global FOLLOW so we can't perform optimization + if (p->getStateType() != ATNState::STAR_LOOP_ENTRY || + !((StarLoopEntryState *)p)->isPrecedenceDecision || // Are we the special loop entry/exit state? 
+ config->context->isEmpty() || // If SLL wildcard + config->context->hasEmptyPath()) + { + return false; + } + + // Require all return states to return back to the same rule + // that p is in. + size_t numCtxs = config->context->size(); + for (size_t i = 0; i < numCtxs; i++) { // for each stack context + ATNState *returnState = atn.states[config->context->getReturnState(i)]; + if (returnState->ruleIndex != p->ruleIndex) + return false; + } + + BlockStartState *decisionStartState = (BlockStartState *)p->transitions[0]->target; + size_t blockEndStateNum = decisionStartState->endState->stateNumber; + BlockEndState *blockEndState = (BlockEndState *)atn.states[blockEndStateNum]; + + // Verify that the top of each stack context leads to loop entry/exit + // state through epsilon edges and w/o leaving rule. + for (size_t i = 0; i < numCtxs; i++) { // for each stack context + size_t returnStateNumber = config->context->getReturnState(i); + ATNState *returnState = atn.states[returnStateNumber]; + // All states must have single outgoing epsilon edge. + if (returnState->transitions.size() != 1 || !returnState->transitions[0]->isEpsilon()) + { + return false; + } + + // Look for prefix op case like 'not expr', (' type ')' expr + ATNState *returnStateTarget = returnState->transitions[0]->target; + if (returnState->getStateType() == ATNState::BLOCK_END && returnStateTarget == p) { + continue; + } + + // Look for 'expr op expr' or case where expr's return state is block end + // of (...)* internal block; the block end points to loop back + // which points to p but we don't need to check that + if (returnState == blockEndState) { + continue; + } + + // Look for ternary expr ? expr : expr. 
The return state points at block end, + // which points at loop entry state + if (returnStateTarget == blockEndState) { + continue; + } + + // Look for complex prefix 'between expr and expr' case where 2nd expr's + // return state points at block end state of (...)* internal block + if (returnStateTarget->getStateType() == ATNState::BLOCK_END && + returnStateTarget->transitions.size() == 1 && + returnStateTarget->transitions[0]->isEpsilon() && + returnStateTarget->transitions[0]->target == p) + { + continue; + } + + // Anything else ain't conforming. + return false; + } + + return true; +} + +std::string ParserATNSimulator::getRuleName(size_t index) { + if (parser != nullptr) { + return parser->getRuleNames()[index]; + } + return "<rule " + std::to_string(index) + ">"; +} + +Ref<ATNConfig> ParserATNSimulator::getEpsilonTarget(Ref<ATNConfig> const& config, Transition *t, bool collectPredicates, + bool inContext, bool fullCtx, bool treatEofAsEpsilon) { + switch (t->getSerializationType()) { + case Transition::RULE: + return ruleTransition(config, static_cast<RuleTransition*>(t)); + + case Transition::PRECEDENCE: + return precedenceTransition(config, static_cast<PrecedencePredicateTransition*>(t), collectPredicates, inContext, fullCtx); + + case Transition::PREDICATE: + return predTransition(config, static_cast<PredicateTransition*>(t), collectPredicates, inContext, fullCtx); + + case Transition::ACTION: + return actionTransition(config, static_cast<ActionTransition*>(t)); + + case Transition::EPSILON: + return std::make_shared<ATNConfig>(config, t->target); + + case Transition::ATOM: + case Transition::RANGE: + case Transition::SET: + // EOF transitions act like epsilon transitions after the first EOF + // transition is traversed + if (treatEofAsEpsilon) { + if (t->matches(Token::EOF, 0, 1)) { + return std::make_shared<ATNConfig>(config, t->target); + } + } + + return nullptr; + + default: + return nullptr; + } +} + +Ref<ATNConfig> 
ParserATNSimulator::actionTransition(Ref<ATNConfig> const& config, ActionTransition *t) { +#if DEBUG_DFA == 1 + std::cout << "ACTION edge " << t->ruleIndex << ":" << t->actionIndex << std::endl; +#endif + + return std::make_shared<ATNConfig>(config, t->target); +} + +Ref<ATNConfig> ParserATNSimulator::precedenceTransition(Ref<ATNConfig> const& config, PrecedencePredicateTransition *pt, + bool collectPredicates, bool inContext, bool fullCtx) { +#if DEBUG_DFA == 1 + std::cout << "PRED (collectPredicates=" << collectPredicates << ") " << pt->precedence << ">=_p" << ", ctx dependent=true" << std::endl; + if (parser != nullptr) { + std::cout << "context surrounding pred is " << Arrays::listToString(parser->getRuleInvocationStack(), ", ") << std::endl; + } +#endif + + Ref<ATNConfig> c; + if (collectPredicates && inContext) { + Ref<SemanticContext::PrecedencePredicate> predicate = pt->getPredicate(); + + if (fullCtx) { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. 
+ size_t currentPosition = _input->index(); + _input->seek(_startIndex); + bool predSucceeds = evalSemanticContext(pt->getPredicate(), _outerContext, config->alt, fullCtx); + _input->seek(currentPosition); + if (predSucceeds) { + c = std::make_shared<ATNConfig>(config, pt->target); // no pred context + } + } else { + Ref<SemanticContext> newSemCtx = SemanticContext::And(config->semanticContext, predicate); + c = std::make_shared<ATNConfig>(config, pt->target, newSemCtx); + } + } else { + c = std::make_shared<ATNConfig>(config, pt->target); + } + +#if DEBUG_DFA == 1 + std::cout << "config from pred transition=" << c << std::endl; +#endif + + return c; +} + +Ref<ATNConfig> ParserATNSimulator::predTransition(Ref<ATNConfig> const& config, PredicateTransition *pt, + bool collectPredicates, bool inContext, bool fullCtx) { +#if DEBUG_DFA == 1 + std::cout << "PRED (collectPredicates=" << collectPredicates << ") " << pt->ruleIndex << ":" << pt->predIndex << ", ctx dependent=" << pt->isCtxDependent << std::endl; + if (parser != nullptr) { + std::cout << "context surrounding pred is " << Arrays::listToString(parser->getRuleInvocationStack(), ", ") << std::endl; + } +#endif + + Ref<ATNConfig> c = nullptr; + if (collectPredicates && (!pt->isCtxDependent || (pt->isCtxDependent && inContext))) { + Ref<SemanticContext::Predicate> predicate = pt->getPredicate(); + if (fullCtx) { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. 
+ size_t currentPosition = _input->index(); + _input->seek(_startIndex); + bool predSucceeds = evalSemanticContext(pt->getPredicate(), _outerContext, config->alt, fullCtx); + _input->seek(currentPosition); + if (predSucceeds) { + c = std::make_shared<ATNConfig>(config, pt->target); // no pred context + } + } else { + Ref<SemanticContext> newSemCtx = SemanticContext::And(config->semanticContext, predicate); + c = std::make_shared<ATNConfig>(config, pt->target, newSemCtx); + } + } else { + c = std::make_shared<ATNConfig>(config, pt->target); + } + +#if DEBUG_DFA == 1 + std::cout << "config from pred transition=" << c << std::endl; +#endif + + return c; +} + +Ref<ATNConfig> ParserATNSimulator::ruleTransition(Ref<ATNConfig> const& config, RuleTransition *t) { +#if DEBUG_DFA == 1 + std::cout << "CALL rule " << getRuleName(t->target->ruleIndex) << ", ctx=" << config->context << std::endl; +#endif + + atn::ATNState *returnState = t->followState; + Ref<PredictionContext> newContext = SingletonPredictionContext::create(config->context, returnState->stateNumber); + return std::make_shared<ATNConfig>(config, t->target, newContext); +} + +BitSet ParserATNSimulator::getConflictingAlts(ATNConfigSet *configs) { + std::vector<BitSet> altsets = PredictionModeClass::getConflictingAltSubsets(configs); + return PredictionModeClass::getAlts(altsets); +} + +BitSet ParserATNSimulator::getConflictingAltsOrUniqueAlt(ATNConfigSet *configs) { + BitSet conflictingAlts; + if (configs->uniqueAlt != ATN::INVALID_ALT_NUMBER) { + conflictingAlts.set(configs->uniqueAlt); + } else { + conflictingAlts = configs->conflictingAlts; + } + return conflictingAlts; +} + +std::string ParserATNSimulator::getTokenName(size_t t) { + if (t == Token::EOF) { + return "EOF"; + } + + const dfa::Vocabulary &vocabulary = parser != nullptr ? 
parser->getVocabulary() : dfa::Vocabulary::EMPTY_VOCABULARY; + std::string displayName = vocabulary.getDisplayName(t); + if (displayName == std::to_string(t)) { + return displayName; + } + + return displayName + "<" + std::to_string(t) + ">"; +} + +std::string ParserATNSimulator::getLookaheadName(TokenStream *input) { + return getTokenName(input->LA(1)); +} + +void ParserATNSimulator::dumpDeadEndConfigs(NoViableAltException &nvae) { + std::cerr << "dead end configs: "; + for (auto c : nvae.getDeadEndConfigs()->configs) { + std::string trans = "no edges"; + if (c->state->transitions.size() > 0) { + Transition *t = c->state->transitions[0]; + if (is<AtomTransition*>(t)) { + AtomTransition *at = static_cast<AtomTransition*>(t); + trans = "Atom " + getTokenName(at->_label); + } else if (is<SetTransition*>(t)) { + SetTransition *st = static_cast<SetTransition*>(t); + bool is_not = is<NotSetTransition*>(st); + trans = (is_not ? "~" : ""); + trans += "Set "; + trans += st->set.toString(); + } + } + std::cerr << c->toString(true) + ":" + trans; + } +} + +NoViableAltException ParserATNSimulator::noViableAlt(TokenStream *input, ParserRuleContext *outerContext, + ATNConfigSet *configs, size_t startIndex, bool deleteConfigs) { + return NoViableAltException(parser, input, input->get(startIndex), input->LT(1), configs, outerContext, deleteConfigs); +} + +size_t ParserATNSimulator::getUniqueAlt(ATNConfigSet *configs) { + size_t alt = ATN::INVALID_ALT_NUMBER; + for (auto &c : configs->configs) { + if (alt == ATN::INVALID_ALT_NUMBER) { + alt = c->alt; // found first alt + } else if (c->alt != alt) { + return ATN::INVALID_ALT_NUMBER; + } + } + return alt; +} + +dfa::DFAState *ParserATNSimulator::addDFAEdge(dfa::DFA &dfa, dfa::DFAState *from, ssize_t t, dfa::DFAState *to) { +#if DEBUG_DFA == 1 + std::cout << "EDGE " << from << " -> " << to << " upon " << getTokenName(t) << std::endl; +#endif + + if (to == nullptr) { + return nullptr; + } + + _stateLock.writeLock(); + to = 
addDFAState(dfa, to); // used existing if possible not incoming + _stateLock.writeUnlock(); + if (from == nullptr || t > (int)atn.maxTokenType) { + return to; + } + + { + _edgeLock.writeLock(); + from->edges[t] = to; // connect + _edgeLock.writeUnlock(); + } + +#if DEBUG_DFA == 1 + std::string dfaText; + if (parser != nullptr) { + dfaText = dfa.toString(parser->getVocabulary()); + } else { + dfaText = dfa.toString(dfa::Vocabulary::EMPTY_VOCABULARY); + } + std::cout << "DFA=\n" << dfaText << std::endl; +#endif + + return to; +} + +dfa::DFAState *ParserATNSimulator::addDFAState(dfa::DFA &dfa, dfa::DFAState *D) { + if (D == ERROR.get()) { + return D; + } + + auto existing = dfa.states.find(D); + if (existing != dfa.states.end()) { + return *existing; + } + + D->stateNumber = (int)dfa.states.size(); + if (!D->configs->isReadonly()) { + D->configs->optimizeConfigs(this); + D->configs->setReadonly(true); + } + + dfa.states.insert(D); + +#if DEBUG_DFA == 1 + std::cout << "adding new DFA state: " << D << std::endl; +#endif + + return D; +} + +void ParserATNSimulator::reportAttemptingFullContext(dfa::DFA &dfa, const antlrcpp::BitSet &conflictingAlts, + ATNConfigSet *configs, size_t startIndex, size_t stopIndex) { +#if DEBUG_DFA == 1 || RETRY_DEBUG == 1 + misc::Interval interval = misc::Interval((int)startIndex, (int)stopIndex); + std::cout << "reportAttemptingFullContext decision=" << dfa.decision << ":" << configs << ", input=" << parser->getTokenStream()->getText(interval) << std::endl; +#endif + + if (parser != nullptr) { + parser->getErrorListenerDispatch().reportAttemptingFullContext(parser, dfa, startIndex, stopIndex, conflictingAlts, configs); + } +} + +void ParserATNSimulator::reportContextSensitivity(dfa::DFA &dfa, size_t prediction, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex) { +#if DEBUG_DFA == 1 || RETRY_DEBUG == 1 + misc::Interval interval = misc::Interval(startIndex, stopIndex); + std::cout << "reportContextSensitivity decision=" << 
dfa.decision << ":" << configs << ", input=" << parser->getTokenStream()->getText(interval) << std::endl; +#endif + + if (parser != nullptr) { + parser->getErrorListenerDispatch().reportContextSensitivity(parser, dfa, startIndex, stopIndex, prediction, configs); + } +} + +void ParserATNSimulator::reportAmbiguity(dfa::DFA &dfa, dfa::DFAState * /*D*/, size_t startIndex, size_t stopIndex, + bool exact, const antlrcpp::BitSet &ambigAlts, ATNConfigSet *configs) { +#if DEBUG_DFA == 1 || RETRY_DEBUG == 1 + misc::Interval interval = misc::Interval((int)startIndex, (int)stopIndex); + std::cout << "reportAmbiguity " << ambigAlts << ":" << configs << ", input=" << parser->getTokenStream()->getText(interval) << std::endl; +#endif + + if (parser != nullptr) { + parser->getErrorListenerDispatch().reportAmbiguity(parser, dfa, startIndex, stopIndex, exact, ambigAlts, configs); + } +} + +void ParserATNSimulator::setPredictionMode(PredictionMode newMode) { + _mode = newMode; +} + +atn::PredictionMode ParserATNSimulator::getPredictionMode() { + return _mode; +} + +Parser* ParserATNSimulator::getParser() { + return parser; +} + +#ifdef _MSC_VER +#pragma warning (disable:4996) // 'getenv': This function or variable may be unsafe. Consider using _dupenv_s instead. 
+#endif + +bool ParserATNSimulator::getLrLoopSetting() { + char *var = std::getenv("TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT"); + if (var == nullptr) + return false; + std::string value(var); + return value == "true" || value == "1"; +} + +#ifdef _MSC_VER +#pragma warning (default:4996) +#endif + +void ParserATNSimulator::InitializeInstanceFields() { + _mode = PredictionMode::LL; + _startIndex = 0; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ParserATNSimulator.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ParserATNSimulator.h new file mode 100644 index 0000000..6520a44 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ParserATNSimulator.h @@ -0,0 +1,904 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "PredictionMode.h" +#include "dfa/DFAState.h" +#include "atn/ATNSimulator.h" +#include "atn/PredictionContext.h" +#include "SemanticContext.h" +#include "atn/ATNConfig.h" + +namespace antlr4 { +namespace atn { + + /** + * The embodiment of the adaptive LL(*), ALL(*), parsing strategy. + * + * <p> + * The basic complexity of the adaptive strategy makes it harder to understand. + * We begin with ATN simulation to build paths in a DFA. Subsequent prediction + * requests go through the DFA first. If they reach a state without an edge for + * the current symbol, the algorithm fails over to the ATN simulation to + * complete the DFA path for the current input (until it finds a conflict state + * or uniquely predicting state).</p> + * + * <p> + * All of that is done without using the outer context because we want to create + * a DFA that is not dependent upon the rule invocation stack when we do a + * prediction. One DFA works in all contexts. 
We avoid using context not + * necessarily because it's slower, although it can be, but because of the DFA + * caching problem. The closure routine only considers the rule invocation stack + * created during prediction beginning in the decision rule. For example, if + * prediction occurs without invoking another rule's ATN, there are no context + * stacks in the configurations. When lack of context leads to a conflict, we + * don't know if it's an ambiguity or a weakness in the strong LL(*) parsing + * strategy (versus full LL(*)).</p> + * + * <p> + * When SLL yields a configuration set with conflict, we rewind the input and + * retry the ATN simulation, this time using full outer context without adding + * to the DFA. Configuration context stacks will be the full invocation stacks + * from the start rule. If we get a conflict using full context, then we can + * definitively say we have a true ambiguity for that input sequence. If we + * don't get a conflict, it implies that the decision is sensitive to the outer + * context. (It is not context-sensitive in the sense of context-sensitive + * grammars.)</p> + * + * <p> + * The next time we reach this DFA state with an SLL conflict, through DFA + * simulation, we will again retry the ATN simulation using full context mode. + * This is slow because we can't save the results and have to "interpret" the + * ATN each time we get that input.</p> + * + * <p> + * <strong>CACHING FULL CONTEXT PREDICTIONS</strong></p> + * + * <p> + * We could cache results from full context to predicted alternative easily and + * that saves a lot of time but doesn't work in presence of predicates. The set + * of visible predicates from the ATN start state changes depending on the + * context, because closure can fall off the end of a rule. I tried to cache + * tuples (stack context, semantic context, predicted alt) but it was slower + * than interpreting and much more complicated. Also required a huge amount of + * memory. 
The goal is not to create the world's fastest parser anyway. I'd like + * to keep this algorithm simple. By launching multiple threads, we can improve + * the speed of parsing across a large number of files.</p> + * + * <p> + * There is no strict ordering between the amount of input used by SLL vs LL, + * which makes it really hard to build a cache for full context. Let's say that + * we have input A B C that leads to an SLL conflict with full context X. That + * implies that using X we might only use A B but we could also use A B C D to + * resolve conflict. Input A B C D could predict alternative 1 in one position + * in the input and A B C E could predict alternative 2 in another position in + * input. The conflicting SLL configurations could still be non-unique in the + * full context prediction, which would lead us to requiring more input than the + * original A B C. To make a prediction cache work, we have to track the exact + * input used during the previous prediction. That amounts to a cache that maps + * X to a specific DFA for that context.</p> + * + * <p> + * Something should be done for left-recursive expression predictions. They are + * likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry + * with full LL thing Sam does.</p> + * + * <p> + * <strong>AVOIDING FULL CONTEXT PREDICTION</strong></p> + * + * <p> + * We avoid doing full context retry when the outer context is empty, we did not + * dip into the outer context by falling off the end of the decision state rule, + * or when we force SLL mode.</p> + * + * <p> + * As an example of the case where closure does not dip into the outer context, consider super + * constructor calls versus function calls. One grammar might look like + * this:</p> + * + * <pre> + * ctorBody + * : '{' superCall? stat* '}' + * ; + * </pre> + * + * <p> + * Or, you might see something like</p> + * + * <pre> + * stat + * : superCall ';' + * | expression ';' + * | ... 
+ * ; + * </pre> + * + * <p> + * In both cases I believe that no closure operations will dip into the outer + * context. In the first case ctorBody in the worst case will stop at the '}'. + * In the 2nd case it should stop at the ';'. Both cases should stay within the + * entry rule and not dip into the outer context.</p> + * + * <p> + * <strong>PREDICATES</strong></p> + * + * <p> + * Predicates are always evaluated if present, in both SLL and LL. SLL and + * LL simulation deal with predicates differently. SLL collects predicates as + * it performs closure operations like ANTLR v3 did. It delays predicate + * evaluation until it reaches an accept state. This allows us to cache the SLL + * ATN simulation whereas, if we had evaluated predicates on-the-fly during + * closure, the DFA state configuration sets would be different and we couldn't + * build up a suitable DFA.</p> + * + * <p> + * When building a DFA accept state during ATN simulation, we evaluate any + * predicates and return the sole semantically valid alternative. If there is + * more than 1 alternative, we report an ambiguity. If there are 0 alternatives, + * we throw an exception. Alternatives without predicates act like they have + * true predicates. The simple way to think about it is to strip away all + * alternatives with false predicates and choose the minimum alternative that + * remains.</p> + * + * <p> + * When we start in the DFA and reach an accept state that's predicated, we test + * those and return the minimum semantically viable alternative. If no + * alternatives are viable, we throw an exception.</p> + * + * <p> + * During full LL ATN simulation, closure always evaluates predicates + * on-the-fly. This is crucial to reducing the configuration set size during + * closure. 
It hits a landmine when parsing with the Java grammar, for example, + * without this on-the-fly evaluation.</p> + * + * <p> + * <strong>SHARING DFA</strong></p> + * + * <p> + * All instances of the same parser share the same decision DFAs through a + * static field. Each instance gets its own ATN simulator but they share the + * same {@link #decisionToDFA} field. They also share a + * {@link PredictionContextCache} object that makes sure that all + * {@link PredictionContext} objects are shared among the DFA states. This makes + * a big size difference.</p> + * + * <p> + * <strong>THREAD SAFETY</strong></p> + * + * <p> + * The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when + * it adds a new DFA object to that array. {@link #addDFAEdge} + * locks on the DFA for the current decision when setting the + * {@link DFAState#edges} field. {@link #addDFAState} locks on + * the DFA for the current decision when looking up a DFA state to see if it + * already exists. We must make sure that all requests to add DFA states that + * are equivalent result in the same shared DFA object. This is because lots of + * threads will be trying to update the DFA at once. The + * {@link #addDFAState} method also locks inside the DFA lock + * but this time on the shared context cache when it rebuilds the + * configurations' {@link PredictionContext} objects using cached + * subgraphs/nodes. No other locking occurs, even during DFA simulation. This is + * safe as long as we can guarantee that all threads referencing + * {@code s.edge[t]} get the same physical target {@link DFAState}, or + * {@code null}. Once into the DFA, the DFA simulation does not reference the + * {@link DFA#states} map. It follows the {@link DFAState#edges} field to new + * targets. The DFA simulator will either find {@link DFAState#edges} to be + * {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or + * {@code dfa.edges[t]} to be non-null. 
The + * {@link #addDFAEdge} method could be racing to set the field + * but in either case the DFA simulator works; if {@code null}, it requests ATN + * simulation. It could also race trying to get {@code dfa.edges[t]}, but either + * way it will work because it's not doing a test and set operation.</p> + * + * <p> + * <strong>Starting with SLL then failing over to combined SLL/LL (Two-Stage + * Parsing)</strong></p> + * + * <p> + * Sam pointed out that if SLL does not give a syntax error, then there is no + * point in doing full LL, which is slower. We only have to try LL if we get a + * syntax error. For maximum speed, Sam starts the parser set to pure SLL + * mode with the {@link BailErrorStrategy}:</p> + * + * <pre> + * parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )}; + * parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}()); + * </pre> + * + * <p> + * If it does not get a syntax error, then we're done. If it does get a syntax + * error, we need to retry with the combined SLL/LL strategy.</p> + * + * <p> + * The reason this works is as follows. If there are no SLL conflicts, then the + * grammar is SLL (at least for that input set). If there is an SLL conflict, + * the full LL analysis must yield a set of viable alternatives which is a + * subset of the alternatives reported by SLL. If the LL set is a singleton, + * then the grammar is LL but not SLL. If the LL set is the same size as the SLL + * set, the decision is SLL. If the LL set has size > 1, then that decision + * is truly ambiguous on the current input. If the LL set is smaller, then the + * SLL conflict resolution might choose an alternative that the full LL would + * rule out as a possibility based upon better context information. If that's + * the case, then the SLL parse will definitely get an error because the full LL + * analysis says it's not viable. 
If SLL conflict resolution chooses an + * alternative within the LL set, them both SLL and LL would choose the same + * alternative because they both choose the minimum of multiple conflicting + * alternatives.</p> + * + * <p> + * Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and + * a smaller LL set called <em>s</em>. If <em>s</em> is {@code {2, 3}}, then SLL + * parsing will get an error because SLL will pursue alternative 1. If + * <em>s</em> is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will + * choose the same alternative because alternative one is the minimum of either + * set. If <em>s</em> is {@code {2}} or {@code {3}} then SLL will get a syntax + * error. If <em>s</em> is {@code {1}} then SLL will succeed.</p> + * + * <p> + * Of course, if the input is invalid, then we will get an error for sure in + * both SLL and LL parsing. Erroneous input will therefore require 2 passes over + * the input.</p> + */ + class ANTLR4CPP_PUBLIC ParserATNSimulator : public ATNSimulator { + public: + /// Testing only! + ParserATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache); + + ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache); + + virtual void reset() override; + virtual void clearDFA() override; + virtual size_t adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext); + + static const bool TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT; + + std::vector<dfa::DFA> &decisionToDFA; + + /** Implements first-edge (loop entry) elimination as an optimization + * during closure operations. See antlr/antlr4#1398. + * + * The optimization is to avoid adding the loop entry config when + * the exit path can only lead back to the same + * StarLoopEntryState after popping context at the rule end state + * (traversing only epsilon edges, so we're still in closure, in + * this same rule). 
+ * + * We need to detect any state that can reach loop entry on + * epsilon w/o exiting rule. We don't have to look at FOLLOW + * links, just ensure that all stack tops for config refer to key + * states in LR rule. + * + * To verify we are in the right situation we must first check + * closure is at a StarLoopEntryState generated during LR removal. + * Then we check that each stack top of context is a return state + * from one of these cases: + * + * 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state + * 2. expr op expr. The return state is the block end of internal block of (...)* + * 3. 'between' expr 'and' expr. The return state of 2nd expr reference. + * That state points at block end of internal block of (...)*. + * 4. expr '?' expr ':' expr. The return state points at block end, + * which points at loop entry state. + * + * If any is true for each stack top, then closure does not add a + * config to the current config set for edge[0], the loop entry branch. + * + * Conditions fail if any context for the current config is: + * + * a. empty (we'd fall out of expr to do a global FOLLOW which could + * even be to some weird spot in expr) or, + * b. lies outside of expr or, + * c. lies within expr but at a state not the BlockEndState + * generated during LR removal + * + * Do we need to evaluate predicates ever in closure for this case? + * + * No. Predicates, including precedence predicates, are only + * evaluated when computing a DFA start state. I.e., only before + * the lookahead (but not parser) consumes a token. + * + * There are no epsilon edges allowed in LR rule alt blocks or in + * the "primary" part (ID here). If closure is in + * StarLoopEntryState any lookahead operation will have consumed a + * token as there are no epsilon-paths that lead to + * StarLoopEntryState. We do not have to evaluate predicates + * therefore if we are in the generated StarLoopEntryState of a LR + * rule. 
Note that when making a prediction starting at that + * decision point, decision d=2, compute-start-state performs + * closure starting at edges[0], edges[1] emanating from + * StarLoopEntryState. That means it is not performing closure on + * StarLoopEntryState during compute-start-state. + * + * How do we know this always gives the same prediction answer? + * + * Without predicates, loop entry and exit paths are ambiguous + * upon remaining input +b (in, say, a+b). Either path leads to + * valid parses. Closure can lead to consuming + immediately or by + * falling out of this call to expr back into expr and loop back + * again to StarLoopEntryState to match +b. In this special case, + * we choose the more efficient path, which is to take the bypass + * path. + * + * The lookahead language has not changed because closure chooses + * one path over the other. Both paths lead to consuming the same + * remaining input during a lookahead operation. If the next token + * is an operator, lookahead will enter the choice block with + * operators. If it is not, lookahead will exit expr. Same as if + * closure had chosen to enter the choice block immediately. + * + * Closure is examining one config (some loopentrystate, some alt, + * context) which means it is considering exactly one alt. Closure + * always copies the same alt to any derived configs. + * + * How do we know this optimization doesn't mess up precedence in + * our parse trees? + * + * Looking through expr from left edge of stat only has to confirm + * that an input, say, a+b+c; begins with any valid interpretation + * of an expression. The precedence actually doesn't matter when + * making a decision in stat seeing through expr. It is only when + * parsing rule expr that we must use the precedence to get the + * right interpretation and, hence, parse tree. 
+ */ + bool canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig *config) const; + virtual std::string getRuleName(size_t index); + + virtual Ref<ATNConfig> precedenceTransition(Ref<ATNConfig> const& config, PrecedencePredicateTransition *pt, + bool collectPredicates, bool inContext, bool fullCtx); + + void setPredictionMode(PredictionMode newMode); + PredictionMode getPredictionMode(); + + Parser* getParser(); + + virtual std::string getTokenName(size_t t); + + virtual std::string getLookaheadName(TokenStream *input); + + /// <summary> + /// Used for debugging in adaptivePredict around execATN but I cut + /// it out for clarity now that alg. works well. We can leave this + /// "dead" code for a bit. + /// </summary> + virtual void dumpDeadEndConfigs(NoViableAltException &nvae); + + protected: + Parser *const parser; + + /// <summary> + /// Each prediction operation uses a cache for merge of prediction contexts. + /// Don't keep around as it wastes huge amounts of memory. The merge cache + /// isn't synchronized but we're ok since two threads shouldn't reuse same + /// parser/atnsim object because it can only handle one input at a time. + /// This maps graphs a and b to merged result c. (a,b)->c. We can avoid + /// the merge if we ever see a and b again. Note that (b,a)->c should + /// also be examined during cache lookup. + /// </summary> + PredictionContextMergeCache mergeCache; + + // LAME globals to avoid parameters!!!!! I need these down deep in predTransition + TokenStream *_input; + size_t _startIndex; + ParserRuleContext *_outerContext; + dfa::DFA *_dfa; // Reference into the decisionToDFA vector. + + /// <summary> + /// Performs ATN simulation to compute a predicted alternative based + /// upon the remaining input, but also updates the DFA cache to avoid + /// having to traverse the ATN again for the same input sequence. 
+ /// + /// There are some key conditions we're looking for after computing a new + /// set of ATN configs (proposed DFA state): + /// if the set is empty, there is no viable alternative for current symbol + /// does the state uniquely predict an alternative? + /// does the state have a conflict that would prevent us from + /// putting it on the work list? + /// + /// We also have some key operations to do: + /// add an edge from previous DFA state to potentially new DFA state, D, + /// upon current symbol but only if adding to work list, which means in all + /// cases except no viable alternative (and possibly non-greedy decisions?) + /// collecting predicates and adding semantic context to DFA accept states + /// adding rule context to context-sensitive DFA accept states + /// consuming an input symbol + /// reporting a conflict + /// reporting an ambiguity + /// reporting a context sensitivity + /// reporting insufficient predicates + /// + /// cover these cases: + /// dead end + /// single alt + /// single alt + preds + /// conflict + /// conflict + preds + /// </summary> + virtual size_t execATN(dfa::DFA &dfa, dfa::DFAState *s0, TokenStream *input, size_t startIndex, + ParserRuleContext *outerContext); + + /// <summary> + /// Get an existing target state for an edge in the DFA. If the target state + /// for the edge has not yet been computed or is otherwise not available, + /// this method returns {@code null}. + /// </summary> + /// <param name="previousD"> The current DFA state </param> + /// <param name="t"> The next input symbol </param> + /// <returns> The existing target DFA state for the given input symbol + /// {@code t}, or {@code null} if the target state for this edge is not + /// already cached </returns> + virtual dfa::DFAState* getExistingTargetState(dfa::DFAState *previousD, size_t t); + + /// <summary> + /// Compute a target state for an edge in the DFA, and attempt to add the + /// computed state and corresponding edge to the DFA. 
+ /// </summary> + /// <param name="dfa"> The DFA </param> + /// <param name="previousD"> The current DFA state </param> + /// <param name="t"> The next input symbol + /// </param> + /// <returns> The computed target DFA state for the given input symbol + /// {@code t}. If {@code t} does not lead to a valid DFA state, this method + /// returns <seealso cref="#ERROR"/>. </returns> + virtual dfa::DFAState *computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t); + + virtual void predicateDFAState(dfa::DFAState *dfaState, DecisionState *decisionState); + + // comes back with reach.uniqueAlt set to a valid alt + virtual size_t execATNWithFullContext(dfa::DFA &dfa, dfa::DFAState *D, ATNConfigSet *s0, + TokenStream *input, size_t startIndex, ParserRuleContext *outerContext); // how far we got before failing over + + virtual std::unique_ptr<ATNConfigSet> computeReachSet(ATNConfigSet *closure, size_t t, bool fullCtx); + + /// <summary> + /// Return a configuration set containing only the configurations from + /// {@code configs} which are in a <seealso cref="RuleStopState"/>. If all + /// configurations in {@code configs} are already in a rule stop state, this + /// method simply returns {@code configs}. + /// <p/> + /// When {@code lookToEndOfRule} is true, this method uses + /// <seealso cref="ATN#nextTokens"/> for each configuration in {@code configs} which is + /// not already in a rule stop state to see if a rule stop state is reachable + /// from the configuration via epsilon-only transitions. + /// </summary> + /// <param name="configs"> the configuration set to update </param> + /// <param name="lookToEndOfRule"> when true, this method checks for rule stop states + /// reachable by epsilon-only transitions from each configuration in + /// {@code configs}. 
+ /// </param> + /// <returns> {@code configs} if all configurations in {@code configs} are in a + /// rule stop state, otherwise return a new configuration set containing only + /// the configurations from {@code configs} which are in a rule stop state </returns> + virtual ATNConfigSet* removeAllConfigsNotInRuleStopState(ATNConfigSet *configs, bool lookToEndOfRule); + + virtual std::unique_ptr<ATNConfigSet> computeStartState(ATNState *p, RuleContext *ctx, bool fullCtx); + + /* parrt internal source braindump that doesn't mess up + * external API spec. + + applyPrecedenceFilter is an optimization to avoid highly + nonlinear prediction of expressions and other left recursive + rules. The precedence predicates such as {3>=prec}? Are highly + context-sensitive in that they can only be properly evaluated + in the context of the proper prec argument. Without pruning, + these predicates are normal predicates evaluated when we reach + conflict state (or unique prediction). As we cannot evaluate + these predicates out of context, the resulting conflict leads + to full LL evaluation and nonlinear prediction which shows up + very clearly with fairly large expressions. + + Example grammar: + + e : e '*' e + | e '+' e + | INT + ; + + We convert that to the following: + + e[int prec] + : INT + ( {3>=prec}? '*' e[4] + | {2>=prec}? '+' e[3] + )* + ; + + The (..)* loop has a decision for the inner block as well as + an enter or exit decision, which is what concerns us here. At + the 1st + of input 1+2+3, the loop entry sees both predicates + and the loop exit also sees both predicates by falling off the + edge of e. This is because we have no stack information with + SLL and find the follow of e, which will hit the return states + inside the loop after e[4] and e[3], which brings it back to + the enter or exit decision. 
In this case, we know that we + cannot evaluate those predicates because we have fallen off + the edge of the stack and will in general not know which prec + parameter is the right one to use in the predicate. + + Because we have special information, that these are precedence + predicates, we can resolve them without failing over to full + LL despite their context sensitive nature. We make an + assumption that prec[-1] <= prec[0], meaning that the current + precedence level is greater than or equal to the precedence + level of recursive invocations above us in the stack. For + example, if predicate {3>=prec}? is true of the current prec, + then one option is to enter the loop to match it now. The + other option is to exit the loop and the left recursive rule + to match the current operator in rule invocation further up + the stack. But, we know that all of those prec are lower or + the same value and so we can decide to enter the loop instead + of matching it later. That means we can strip out the other + configuration for the exit branch. + + So imagine we have (14,1,$,{2>=prec}?) and then + (14,2,$-dipsIntoOuterContext,{2>=prec}?). The optimization + allows us to collapse these two configurations. We know that + if {2>=prec}? is true for the current prec parameter, it will + also be true for any prec from an invoking e call, indicated + by dipsIntoOuterContext. As the predicates are both true, we + have the option to evaluate them early in the decision start + state. We do this by stripping both predicates and choosing to + enter the loop as it is consistent with the notion of operator + precedence. It's also how the full LL conflict resolution + would work. + + The solution requires a different DFA start state for each + precedence level. + + The basic filter mechanism is to remove configurations of the + form (p, 2, pi) if (p, 1, pi) exists for the same p and pi. 
In + other words, for the same ATN state and predicate context, + remove any configuration associated with an exit branch if + there is a configuration associated with the enter branch. + + It's also the case that the filter evaluates precedence + predicates and resolves conflicts according to precedence + levels. For example, for input 1+2+3 at the first +, we see + prediction filtering + + [(11,1,[$],{3>=prec}?), (14,1,[$],{2>=prec}?), (5,2,[$],up=1), + (11,2,[$],up=1), (14,2,[$],up=1)],hasSemanticContext=true,dipsIntoOuterContext + + to + + [(11,1,[$]), (14,1,[$]), (5,2,[$],up=1)],dipsIntoOuterContext + + This filters because {3>=prec}? evals to true and collapses + (11,1,[$],{3>=prec}?) and (11,2,[$],up=1) since early conflict + resolution based upon rules of operator precedence fits with + our usual match first alt upon conflict. + + We noticed a problem where a recursive call resets precedence + to 0. Sam's fix: each config has flag indicating if it has + returned from an expr[0] call. then just don't filter any + config with that flag set. flag is carried along in + closure(). so to avoid adding field, set bit just under sign + bit of dipsIntoOuterContext (SUPPRESS_PRECEDENCE_FILTER). + With the change you filter "unless (p, 2, pi) was reached + after leaving the rule stop state of the LR rule containing + state p, corresponding to a rule invocation with precedence + level 0" + */ + + /** + * This method transforms the start state computed by + * {@link #computeStartState} to the special start state used by a + * precedence DFA for a particular precedence value. The transformation + * process applies the following changes to the start state's configuration + * set. 
+ * + * <ol> + * <li>Evaluate the precedence predicates for each configuration using + * {@link SemanticContext#evalPrecedence}.</li> + * <li>When {@link ATNConfig#isPrecedenceFilterSuppressed} is {@code false}, + * remove all configurations which predict an alternative greater than 1, + * for which another configuration that predicts alternative 1 is in the + * same ATN state with the same prediction context. This transformation is + * valid for the following reasons: + * <ul> + * <li>The closure block cannot contain any epsilon transitions which bypass + * the body of the closure, so all states reachable via alternative 1 are + * part of the precedence alternatives of the transformed left-recursive + * rule.</li> + * <li>The "primary" portion of a left recursive rule cannot contain an + * epsilon transition, so the only way an alternative other than 1 can exist + * in a state that is also reachable via alternative 1 is by nesting calls + * to the left-recursive rule, with the outer calls not being at the + * preferred precedence level. The + * {@link ATNConfig#isPrecedenceFilterSuppressed} property marks ATN + * configurations which do not meet this condition, and therefore are not + * eligible for elimination during the filtering process.</li> + * </ul> + * </li> + * </ol> + * + * <p> + * The prediction context must be considered by this filter to address + * situations like the following. + * </p> + * <code> + * <pre> + * grammar TA; + * prog: statement* EOF; + * statement: letterA | statement letterA 'b' ; + * letterA: 'a'; + * </pre> + * </code> + * <p> + * If the above grammar, the ATN state immediately before the token + * reference {@code 'a'} in {@code letterA} is reachable from the left edge + * of both the primary and closure blocks of the left-recursive rule + * {@code statement}. 
The prediction context associated with each of these + * configurations distinguishes between them, and prevents the alternative + * which stepped out to {@code prog} (and then back in to {@code statement} + * from being eliminated by the filter. + * </p> + * + * @param configs The configuration set computed by + * {@link #computeStartState} as the start state for the DFA. + * @return The transformed configuration set representing the start state + * for a precedence DFA at a particular precedence level (determined by + * calling {@link Parser#getPrecedence}). + */ + std::unique_ptr<ATNConfigSet> applyPrecedenceFilter(ATNConfigSet *configs); + + virtual ATNState *getReachableTarget(Transition *trans, size_t ttype); + + virtual std::vector<Ref<SemanticContext>> getPredsForAmbigAlts(const antlrcpp::BitSet &ambigAlts, + ATNConfigSet *configs, size_t nalts); + + virtual std::vector<dfa::DFAState::PredPrediction*> getPredicatePredictions(const antlrcpp::BitSet &ambigAlts, + std::vector<Ref<SemanticContext>> const& altToPred); + + /** + * This method is used to improve the localization of error messages by + * choosing an alternative rather than throwing a + * {@link NoViableAltException} in particular prediction scenarios where the + * {@link #ERROR} state was reached during ATN simulation. + * + * <p> + * The default implementation of this method uses the following + * algorithm to identify an ATN configuration which successfully parsed the + * decision entry rule. 
Choosing such an alternative ensures that the + * {@link ParserRuleContext} returned by the calling rule will be complete + * and valid, and the syntax error will be reported later at a more + * localized location.</p> + * + * <ul> + * <li>If a syntactically valid path or paths reach the end of the decision rule and + * they are semantically valid if predicated, return the min associated alt.</li> + * <li>Else, if a semantically invalid but syntactically valid path exist + * or paths exist, return the minimum associated alt. + * </li> + * <li>Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.</li> + * </ul> + * + * <p> + * In some scenarios, the algorithm described above could predict an + * alternative which will result in a {@link FailedPredicateException} in + * the parser. Specifically, this could occur if the <em>only</em> configuration + * capable of successfully parsing to the end of the decision rule is + * blocked by a semantic predicate. By choosing this alternative within + * {@link #adaptivePredict} instead of throwing a + * {@link NoViableAltException}, the resulting + * {@link FailedPredicateException} in the parser will identify the specific + * predicate which is preventing the parser from successfully parsing the + * decision rule, which helps developers identify and correct logic errors + * in semantic predicates. + * </p> + * + * @param configs The ATN configurations which were valid immediately before + * the {@link #ERROR} state was reached + * @param outerContext The is the \gamma_0 initial parser context from the paper + * or the parser stack at the instant before prediction commences. + * + * @return The value to return from {@link #adaptivePredict}, or + * {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not + * identified and {@link #adaptivePredict} should report an error instead. 
+ */ + size_t getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(ATNConfigSet *configs, + ParserRuleContext *outerContext); + + virtual size_t getAltThatFinishedDecisionEntryRule(ATNConfigSet *configs); + + /** Walk the list of configurations and split them according to + * those that have preds evaluating to true/false. If no pred, assume + * true pred and include in succeeded set. Returns Pair of sets. + * + * Create a new set so as not to alter the incoming parameter. + * + * Assumption: the input stream has been restored to the starting point + * prediction, which is where predicates need to evaluate. + */ + std::pair<ATNConfigSet *, ATNConfigSet *> splitAccordingToSemanticValidity(ATNConfigSet *configs, + ParserRuleContext *outerContext); + + /// <summary> + /// Look through a list of predicate/alt pairs, returning alts for the + /// pairs that win. A {@code NONE} predicate indicates an alt containing an + /// unpredicated config which behaves as "always true." If !complete + /// then we stop at the first predicate that evaluates to true. This + /// includes pairs with null predicates. + /// </summary> + virtual antlrcpp::BitSet evalSemanticContext(std::vector<dfa::DFAState::PredPrediction*> predPredictions, + ParserRuleContext *outerContext, bool complete); + + /** + * Evaluate a semantic context within a specific parser context. + * + * <p> + * This method might not be called for every semantic context evaluated + * during the prediction process. In particular, we currently do not + * evaluate the following but it may change in the future:</p> + * + * <ul> + * <li>Precedence predicates (represented by + * {@link SemanticContext.PrecedencePredicate}) are not currently evaluated + * through this method.</li> + * <li>Operator predicates (represented by {@link SemanticContext.AND} and + * {@link SemanticContext.OR}) are evaluated as a single semantic + * context, rather than evaluating the operands individually. 
+ * Implementations which require evaluation results from individual + * predicates should override this method to explicitly handle evaluation of + * the operands within operator predicates.</li> + * </ul> + * + * @param pred The semantic context to evaluate + * @param parserCallStack The parser context in which to evaluate the + * semantic context + * @param alt The alternative which is guarded by {@code pred} + * @param fullCtx {@code true} if the evaluation is occurring during LL + * prediction; otherwise, {@code false} if the evaluation is occurring + * during SLL prediction + * + * @since 4.3 + */ + virtual bool evalSemanticContext(Ref<SemanticContext> const& pred, ParserRuleContext *parserCallStack, + size_t alt, bool fullCtx); + + /* TODO: If we are doing predicates, there is no point in pursuing + closure operations if we reach a DFA state that uniquely predicts + alternative. We will not be caching that DFA state and it is a + waste to pursue the closure. Might have to advance when we do + ambig detection though :( + */ + virtual void closure(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, bool treatEofAsEpsilon); + + virtual void closureCheckingStopState(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon); + + /// Do the actual work of walking epsilon edges. 
+ virtual void closure_(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon); + + virtual Ref<ATNConfig> getEpsilonTarget(Ref<ATNConfig> const& config, Transition *t, bool collectPredicates, + bool inContext, bool fullCtx, bool treatEofAsEpsilon); + virtual Ref<ATNConfig> actionTransition(Ref<ATNConfig> const& config, ActionTransition *t); + + virtual Ref<ATNConfig> predTransition(Ref<ATNConfig> const& config, PredicateTransition *pt, bool collectPredicates, + bool inContext, bool fullCtx); + + virtual Ref<ATNConfig> ruleTransition(Ref<ATNConfig> const& config, RuleTransition *t); + + /** + * Gets a {@link BitSet} containing the alternatives in {@code configs} + * which are part of one or more conflicting alternative subsets. + * + * @param configs The {@link ATNConfigSet} to analyze. + * @return The alternatives in {@code configs} which are part of one or more + * conflicting alternative subsets. If {@code configs} does not contain any + * conflicting subsets, this method returns an empty {@link BitSet}. + */ + virtual antlrcpp::BitSet getConflictingAlts(ATNConfigSet *configs); + + /// <summary> + /// Sam pointed out a problem with the previous definition, v3, of + /// ambiguous states. If we have another state associated with conflicting + /// alternatives, we should keep going. For example, the following grammar + /// + /// s : (ID | ID ID?) ';' ; + /// + /// When the ATN simulation reaches the state before ';', it has a DFA + /// state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally + /// 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node + /// because alternative two has another way to continue, via [6|2|[]]. + /// The key is that we have a single state that has configs only associated + /// with a single alternative, 2, and crucially the state transitions + /// among the configurations are all non-epsilon transitions. 
That means + /// we don't consider any conflicts that include alternative 2. So, we + /// ignore the conflict between alts 1 and 2. We ignore a set of + /// conflicting alts when there is an intersection with an alternative + /// associated with a single alt state in the state->config-list map. + /// + /// It's also the case that we might have two conflicting configurations but + /// also a 3rd nonconflicting configuration for a different alternative: + /// [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: + /// + /// a : A | A | A B ; + /// + /// After matching input A, we reach the stop state for rule A, state 1. + /// State 8 is the state right before B. Clearly alternatives 1 and 2 + /// conflict and no amount of further lookahead will separate the two. + /// However, alternative 3 will be able to continue and so we do not + /// stop working on this state. In the previous example, we're concerned + /// with states associated with the conflicting alternatives. Here alt + /// 3 is not associated with the conflicting configs, but since we can continue + /// looking for input reasonably, I don't declare the state done. We + /// ignore a set of conflicting alts when we have an alternative + /// that we still need to pursue. + /// </summary> + + virtual antlrcpp::BitSet getConflictingAltsOrUniqueAlt(ATNConfigSet *configs); + + virtual NoViableAltException noViableAlt(TokenStream *input, ParserRuleContext *outerContext, + ATNConfigSet *configs, size_t startIndex, bool deleteConfigs); + + static size_t getUniqueAlt(ATNConfigSet *configs); + + /// <summary> + /// Add an edge to the DFA, if possible. This method calls + /// <seealso cref="#addDFAState"/> to ensure the {@code to} state is present in the + /// DFA. If {@code from} is {@code null}, or if {@code t} is outside the + /// range of edges that can be represented in the DFA tables, this method + /// returns without adding the edge to the DFA. 
+ /// <p/> + /// If {@code to} is {@code null}, this method returns {@code null}. + /// Otherwise, this method returns the <seealso cref="DFAState"/> returned by calling + /// <seealso cref="#addDFAState"/> for the {@code to} state. + /// </summary> + /// <param name="dfa"> The DFA </param> + /// <param name="from"> The source state for the edge </param> + /// <param name="t"> The input symbol </param> + /// <param name="to"> The target state for the edge + /// </param> + /// <returns> If {@code to} is {@code null}, this method returns {@code null}; + /// otherwise this method returns the result of calling <seealso cref="#addDFAState"/> + /// on {@code to} </returns> + virtual dfa::DFAState *addDFAEdge(dfa::DFA &dfa, dfa::DFAState *from, ssize_t t, dfa::DFAState *to); + + /// <summary> + /// Add state {@code D} to the DFA if it is not already present, and return + /// the actual instance stored in the DFA. If a state equivalent to {@code D} + /// is already in the DFA, the existing state is returned. Otherwise this + /// method returns {@code D} after adding it to the DFA. + /// <p/> + /// If {@code D} is <seealso cref="#ERROR"/>, this method returns <seealso cref="#ERROR"/> and + /// does not change the DFA. + /// </summary> + /// <param name="dfa"> The dfa </param> + /// <param name="D"> The DFA state to add </param> + /// <returns> The state stored in the DFA. This will be either the existing + /// state if {@code D} is already in the DFA, or {@code D} itself if the + /// state was not already present. </returns> + virtual dfa::DFAState *addDFAState(dfa::DFA &dfa, dfa::DFAState *D); + + virtual void reportAttemptingFullContext(dfa::DFA &dfa, const antlrcpp::BitSet &conflictingAlts, + ATNConfigSet *configs, size_t startIndex, size_t stopIndex); + + virtual void reportContextSensitivity(dfa::DFA &dfa, size_t prediction, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex); + + /// If context sensitive parsing, we know it's ambiguity not conflict. 
+ virtual void reportAmbiguity(dfa::DFA &dfa, + dfa::DFAState *D, // the DFA state from execATN() that had SLL conflicts + size_t startIndex, size_t stopIndex, + bool exact, + const antlrcpp::BitSet &ambigAlts, + ATNConfigSet *configs); // configs that LL not SLL considered conflicting + + private: + // SLL, LL, or LL + exact ambig detection? + PredictionMode _mode; + + static bool getLrLoopSetting(); + void InitializeInstanceFields(); + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PlusBlockStartState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PlusBlockStartState.cpp new file mode 100644 index 0000000..b0ee12a --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PlusBlockStartState.cpp @@ -0,0 +1,12 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/PlusBlockStartState.h" + +using namespace antlr4::atn; + +size_t PlusBlockStartState::getStateType() { + return PLUS_BLOCK_START; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PlusBlockStartState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PlusBlockStartState.h new file mode 100644 index 0000000..a3affb8 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PlusBlockStartState.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/BlockStartState.h" + +namespace antlr4 { +namespace atn { + + /// Start of {@code (A|B|...)+} loop. Technically a decision state, but + /// we don't use for code generation; somebody might need it, so I'm defining + /// it for completeness. 
In reality, the <seealso cref="PlusLoopbackState"/> node is the + /// real decision-making node for {@code A+}. + class ANTLR4CPP_PUBLIC PlusBlockStartState final : public BlockStartState { + public: + PlusLoopbackState *loopBackState = nullptr; + + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PlusLoopbackState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PlusLoopbackState.cpp new file mode 100644 index 0000000..1edab24 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PlusLoopbackState.cpp @@ -0,0 +1,12 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/PlusLoopbackState.h" + +using namespace antlr4::atn; + +size_t PlusLoopbackState::getStateType() { + return PLUS_LOOP_BACK; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PlusLoopbackState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PlusLoopbackState.h new file mode 100644 index 0000000..ba7a4b6 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PlusLoopbackState.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + /// Decision state for {@code A+} and {@code (A|B)+}. It has two transitions: + /// one to the loop back to start of the block and one to exit. 
+ class ANTLR4CPP_PUBLIC PlusLoopbackState final : public DecisionState { + + public: + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PrecedencePredicateTransition.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PrecedencePredicateTransition.cpp new file mode 100644 index 0000000..9aedc9d --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PrecedencePredicateTransition.cpp @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/PrecedencePredicateTransition.h" + +using namespace antlr4::atn; + +PrecedencePredicateTransition::PrecedencePredicateTransition(ATNState *target, int precedence) + : AbstractPredicateTransition(target), precedence(precedence) { +} + +Transition::SerializationType PrecedencePredicateTransition::getSerializationType() const { + return PRECEDENCE; +} + +bool PrecedencePredicateTransition::isEpsilon() const { + return true; +} + +bool PrecedencePredicateTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +Ref<SemanticContext::PrecedencePredicate> PrecedencePredicateTransition::getPredicate() const { + return std::make_shared<SemanticContext::PrecedencePredicate>(precedence); +} + +std::string PrecedencePredicateTransition::toString() const { + return "PRECEDENCE " + Transition::toString() + " { precedence: " + std::to_string(precedence) + " }"; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PrecedencePredicateTransition.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PrecedencePredicateTransition.h new file mode 100644 index 0000000..bc22146 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PrecedencePredicateTransition.h @@ -0,0 +1,29 
@@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/AbstractPredicateTransition.h" +#include "SemanticContext.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC PrecedencePredicateTransition final : public AbstractPredicateTransition { + public: + const int precedence; + + PrecedencePredicateTransition(ATNState *target, int precedence); + + virtual SerializationType getSerializationType() const override; + virtual bool isEpsilon() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + Ref<SemanticContext::PrecedencePredicate> getPredicate() const; + virtual std::string toString() const override; + + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredicateEvalInfo.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredicateEvalInfo.cpp new file mode 100644 index 0000000..3d86bfe --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredicateEvalInfo.cpp @@ -0,0 +1,17 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "SemanticContext.h" + +#include "atn/PredicateEvalInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +PredicateEvalInfo::PredicateEvalInfo(size_t decision, TokenStream *input, size_t startIndex, size_t stopIndex, + Ref<SemanticContext> const& semctx, bool evalResult, size_t predictedAlt, bool fullCtx) + : DecisionEventInfo(decision, nullptr, input, startIndex, stopIndex, fullCtx), + semctx(semctx), predictedAlt(predictedAlt), evalResult(evalResult) { +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredicateEvalInfo.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredicateEvalInfo.h new file mode 100644 index 0000000..b0513ae --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredicateEvalInfo.h @@ -0,0 +1,62 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This class represents profiling event information for semantic predicate + /// evaluations which occur during prediction. + /// </summary> + /// <seealso cref= ParserATNSimulator#evalSemanticContext + /// + /// @since 4.3 </seealso> + class ANTLR4CPP_PUBLIC PredicateEvalInfo : public DecisionEventInfo { + public: + /// The semantic context which was evaluated. + const Ref<SemanticContext> semctx; + + /// <summary> + /// The alternative number for the decision which is guarded by the semantic + /// context <seealso cref="#semctx"/>. Note that other ATN + /// configurations may predict the same alternative which are guarded by + /// other semantic contexts and/or <seealso cref="SemanticContext#NONE"/>. + /// </summary> + const size_t predictedAlt; + + /// The result of evaluating the semantic context <seealso cref="#semctx"/>. 
+ const bool evalResult; + + /// <summary> + /// Constructs a new instance of the <seealso cref="PredicateEvalInfo"/> class with the + /// specified detailed predicate evaluation information. + /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the predicate evaluation was + /// triggered. Note that the input stream may be reset to other positions for + /// the actual evaluation of individual predicates. </param> + /// <param name="semctx"> The semantic context which was evaluated </param> + /// <param name="evalResult"> The results of evaluating the semantic context </param> + /// <param name="predictedAlt"> The alternative number for the decision which is + /// guarded by the semantic context {@code semctx}. See <seealso cref="#predictedAlt"/> + /// for more information. </param> + /// <param name="fullCtx"> {@code true} if the semantic context was + /// evaluated during LL prediction; otherwise, {@code false} if the semantic + /// context was evaluated during SLL prediction + /// </param> + /// <seealso cref= ParserATNSimulator#evalSemanticContext(SemanticContext, ParserRuleContext, int, boolean) </seealso> + /// <seealso cref= SemanticContext#eval(Recognizer, RuleContext) </seealso> + PredicateEvalInfo(size_t decision, TokenStream *input, size_t startIndex, size_t stopIndex, + Ref<SemanticContext> const& semctx, bool evalResult, size_t predictedAlt, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredicateTransition.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredicateTransition.cpp new file mode 100644 index 0000000..984fc20 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredicateTransition.cpp @@ -0,0 +1,34 @@ +/* Copyright (c) 2012-2017 
The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/PredicateTransition.h" + +using namespace antlr4::atn; + +PredicateTransition::PredicateTransition(ATNState *target, size_t ruleIndex, size_t predIndex, bool isCtxDependent) : AbstractPredicateTransition(target), ruleIndex(ruleIndex), predIndex(predIndex), isCtxDependent(isCtxDependent) { +} + +Transition::SerializationType PredicateTransition::getSerializationType() const { + return PREDICATE; +} + +bool PredicateTransition::isEpsilon() const { + return true; +} + +bool PredicateTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +Ref<SemanticContext::Predicate> PredicateTransition::getPredicate() const { + return std::make_shared<SemanticContext::Predicate>(ruleIndex, predIndex, isCtxDependent); +} + +std::string PredicateTransition::toString() const { + return "PREDICATE " + Transition::toString() + " { ruleIndex: " + std::to_string(ruleIndex) + + ", predIndex: " + std::to_string(predIndex) + ", isCtxDependent: " + std::to_string(isCtxDependent) + " }"; + + // Generate and add a predicate context here? +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredicateTransition.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredicateTransition.h new file mode 100644 index 0000000..4d9b420 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredicateTransition.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/AbstractPredicateTransition.h" +#include "SemanticContext.h" + +namespace antlr4 { +namespace atn { + + /// TODO: this is old comment: + /// A tree of semantic predicates from the grammar AST if label==SEMPRED. + /// In the ATN, labels will always be exactly one predicate, but the DFA + /// may have to combine a bunch of them as it collects predicates from + /// multiple ATN configurations into a single DFA state. + class ANTLR4CPP_PUBLIC PredicateTransition final : public AbstractPredicateTransition { + public: + const size_t ruleIndex; + const size_t predIndex; + const bool isCtxDependent; // e.g., $i ref in pred + + PredicateTransition(ATNState *target, size_t ruleIndex, size_t predIndex, bool isCtxDependent); + + virtual SerializationType getSerializationType() const override; + + virtual bool isEpsilon() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + Ref<SemanticContext::Predicate> getPredicate() const; + + virtual std::string toString() const override; + + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredictionContext.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredictionContext.cpp new file mode 100644 index 0000000..860a180 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredictionContext.cpp @@ -0,0 +1,662 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/EmptyPredictionContext.h" +#include "misc/MurmurHash.h" +#include "atn/ArrayPredictionContext.h" +#include "RuleContext.h" +#include "ParserRuleContext.h" +#include "atn/RuleTransition.h" +#include "support/Arrays.h" +#include "support/CPPUtils.h" + +#include "atn/PredictionContext.h" + +using namespace antlr4; +using namespace antlr4::misc; +using namespace antlr4::atn; + +using namespace antlrcpp; + +size_t PredictionContext::globalNodeCount = 0; +const Ref<PredictionContext> PredictionContext::EMPTY = std::make_shared<EmptyPredictionContext>(); + +//----------------- PredictionContext ---------------------------------------------------------------------------------- + +PredictionContext::PredictionContext(size_t cachedHashCode) : id(globalNodeCount++), cachedHashCode(cachedHashCode) { +} + +PredictionContext::~PredictionContext() { +} + +Ref<PredictionContext> PredictionContext::fromRuleContext(const ATN &atn, RuleContext *outerContext) { + if (outerContext == nullptr) { + return PredictionContext::EMPTY; + } + + // if we are in RuleContext of start rule, s, then PredictionContext + // is EMPTY. Nobody called us. 
(if we are empty, return empty) + if (outerContext->parent == nullptr || outerContext == &ParserRuleContext::EMPTY) { + return PredictionContext::EMPTY; + } + + // If we have a parent, convert it to a PredictionContext graph + Ref<PredictionContext> parent = PredictionContext::fromRuleContext(atn, dynamic_cast<RuleContext *>(outerContext->parent)); + + ATNState *state = atn.states.at(outerContext->invokingState); + RuleTransition *transition = (RuleTransition *)state->transitions[0]; + return SingletonPredictionContext::create(parent, transition->followState->stateNumber); +} + +bool PredictionContext::isEmpty() const { + return this == EMPTY.get(); +} + +bool PredictionContext::hasEmptyPath() const { + // since EMPTY_RETURN_STATE can only appear in the last position, we check last one + return getReturnState(size() - 1) == EMPTY_RETURN_STATE; +} + +size_t PredictionContext::hashCode() const { + return cachedHashCode; +} + +size_t PredictionContext::calculateEmptyHashCode() { + size_t hash = MurmurHash::initialize(INITIAL_HASH); + hash = MurmurHash::finish(hash, 0); + return hash; +} + +size_t PredictionContext::calculateHashCode(Ref<PredictionContext> parent, size_t returnState) { + size_t hash = MurmurHash::initialize(INITIAL_HASH); + hash = MurmurHash::update(hash, parent); + hash = MurmurHash::update(hash, returnState); + hash = MurmurHash::finish(hash, 2); + return hash; +} + +size_t PredictionContext::calculateHashCode(const std::vector<Ref<PredictionContext>> &parents, + const std::vector<size_t> &returnStates) { + size_t hash = MurmurHash::initialize(INITIAL_HASH); + + for (auto parent : parents) { + hash = MurmurHash::update(hash, parent); + } + + for (auto returnState : returnStates) { + hash = MurmurHash::update(hash, returnState); + } + + return MurmurHash::finish(hash, parents.size() + returnStates.size()); +} + +Ref<PredictionContext> PredictionContext::merge(const Ref<PredictionContext> &a, + const Ref<PredictionContext> &b, bool rootIsWildcard, 
PredictionContextMergeCache *mergeCache) { + assert(a && b); + + // share same graph if both same + if (a == b || *a == *b) { + return a; + } + + if (is<SingletonPredictionContext>(a) && is<SingletonPredictionContext>(b)) { + return mergeSingletons(std::dynamic_pointer_cast<SingletonPredictionContext>(a), + std::dynamic_pointer_cast<SingletonPredictionContext>(b), rootIsWildcard, mergeCache); + } + + // At least one of a or b is array. + // If one is $ and rootIsWildcard, return $ as * wildcard. + if (rootIsWildcard) { + if (is<EmptyPredictionContext>(a)) { + return a; + } + if (is<EmptyPredictionContext>(b)) { + return b; + } + } + + // convert singleton so both are arrays to normalize + Ref<ArrayPredictionContext> left; + if (is<SingletonPredictionContext>(a)) { + left = std::make_shared<ArrayPredictionContext>(std::dynamic_pointer_cast<SingletonPredictionContext>(a)); + } else { + left = std::dynamic_pointer_cast<ArrayPredictionContext>(a); + } + Ref<ArrayPredictionContext> right; + if (is<SingletonPredictionContext>(b)) { + right = std::make_shared<ArrayPredictionContext>(std::dynamic_pointer_cast<SingletonPredictionContext>(b)); + } else { + right = std::dynamic_pointer_cast<ArrayPredictionContext>(b); + } + return mergeArrays(left, right, rootIsWildcard, mergeCache); +} + +Ref<PredictionContext> PredictionContext::mergeSingletons(const Ref<SingletonPredictionContext> &a, + const Ref<SingletonPredictionContext> &b, bool rootIsWildcard, PredictionContextMergeCache *mergeCache) { + + if (mergeCache != nullptr) { // Can be null if not given to the ATNState from which this call originates. 
+ auto existing = mergeCache->get(a, b); + if (existing) { + return existing; + } + existing = mergeCache->get(b, a); + if (existing) { + return existing; + } + } + + Ref<PredictionContext> rootMerge = mergeRoot(a, b, rootIsWildcard); + if (rootMerge) { + if (mergeCache != nullptr) { + mergeCache->put(a, b, rootMerge); + } + return rootMerge; + } + + Ref<PredictionContext> parentA = a->parent; + Ref<PredictionContext> parentB = b->parent; + if (a->returnState == b->returnState) { // a == b + Ref<PredictionContext> parent = merge(parentA, parentB, rootIsWildcard, mergeCache); + + // If parent is same as existing a or b parent or reduced to a parent, return it. + if (parent == parentA) { // ax + bx = ax, if a=b + return a; + } + if (parent == parentB) { // ax + bx = bx, if a=b + return b; + } + + // else: ax + ay = a'[x,y] + // merge parents x and y, giving array node with x,y then remainders + // of those graphs. dup a, a' points at merged array + // new joined parent so create new singleton pointing to it, a' + Ref<PredictionContext> a_ = SingletonPredictionContext::create(parent, a->returnState); + if (mergeCache != nullptr) { + mergeCache->put(a, b, a_); + } + return a_; + } else { + // a != b payloads differ + // see if we can collapse parents due to $+x parents if local ctx + Ref<PredictionContext> singleParent; + if (a == b || (*parentA == *parentB)) { // ax + bx = [a,b]x + singleParent = parentA; + } + if (singleParent) { // parents are same, sort payloads and use same parent + std::vector<size_t> payloads = { a->returnState, b->returnState }; + if (a->returnState > b->returnState) { + payloads[0] = b->returnState; + payloads[1] = a->returnState; + } + std::vector<Ref<PredictionContext>> parents = { singleParent, singleParent }; + Ref<PredictionContext> a_ = std::make_shared<ArrayPredictionContext>(parents, payloads); + if (mergeCache != nullptr) { + mergeCache->put(a, b, a_); + } + return a_; + } + + // parents differ and can't merge them. 
Just pack together + // into array; can't merge. + // ax + by = [ax,by] + Ref<PredictionContext> a_; + if (a->returnState > b->returnState) { // sort by payload + std::vector<size_t> payloads = { b->returnState, a->returnState }; + std::vector<Ref<PredictionContext>> parents = { b->parent, a->parent }; + a_ = std::make_shared<ArrayPredictionContext>(parents, payloads); + } else { + std::vector<size_t> payloads = {a->returnState, b->returnState}; + std::vector<Ref<PredictionContext>> parents = { a->parent, b->parent }; + a_ = std::make_shared<ArrayPredictionContext>(parents, payloads); + } + + if (mergeCache != nullptr) { + mergeCache->put(a, b, a_); + } + return a_; + } +} + +Ref<PredictionContext> PredictionContext::mergeRoot(const Ref<SingletonPredictionContext> &a, + const Ref<SingletonPredictionContext> &b, bool rootIsWildcard) { + if (rootIsWildcard) { + if (a == EMPTY) { // * + b = * + return EMPTY; + } + if (b == EMPTY) { // a + * = * + return EMPTY; + } + } else { + if (a == EMPTY && b == EMPTY) { // $ + $ = $ + return EMPTY; + } + if (a == EMPTY) { // $ + x = [$,x] + std::vector<size_t> payloads = { b->returnState, EMPTY_RETURN_STATE }; + std::vector<Ref<PredictionContext>> parents = { b->parent, nullptr }; + Ref<PredictionContext> joined = std::make_shared<ArrayPredictionContext>(parents, payloads); + return joined; + } + if (b == EMPTY) { // x + $ = [$,x] ($ is always first if present) + std::vector<size_t> payloads = { a->returnState, EMPTY_RETURN_STATE }; + std::vector<Ref<PredictionContext>> parents = { a->parent, nullptr }; + Ref<PredictionContext> joined = std::make_shared<ArrayPredictionContext>(parents, payloads); + return joined; + } + } + return nullptr; +} + +Ref<PredictionContext> PredictionContext::mergeArrays(const Ref<ArrayPredictionContext> &a, + const Ref<ArrayPredictionContext> &b, bool rootIsWildcard, PredictionContextMergeCache *mergeCache) { + + if (mergeCache != nullptr) { + auto existing = mergeCache->get(a, b); + if (existing) { + 
return existing; + } + existing = mergeCache->get(b, a); + if (existing) { + return existing; + } + } + + // merge sorted payloads a + b => M + size_t i = 0; // walks a + size_t j = 0; // walks b + size_t k = 0; // walks target M array + + std::vector<size_t> mergedReturnStates(a->returnStates.size() + b->returnStates.size()); + std::vector<Ref<PredictionContext>> mergedParents(a->returnStates.size() + b->returnStates.size()); + + // walk and merge to yield mergedParents, mergedReturnStates + while (i < a->returnStates.size() && j < b->returnStates.size()) { + Ref<PredictionContext> a_parent = a->parents[i]; + Ref<PredictionContext> b_parent = b->parents[j]; + if (a->returnStates[i] == b->returnStates[j]) { + // same payload (stack tops are equal), must yield merged singleton + size_t payload = a->returnStates[i]; + // $+$ = $ + bool both$ = payload == EMPTY_RETURN_STATE && !a_parent && !b_parent; + bool ax_ax = (a_parent && b_parent) && *a_parent == *b_parent; // ax+ax -> ax + if (both$ || ax_ax) { + mergedParents[k] = a_parent; // choose left + mergedReturnStates[k] = payload; + } + else { // ax+ay -> a'[x,y] + Ref<PredictionContext> mergedParent = merge(a_parent, b_parent, rootIsWildcard, mergeCache); + mergedParents[k] = mergedParent; + mergedReturnStates[k] = payload; + } + i++; // hop over left one as usual + j++; // but also skip one in right side since we merge + } else if (a->returnStates[i] < b->returnStates[j]) { // copy a[i] to M + mergedParents[k] = a_parent; + mergedReturnStates[k] = a->returnStates[i]; + i++; + } + else { // b > a, copy b[j] to M + mergedParents[k] = b_parent; + mergedReturnStates[k] = b->returnStates[j]; + j++; + } + k++; + } + + // copy over any payloads remaining in either array + if (i < a->returnStates.size()) { + for (std::vector<int>::size_type p = i; p < a->returnStates.size(); p++) { + mergedParents[k] = a->parents[p]; + mergedReturnStates[k] = a->returnStates[p]; + k++; + } + } else { + for (std::vector<int>::size_type p = 
j; p < b->returnStates.size(); p++) { + mergedParents[k] = b->parents[p]; + mergedReturnStates[k] = b->returnStates[p]; + k++; + } + } + + // trim merged if we combined a few that had same stack tops + if (k < mergedParents.size()) { // write index < last position; trim + if (k == 1) { // for just one merged element, return singleton top + Ref<PredictionContext> a_ = SingletonPredictionContext::create(mergedParents[0], mergedReturnStates[0]); + if (mergeCache != nullptr) { + mergeCache->put(a, b, a_); + } + return a_; + } + mergedParents.resize(k); + mergedReturnStates.resize(k); + } + + Ref<ArrayPredictionContext> M = std::make_shared<ArrayPredictionContext>(mergedParents, mergedReturnStates); + + // if we created same array as a or b, return that instead + // TODO: track whether this is possible above during merge sort for speed + if (*M == *a) { + if (mergeCache != nullptr) { + mergeCache->put(a, b, a); + } + return a; + } + if (*M == *b) { + if (mergeCache != nullptr) { + mergeCache->put(a, b, b); + } + return b; + } + + // ml: this part differs from Java code. We have to recreate the context as the parents array is copied on creation. 
+ if (combineCommonParents(mergedParents)) { + mergedReturnStates.resize(mergedParents.size()); + M = std::make_shared<ArrayPredictionContext>(mergedParents, mergedReturnStates); + } + + if (mergeCache != nullptr) { + mergeCache->put(a, b, M); + } + return M; +} + +bool PredictionContext::combineCommonParents(std::vector<Ref<PredictionContext>> &parents) { + + std::set<Ref<PredictionContext>> uniqueParents; + for (size_t p = 0; p < parents.size(); ++p) { + Ref<PredictionContext> parent = parents[p]; + if (uniqueParents.find(parent) == uniqueParents.end()) { // don't replace + uniqueParents.insert(parent); + } + } + + for (size_t p = 0; p < parents.size(); ++p) { + parents[p] = *uniqueParents.find(parents[p]); + } + + return true; +} + +std::string PredictionContext::toDOTString(const Ref<PredictionContext> &context) { // Emits a "digraph G" description of every node reachable from context; a null context yields "". + if (context == nullptr) { + return ""; + } + + std::stringstream ss; + ss << "digraph G {\n" << "rankdir=LR;\n"; + + std::vector<Ref<PredictionContext>> nodes = getAllContextNodes(context); + std::sort(nodes.begin(), nodes.end(), [](const Ref<PredictionContext> &o1, const Ref<PredictionContext> &o2) { + return o1->id < o2->id; // std::sort needs a strict "less than"; the former id difference was true for any two distinct ids + }); + + for (auto current : nodes) { + if (is<SingletonPredictionContext>(current)) { + std::string s = std::to_string(current->id); + ss << " s" << s; + std::string returnState = std::to_string(current->getReturnState(0)); + if (is<EmptyPredictionContext>(current)) { + returnState = "$"; + } + ss << " [label=\"" << returnState << "\"];\n"; + continue; + } + Ref<ArrayPredictionContext> arr = std::static_pointer_cast<ArrayPredictionContext>(current); + ss << " s" << arr->id << " [shape=box, label=\"" << "["; + bool first = true; + for (auto inv : arr->returnStates) { + if (!first) { + ss << ", "; + } + if (inv == EMPTY_RETURN_STATE) { + ss << "$"; + } else { + ss << inv; + } + first = false; + } + ss << "]"; + ss << "\"];\n"; + } + + for (auto current : nodes) { + if (current == EMPTY) { + continue; + } + for (size_t i = 0; i 
< current->size(); i++) { + if (!current->getParent(i)) { + continue; + } + ss << " s" << current->id << "->" << "s" << current->getParent(i)->id; + if (current->size() > 1) { + ss << " [label=\"parent[" << i << "]\"];\n"; + } else { + ss << ";\n"; + } + } + } + + ss << "}\n"; + return ss.str(); +} + +// The "visited" map is just a temporary structure to control the retrieval process (which is recursive). +Ref<PredictionContext> PredictionContext::getCachedContext(const Ref<PredictionContext> &context, + PredictionContextCache &contextCache, std::map<Ref<PredictionContext>, Ref<PredictionContext>> &visited) { + if (context->isEmpty()) { + return context; + } + + { + auto iterator = visited.find(context); + if (iterator != visited.end()) + return iterator->second; // Not necessarily the same as context. + } + + auto iterator = contextCache.find(context); + if (iterator != contextCache.end()) { + visited[context] = *iterator; + + return *iterator; + } + + bool changed = false; + + std::vector<Ref<PredictionContext>> parents(context->size()); + for (size_t i = 0; i < parents.size(); i++) { + Ref<PredictionContext> parent = getCachedContext(context->getParent(i), contextCache, visited); + if (changed || parent != context->getParent(i)) { + if (!changed) { + parents.clear(); + for (size_t j = 0; j < context->size(); j++) { + parents.push_back(context->getParent(j)); + } + + changed = true; + } + + parents[i] = parent; + } + } + + if (!changed) { + contextCache.insert(context); + visited[context] = context; + + return context; + } + + Ref<PredictionContext> updated; + if (parents.empty()) { + updated = EMPTY; + } else if (parents.size() == 1) { + updated = SingletonPredictionContext::create(parents[0], context->getReturnState(0)); + contextCache.insert(updated); + } else { + updated = std::make_shared<ArrayPredictionContext>(parents, std::dynamic_pointer_cast<ArrayPredictionContext>(context)->returnStates); + contextCache.insert(updated); + } + + visited[updated] = 
updated; + visited[context] = updated; + + return updated; +} + +std::vector<Ref<PredictionContext>> PredictionContext::getAllContextNodes(const Ref<PredictionContext> &context) { + std::vector<Ref<PredictionContext>> nodes; + std::set<PredictionContext *> visited; + getAllContextNodes_(context, nodes, visited); + return nodes; +} + + +void PredictionContext::getAllContextNodes_(const Ref<PredictionContext> &context, std::vector<Ref<PredictionContext>> &nodes, + std::set<PredictionContext *> &visited) { + + if (visited.find(context.get()) != visited.end()) { + return; // Already done. + } + + visited.insert(context.get()); + nodes.push_back(context); + + for (size_t i = 0; i < context->size(); i++) { + getAllContextNodes_(context->getParent(i), nodes, visited); + } +} + +std::string PredictionContext::toString() const { + + return antlrcpp::toString(this); +} + +std::string PredictionContext::toString(Recognizer * /*recog*/) const { + return toString(); +} + +std::vector<std::string> PredictionContext::toStrings(Recognizer *recognizer, int currentState) { + return toStrings(recognizer, EMPTY, currentState); +} + +std::vector<std::string> PredictionContext::toStrings(Recognizer *recognizer, const Ref<PredictionContext> &stop, int currentState) { + + std::vector<std::string> result; + + for (size_t perm = 0; ; perm++) { + size_t offset = 0; + bool last = true; + PredictionContext *p = this; + size_t stateNumber = currentState; + + std::stringstream ss; + ss << "["; + bool outerContinue = false; + while (!p->isEmpty() && p != stop.get()) { + size_t index = 0; + if (p->size() > 0) { + size_t bits = 1; + while ((1ULL << bits) < p->size()) { + bits++; + } + + size_t mask = (1 << bits) - 1; + index = (perm >> offset) & mask; + last &= index >= p->size() - 1; + if (index >= p->size()) { + outerContinue = true; + break; + } + offset += bits; + } + + if (recognizer != nullptr) { + if (ss.tellp() > 1) { + // first char is '[', if more than that this isn't the first rule + ss 
<< ' '; + } + + const ATN &atn = recognizer->getATN(); + ATNState *s = atn.states[stateNumber]; + std::string ruleName = recognizer->getRuleNames()[s->ruleIndex]; + ss << ruleName; + } else if (p->getReturnState(index) != EMPTY_RETURN_STATE) { + if (!p->isEmpty()) { + if (ss.tellp() > 1) { + // first char is '[', if more than that this isn't the first rule + ss << ' '; + } + + ss << p->getReturnState(index); + } + } + stateNumber = p->getReturnState(index); + p = p->getParent(index).get(); + } + + if (outerContinue) + continue; + + ss << "]"; + result.push_back(ss.str()); + + if (last) { + break; + } + } + + return result; +} + +//----------------- PredictionContextMergeCache ------------------------------------------------------------------------ + +Ref<PredictionContext> PredictionContextMergeCache::put(Ref<PredictionContext> const& key1, Ref<PredictionContext> const& key2, + Ref<PredictionContext> const& value) { + Ref<PredictionContext> previous; + + auto iterator = _data.find(key1); + if (iterator == _data.end()) + _data[key1][key2] = value; + else { + auto iterator2 = iterator->second.find(key2); + if (iterator2 != iterator->second.end()) + previous = iterator2->second; + iterator->second[key2] = value; + } + + return previous; +} + +Ref<PredictionContext> PredictionContextMergeCache::get(Ref<PredictionContext> const& key1, Ref<PredictionContext> const& key2) { + auto iterator = _data.find(key1); + if (iterator == _data.end()) + return nullptr; + + auto iterator2 = iterator->second.find(key2); + if (iterator2 == iterator->second.end()) + return nullptr; + + return iterator2->second; +} + +void PredictionContextMergeCache::clear() { + _data.clear(); +} + +std::string PredictionContextMergeCache::toString() const { // Concatenates toString() of every cached merge result, one per line. + std::string result; + for (const auto &pair : _data) // bind by reference: a by-value loop variable would copy each nested map + for (const auto &pair2 : pair.second) + result += pair2.second->toString() + "\n"; + + return result; +} + +size_t PredictionContextMergeCache::count() const { // Total number of (key1, key2) entries across all nested maps. + size_t result = 0; + for (const auto &entry : _data) 
+ result += entry.second.size(); + return result; +} + diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredictionContext.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredictionContext.h new file mode 100644 index 0000000..e8dfc23 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredictionContext.h @@ -0,0 +1,266 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Recognizer.h" +#include "atn/ATN.h" +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + struct PredictionContextHasher; + struct PredictionContextComparer; + class PredictionContextMergeCache; + + typedef std::unordered_set<Ref<PredictionContext>, PredictionContextHasher, PredictionContextComparer> PredictionContextCache; + + class ANTLR4CPP_PUBLIC PredictionContext { + public: + /// Represents $ in local context prediction, which means wildcard. + /// *+x = *. + static const Ref<PredictionContext> EMPTY; + + /// Represents $ in an array in full context mode, when $ + /// doesn't mean wildcard: $ + x = [$,x]. Here, + /// $ = EMPTY_RETURN_STATE. + // ml: originally Integer.MAX_VALUE, which would be -1 for us, but this is already used in places where + // -1 is converted to unsigned, so we use a different value here. Any value does the job provided it doesn't + // conflict with real return states. 
+#if __cplusplus >= 201703L + static constexpr size_t EMPTY_RETURN_STATE = std::numeric_limits<size_t>::max() - 9; +#else + enum : size_t { + EMPTY_RETURN_STATE = static_cast<size_t>(-10), // std::numeric_limits<size_t>::max() - 9; doesn't work in VS 2013 + }; +#endif + + private: +#if __cplusplus >= 201703L + static constexpr size_t INITIAL_HASH = 1; +#else + enum : size_t { + INITIAL_HASH = 1, + }; +#endif + + public: + static size_t globalNodeCount; + const size_t id; + + /// <summary> + /// Stores the computed hash code of this <seealso cref="PredictionContext"/>. The hash + /// code is computed in parts to match the following reference algorithm. + /// + /// <pre> + /// private int referenceHashCode() { + /// int hash = <seealso cref="MurmurHash#initialize"/>(<seealso cref="#INITIAL_HASH"/>); + /// + /// for (int i = 0; i < <seealso cref="#size()"/>; i++) { + /// hash = <seealso cref="MurmurHash#update"/>(hash, <seealso cref="#getParent"/>(i)); + /// } + /// + /// for (int i = 0; i < <seealso cref="#size()"/>; i++) { + /// hash = <seealso cref="MurmurHash#update"/>(hash, <seealso cref="#getReturnState"/>(i)); + /// } + /// + /// hash = <seealso cref="MurmurHash#finish"/>(hash, 2 * <seealso cref="#size()"/>); + /// return hash; + /// } + /// </pre> + /// </summary> + const size_t cachedHashCode; + + protected: + PredictionContext(size_t cachedHashCode); + ~PredictionContext(); + + public: + /// Convert a RuleContext tree to a PredictionContext graph. + /// Return EMPTY if outerContext is empty. + static Ref<PredictionContext> fromRuleContext(const ATN &atn, RuleContext *outerContext); + + virtual size_t size() const = 0; + virtual Ref<PredictionContext> getParent(size_t index) const = 0; + virtual size_t getReturnState(size_t index) const = 0; + + virtual bool operator == (const PredictionContext &o) const = 0; + + /// This means only the EMPTY (wildcard? not sure) context is in set. 
+ virtual bool isEmpty() const; + virtual bool hasEmptyPath() const; + virtual size_t hashCode() const; + + protected: + static size_t calculateEmptyHashCode(); + static size_t calculateHashCode(Ref<PredictionContext> parent, size_t returnState); + static size_t calculateHashCode(const std::vector<Ref<PredictionContext>> &parents, + const std::vector<size_t> &returnStates); + + public: + // dispatch + static Ref<PredictionContext> merge(const Ref<PredictionContext> &a, const Ref<PredictionContext> &b, + bool rootIsWildcard, PredictionContextMergeCache *mergeCache); + + /// <summary> + /// Merge two <seealso cref="SingletonPredictionContext"/> instances. + /// + /// <p/> + /// + /// Stack tops equal, parents merge is same; return left graph.<br/> + /// <embed src="images/SingletonMerge_SameRootSamePar.svg" type="image/svg+xml"/> + /// + /// <p/> + /// + /// Same stack top, parents differ; merge parents giving array node, then + /// remainders of those graphs. A new root node is created to point to the + /// merged parents.<br/> + /// <embed src="images/SingletonMerge_SameRootDiffPar.svg" type="image/svg+xml"/> + /// + /// <p/> + /// + /// Different stack tops pointing to same parent. Make array node for the + /// root where both element in the root point to the same (original) + /// parent.<br/> + /// <embed src="images/SingletonMerge_DiffRootSamePar.svg" type="image/svg+xml"/> + /// + /// <p/> + /// + /// Different stack tops pointing to different parents. 
Make array node for + /// the root where each element points to the corresponding original + /// parent.<br/> + /// <embed src="images/SingletonMerge_DiffRootDiffPar.svg" type="image/svg+xml"/> + /// </summary> + /// <param name="a"> the first <seealso cref="SingletonPredictionContext"/> </param> + /// <param name="b"> the second <seealso cref="SingletonPredictionContext"/> </param> + /// <param name="rootIsWildcard"> {@code true} if this is a local-context merge, + /// otherwise false to indicate a full-context merge </param> + /// <param name="mergeCache"> </param> + static Ref<PredictionContext> mergeSingletons(const Ref<SingletonPredictionContext> &a, + const Ref<SingletonPredictionContext> &b, bool rootIsWildcard, PredictionContextMergeCache *mergeCache); + + /** + * Handle case where at least one of {@code a} or {@code b} is + * {@link #EMPTY}. In the following diagrams, the symbol {@code $} is used + * to represent {@link #EMPTY}. + * + * <h2>Local-Context Merges</h2> + * + * <p>These local-context merge operations are used when {@code rootIsWildcard} + * is true.</p> + * + * <p>{@link #EMPTY} is superset of any graph; return {@link #EMPTY}.<br> + * <embed src="images/LocalMerge_EmptyRoot.svg" type="image/svg+xml"/></p> + * + * <p>{@link #EMPTY} and anything is {@code #EMPTY}, so merged parent is + * {@code #EMPTY}; return left graph.<br> + * <embed src="images/LocalMerge_EmptyParent.svg" type="image/svg+xml"/></p> + * + * <p>Special case of last merge if local context.<br> + * <embed src="images/LocalMerge_DiffRoots.svg" type="image/svg+xml"/></p> + * + * <h2>Full-Context Merges</h2> + * + * <p>These full-context merge operations are used when {@code rootIsWildcard} + * is false.</p> + * + * <p><embed src="images/FullMerge_EmptyRoots.svg" type="image/svg+xml"/></p> + * + * <p>Must keep all contexts; {@link #EMPTY} in array is a special value (and + * null parent).<br> + * <embed src="images/FullMerge_EmptyRoot.svg" type="image/svg+xml"/></p> + * + * 
<p><embed src="images/FullMerge_SameRoot.svg" type="image/svg+xml"/></p> + * + * @param a the first {@link SingletonPredictionContext} + * @param b the second {@link SingletonPredictionContext} + * @param rootIsWildcard {@code true} if this is a local-context merge, + * otherwise false to indicate a full-context merge + */ + static Ref<PredictionContext> mergeRoot(const Ref<SingletonPredictionContext> &a, + const Ref<SingletonPredictionContext> &b, bool rootIsWildcard); + + /** + * Merge two {@link ArrayPredictionContext} instances. + * + * <p>Different tops, different parents.<br> + * <embed src="images/ArrayMerge_DiffTopDiffPar.svg" type="image/svg+xml"/></p> + * + * <p>Shared top, same parents.<br> + * <embed src="images/ArrayMerge_ShareTopSamePar.svg" type="image/svg+xml"/></p> + * + * <p>Shared top, different parents.<br> + * <embed src="images/ArrayMerge_ShareTopDiffPar.svg" type="image/svg+xml"/></p> + * + * <p>Shared top, all shared parents.<br> + * <embed src="images/ArrayMerge_ShareTopSharePar.svg" type="image/svg+xml"/></p> + * + * <p>Equal tops, merge parents and reduce top to + * {@link SingletonPredictionContext}.<br> + * <embed src="images/ArrayMerge_EqualTop.svg" type="image/svg+xml"/></p> + */ + static Ref<PredictionContext> mergeArrays(const Ref<ArrayPredictionContext> &a, + const Ref<ArrayPredictionContext> &b, bool rootIsWildcard, PredictionContextMergeCache *mergeCache); + + protected: + /// Make pass over all M parents; merge any equal() ones. + /// @returns true if the list has been changed (i.e. duplicates where found). 
+ static bool combineCommonParents(std::vector<Ref<PredictionContext>> &parents); + + public: + static std::string toDOTString(const Ref<PredictionContext> &context); + + static Ref<PredictionContext> getCachedContext(const Ref<PredictionContext> &context, + PredictionContextCache &contextCache, + std::map<Ref<PredictionContext>, Ref<PredictionContext>> &visited); + + // ter's recursive version of Sam's getAllNodes() + static std::vector<Ref<PredictionContext>> getAllContextNodes(const Ref<PredictionContext> &context); + static void getAllContextNodes_(const Ref<PredictionContext> &context, + std::vector<Ref<PredictionContext>> &nodes, std::set<PredictionContext *> &visited); + + virtual std::string toString() const; + virtual std::string toString(Recognizer *recog) const; + + std::vector<std::string> toStrings(Recognizer *recognizer, int currentState); + std::vector<std::string> toStrings(Recognizer *recognizer, const Ref<PredictionContext> &stop, int currentState); + }; + + struct PredictionContextHasher { + size_t operator () (const Ref<PredictionContext> &k) const { + return k->hashCode(); + } + }; + + struct PredictionContextComparer { + bool operator () (const Ref<PredictionContext> &lhs, const Ref<PredictionContext> &rhs) const + { + if (lhs == rhs) // Object identity. 
+ return true; + return (lhs->hashCode() == rhs->hashCode()) && (*lhs == *rhs); + } + }; + + class PredictionContextMergeCache { + public: + Ref<PredictionContext> put(Ref<PredictionContext> const& key1, Ref<PredictionContext> const& key2, + Ref<PredictionContext> const& value); + Ref<PredictionContext> get(Ref<PredictionContext> const& key1, Ref<PredictionContext> const& key2); + + void clear(); + std::string toString() const; + size_t count() const; + + private: + std::unordered_map<Ref<PredictionContext>, + std::unordered_map<Ref<PredictionContext>, Ref<PredictionContext>, PredictionContextHasher, PredictionContextComparer>, + PredictionContextHasher, PredictionContextComparer> _data; + + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredictionMode.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredictionMode.cpp new file mode 100644 index 0000000..d15a826 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredictionMode.cpp @@ -0,0 +1,201 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/RuleStopState.h" +#include "atn/ATNConfigSet.h" +#include "atn/ATNConfig.h" +#include "misc/MurmurHash.h" +#include "SemanticContext.h" + +#include "PredictionMode.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + +struct AltAndContextConfigHasher +{ + /** + * The hash code is only a function of the {@link ATNState#stateNumber} + * and {@link ATNConfig#context}. 
+ */ + size_t operator () (ATNConfig *o) const { + size_t hashCode = misc::MurmurHash::initialize(7); + hashCode = misc::MurmurHash::update(hashCode, o->state->stateNumber); + hashCode = misc::MurmurHash::update(hashCode, o->context); + return misc::MurmurHash::finish(hashCode, 2); + } +}; + +struct AltAndContextConfigComparer { + bool operator()(ATNConfig *a, ATNConfig *b) const + { + if (a == b) { + return true; + } + return a->state->stateNumber == b->state->stateNumber && *a->context == *b->context; + } +}; + +bool PredictionModeClass::hasSLLConflictTerminatingPrediction(PredictionMode mode, ATNConfigSet *configs) { + /* Configs in rule stop states indicate reaching the end of the decision + * rule (local context) or end of start rule (full context). If all + * configs meet this condition, then none of the configurations is able + * to match additional input so we terminate prediction. + */ + if (allConfigsInRuleStopStates(configs)) { + return true; + } + + bool heuristic; + + // Pure SLL mode parsing or SLL+LL if: + // Don't bother with combining configs from different semantic + // contexts if we can fail over to full LL; costs more time + // since we'll often fail over anyway. 
+ if (mode == PredictionMode::SLL || !configs->hasSemanticContext) { + std::vector<antlrcpp::BitSet> altsets = getConflictingAltSubsets(configs); + heuristic = hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(configs); + } else { + // dup configs, tossing out semantic predicates + ATNConfigSet dup(true); + for (auto &config : configs->configs) { + Ref<ATNConfig> c = std::make_shared<ATNConfig>(config, SemanticContext::NONE); + dup.add(c); + } + std::vector<antlrcpp::BitSet> altsets = getConflictingAltSubsets(&dup); + heuristic = hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(&dup); + } + + return heuristic; +} + +bool PredictionModeClass::hasConfigInRuleStopState(ATNConfigSet *configs) { + for (auto &c : configs->configs) { + if (is<RuleStopState *>(c->state)) { + return true; + } + } + + return false; +} + +bool PredictionModeClass::allConfigsInRuleStopStates(ATNConfigSet *configs) { + for (auto &config : configs->configs) { + if (!is<RuleStopState*>(config->state)) { + return false; + } + } + + return true; +} + +size_t PredictionModeClass::resolvesToJustOneViableAlt(const std::vector<antlrcpp::BitSet>& altsets) { + return getSingleViableAlt(altsets); +} + +bool PredictionModeClass::allSubsetsConflict(const std::vector<antlrcpp::BitSet>& altsets) { + return !hasNonConflictingAltSet(altsets); +} + +bool PredictionModeClass::hasNonConflictingAltSet(const std::vector<antlrcpp::BitSet>& altsets) { + for (antlrcpp::BitSet alts : altsets) { + if (alts.count() == 1) { + return true; + } + } + return false; +} + +bool PredictionModeClass::hasConflictingAltSet(const std::vector<antlrcpp::BitSet>& altsets) { + for (antlrcpp::BitSet alts : altsets) { + if (alts.count() > 1) { + return true; + } + } + return false; +} + +bool PredictionModeClass::allSubsetsEqual(const std::vector<antlrcpp::BitSet>& altsets) { + if (altsets.empty()) { + return true; + } + + const antlrcpp::BitSet& first = *altsets.begin(); + for (const antlrcpp::BitSet& alts : 
altsets) { + if (alts != first) { + return false; + } + } + return true; +} + +size_t PredictionModeClass::getUniqueAlt(const std::vector<antlrcpp::BitSet>& altsets) { + antlrcpp::BitSet all = getAlts(altsets); + if (all.count() == 1) { + return all.nextSetBit(0); + } + return ATN::INVALID_ALT_NUMBER; +} + +antlrcpp::BitSet PredictionModeClass::getAlts(const std::vector<antlrcpp::BitSet>& altsets) { + antlrcpp::BitSet all; + for (antlrcpp::BitSet alts : altsets) { + all |= alts; + } + + return all; +} + +antlrcpp::BitSet PredictionModeClass::getAlts(ATNConfigSet *configs) { + antlrcpp::BitSet alts; + for (auto &config : configs->configs) { + alts.set(config->alt); + } + return alts; +} + +std::vector<antlrcpp::BitSet> PredictionModeClass::getConflictingAltSubsets(ATNConfigSet *configs) { + std::unordered_map<ATNConfig *, antlrcpp::BitSet, AltAndContextConfigHasher, AltAndContextConfigComparer> configToAlts; + for (auto &config : configs->configs) { + configToAlts[config.get()].set(config->alt); + } + std::vector<antlrcpp::BitSet> values; + for (auto it : configToAlts) { + values.push_back(it.second); + } + return values; +} + +std::map<ATNState*, antlrcpp::BitSet> PredictionModeClass::getStateToAltMap(ATNConfigSet *configs) { + std::map<ATNState*, antlrcpp::BitSet> m; + for (auto &c : configs->configs) { + m[c->state].set(c->alt); + } + return m; +} + +bool PredictionModeClass::hasStateAssociatedWithOneAlt(ATNConfigSet *configs) { + std::map<ATNState*, antlrcpp::BitSet> x = getStateToAltMap(configs); + for (std::map<ATNState*, antlrcpp::BitSet>::iterator it = x.begin(); it != x.end(); it++){ + if (it->second.count() == 1) return true; + } + return false; +} + +size_t PredictionModeClass::getSingleViableAlt(const std::vector<antlrcpp::BitSet>& altsets) { + antlrcpp::BitSet viableAlts; + for (antlrcpp::BitSet alts : altsets) { + size_t minAlt = alts.nextSetBit(0); + + viableAlts.set(minAlt); + if (viableAlts.count() > 1) // more than 1 viable alt + { + return 
ATN::INVALID_ALT_NUMBER; + } + } + + return viableAlts.nextSetBit(0); +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredictionMode.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredictionMode.h new file mode 100644 index 0000000..726f4cf --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredictionMode.h @@ -0,0 +1,436 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "support/BitSet.h" + +namespace antlr4 { +namespace atn { + + /** + * This enumeration defines the prediction modes available in ANTLR 4 along with + * utility methods for analyzing configuration sets for conflicts and/or + * ambiguities. + */ + enum class PredictionMode { + /** + * The SLL(*) prediction mode. This prediction mode ignores the current + * parser context when making predictions. This is the fastest prediction + * mode, and provides correct results for many grammars. This prediction + * mode is more powerful than the prediction mode provided by ANTLR 3, but + * may result in syntax errors for grammar and input combinations which are + * not SLL. + * + * <p> + * When using this prediction mode, the parser will either return a correct + * parse tree (i.e. the same parse tree that would be returned with the + * {@link #LL} prediction mode), or it will report a syntax error. If a + * syntax error is encountered when using the {@link #SLL} prediction mode, + * it may be due to either an actual syntax error in the input or indicate + * that the particular combination of grammar and input requires the more + * powerful {@link #LL} prediction abilities to complete successfully.</p> + * + * <p> + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.</p> + */ + SLL, + + /** + * The LL(*) prediction mode. 
This prediction mode allows the current parser + * context to be used for resolving SLL conflicts that occur during + * prediction. This is the fastest prediction mode that guarantees correct + * parse results for all combinations of grammars with syntactically correct + * inputs. + * + * <p> + * When using this prediction mode, the parser will make correct decisions + * for all syntactically-correct grammar and input combinations. However, in + * cases where the grammar is truly ambiguous this prediction mode might not + * report a precise answer for <em>exactly which</em> alternatives are + * ambiguous.</p> + * + * <p> + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.</p> + */ + LL, + + /** + * The LL(*) prediction mode with exact ambiguity detection. In addition to + * the correctness guarantees provided by the {@link #LL} prediction mode, + * this prediction mode instructs the prediction algorithm to determine the + * complete and exact set of ambiguous alternatives for every ambiguous + * decision encountered while parsing. + * + * <p> + * This prediction mode may be used for diagnosing ambiguities during + * grammar development. Due to the performance overhead of calculating sets + * of ambiguous alternatives, this prediction mode should be avoided when + * the exact results are not necessary.</p> + * + * <p> + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.</p> + */ + LL_EXACT_AMBIG_DETECTION + }; + + class ANTLR4CPP_PUBLIC PredictionModeClass { + public: + /** + * Computes the SLL prediction termination condition. 
+ * + * <p> + * This method computes the SLL prediction termination condition for both of + * the following cases.</p> + * + * <ul> + * <li>The usual SLL+LL fallback upon SLL conflict</li> + * <li>Pure SLL without LL fallback</li> + * </ul> + * + * <p><strong>COMBINED SLL+LL PARSING</strong></p> + * + * <p>When LL-fallback is enabled upon SLL conflict, correct predictions are + * ensured regardless of how the termination condition is computed by this + * method. Due to the substantially higher cost of LL prediction, the + * prediction should only fall back to LL when the additional lookahead + * cannot lead to a unique SLL prediction.</p> + * + * <p>Assuming combined SLL+LL parsing, an SLL configuration set with only + * conflicting subsets should fall back to full LL, even if the + * configuration sets don't resolve to the same alternative (e.g. + * {@code {1,2}} and {@code {3,4}}). If there is at least one non-conflicting + * configuration, SLL could continue with the hopes that more lookahead will + * resolve via one of those non-conflicting configurations.</p> + * + * <p>Here's the prediction termination rule then: SLL (for SLL+LL parsing) + * stops when it sees only conflicting configuration subsets. In contrast, + * full LL keeps going when there is uncertainty.</p> + * + * <p><strong>HEURISTIC</strong></p> + * + * <p>As a heuristic, we stop prediction when we see any conflicting subset + * unless we see a state that only has one alternative associated with it. + * The single-alt-state thing lets prediction continue upon rules like + * (otherwise, it would admit defeat too soon):</p> + * + * <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p> + * + * <p>When the ATN simulation reaches the state before {@code ';'}, it has a + * DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. 
Naturally + * {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop + * processing this node because alternative two has another way to continue, + * via {@code [6|2|[]]}.</p> + * + * <p>It also lets us continue for this rule:</p> + * + * <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p> + * + * <p>After matching input A, we reach the stop state for rule A, state 1. + * State 8 is the state right before B. Clearly alternatives 1 and 2 + * conflict and no amount of further lookahead will separate the two. + * However, alternative 3 will be able to continue and so we do not stop + * working on this state. In the previous example, we're concerned with + * states associated with the conflicting alternatives. Here alt 3 is not + * associated with the conflicting configs, but since we can continue + * looking for input reasonably, don't declare the state done.</p> + * + * <p><strong>PURE SLL PARSING</strong></p> + * + * <p>To handle pure SLL parsing, all we have to do is make sure that we + * combine stack contexts for configurations that differ only by semantic + * predicate. From there, we can do the usual SLL termination heuristic.</p> + * + * <p><strong>PREDICATES IN SLL+LL PARSING</strong></p> + * + * <p>SLL decisions don't evaluate predicates until after they reach DFA stop + * states because they need to create the DFA cache that works in all + * semantic situations. In contrast, full LL evaluates predicates collected + * during start state computation so it can ignore predicates thereafter. 
+ * This means that SLL termination detection can totally ignore semantic + * predicates.</p> + * + * <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not + * semantic predicate contexts so we might see two configurations like the + * following.</p> + * + * <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p> + * + * <p>Before testing these configurations against others, we have to merge + * {@code x} and {@code x'} (without modifying the existing configurations). + * For example, we test {@code (x+x')==x''} when looking for conflicts in + * the following configurations.</p> + * + * <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p> + * + * <p>If the configuration set has predicates (as indicated by + * {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of + * the configurations to strip out all of the predicates so that a standard + * {@link ATNConfigSet} will merge everything ignoring predicates.</p> + */ + static bool hasSLLConflictTerminatingPrediction(PredictionMode mode, ATNConfigSet *configs); + + /// <summary> + /// Checks if any configuration in {@code configs} is in a + /// <seealso cref="RuleStopState"/>. Configurations meeting this condition have + /// reached + /// the end of the decision rule (local context) or end of start rule (full + /// context). + /// </summary> + /// <param name="configs"> the configuration set to test </param> + /// <returns> {@code true} if any configuration in {@code configs} is in a + /// <seealso cref="RuleStopState"/>, otherwise {@code false} </returns> + static bool hasConfigInRuleStopState(ATNConfigSet *configs); + + /// <summary> + /// Checks if all configurations in {@code configs} are in a + /// <seealso cref="RuleStopState"/>. Configurations meeting this condition have + /// reached + /// the end of the decision rule (local context) or end of start rule (full + /// context). 
+ /// </summary> + /// <param name="configs"> the configuration set to test </param> + /// <returns> {@code true} if all configurations in {@code configs} are in a + /// <seealso cref="RuleStopState"/>, otherwise {@code false} </returns> + static bool allConfigsInRuleStopStates(ATNConfigSet *configs); + + /** + * Full LL prediction termination. + * + * <p>Can we stop looking ahead during ATN simulation or is there some + * uncertainty as to which alternative we will ultimately pick, after + * consuming more input? Even if there are partial conflicts, we might know + * that everything is going to resolve to the same minimum alternative. That + * means we can stop since no more lookahead will change that fact. On the + * other hand, there might be multiple conflicts that resolve to different + * minimums. That means we need more look ahead to decide which of those + * alternatives we should predict.</p> + * + * <p>The basic idea is to split the set of configurations {@code C}, into + * conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with + * non-conflicting configurations. Two configurations conflict if they have + * identical {@link ATNConfig#state} and {@link ATNConfig#context} values + * but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)} + * and {@code (s, j, ctx, _)} for {@code i!=j}.</p> + * + * <p>Reduce these configuration subsets to the set of possible alternatives. 
+ * You can compute the alternative subsets in one pass as follows:</p> + * + * <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in + * {@code C} holding {@code s} and {@code ctx} fixed.</p> + * + * <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p> + * + * <pre> + * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not + * alt and not pred + * </pre> + * + * <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p> + * + * <p>If {@code |A_s,ctx|=1} then there is no conflict associated with + * {@code s} and {@code ctx}.</p> + * + * <p>Reduce the subsets to singletons by choosing a minimum of each subset. If + * the union of these alternative subsets is a singleton, then no amount of + * more lookahead will help us. We will always pick that alternative. If, + * however, there is more than one alternative, then we are uncertain which + * alternative to predict and must continue looking for resolution. We may + * or may not discover an ambiguity in the future, even if there are no + * conflicting subsets this round.</p> + * + * <p>The biggest sin is to terminate early because it means we've made a + * decision but were uncertain as to the eventual outcome. We haven't used + * enough lookahead. On the other hand, announcing a conflict too late is no + * big deal; you will still have the conflict. It's just inefficient. It + * might even look until the end of file.</p> + * + * <p>No special consideration for semantic predicates is required because + * predicates are evaluated on-the-fly for full LL prediction, ensuring that + * no configuration contains a semantic context during the termination + * check.</p> + * + * <p><strong>CONFLICTING CONFIGS</strong></p> + * + * <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict + * when {@code i!=j} but {@code x=x'}. 
Because we merge all + * {@code (s, i, _)} configurations together, that means that there are at + * most {@code n} configurations associated with state {@code s} for + * {@code n} possible alternatives in the decision. The merged stacks + * complicate the comparison of configuration contexts {@code x} and + * {@code x'}. Sam checks to see if one is a subset of the other by calling + * merge and checking to see if the merged result is either {@code x} or + * {@code x'}. If the {@code x} associated with lowest alternative {@code i} + * is the superset, then {@code i} is the only possible prediction since the + * others resolve to {@code min(i)} as well. However, if {@code x} is + * associated with {@code j>i} then at least one stack configuration for + * {@code j} is not in conflict with alternative {@code i}. The algorithm + * should keep going, looking for more lookahead due to the uncertainty.</p> + * + * <p>For simplicity, I'm doing an equality check between {@code x} and + * {@code x'} that lets the algorithm continue to consume lookahead longer + * than necessary. The reason I like the equality is of course the + * simplicity but also because that is the test you need to detect the + * alternatives that are actually in conflict.</p> + * + * <p><strong>CONTINUE/STOP RULE</strong></p> + * + * <p>Continue if union of resolved alternative sets from non-conflicting and + * conflicting alternative subsets has more than one alternative. We are + * uncertain about which alternative to predict.</p> + * + * <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which + * alternatives are still in the running for the amount of input we've + * consumed at this point. 
The conflicting sets let us strip away + * configurations that won't lead to more states because we resolve + * conflicts to the configuration with a minimum alternate for the + * conflicting set.</p> + * + * <p><strong>CASES</strong></p> + * + * <ul> + * + * <li>no conflicts and more than 1 alternative in set => continue</li> + * + * <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)}, + * {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set + * {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = + * {@code {1,3}} => continue + * </li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, + * {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set + * {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = + * {@code {1}} => stop and predict 1</li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, + * {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U + * {@code {1}} = {@code {1}} => stop and predict 1, can announce + * ambiguity {@code {1,2}}</li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)}, + * {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U + * {@code {2}} = {@code {1,2}} => continue</li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)}, + * {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U + * {@code {3}} = {@code {1,3}} => continue</li> + * + * </ul> + * + * <p><strong>EXACT AMBIGUITY DETECTION</strong></p> + * + * <p>If all states report the same conflicting set of alternatives, then we + * know we have the exact ambiguity set.</p> + * + * <p><code>|A_<em>i</em>|>1</code> and + * <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p> + * + * <p>In other words, we continue examining lookahead until all {@code A_i} + * have more than one alternative and all {@code A_i} are the same. 
If + * {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate + * because the resolved set is {@code {1}}. To determine what the real + * ambiguity is, we have to know whether the ambiguity is between one and + * two or one and three so we keep going. We can only stop prediction when + * we need exact ambiguity detection when the sets look like + * {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p> + */ + static size_t resolvesToJustOneViableAlt(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if every alternative subset in {@code altsets} contains more + /// than one alternative. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if every <seealso cref="BitSet"/> in {@code altsets} + /// has + /// <seealso cref="BitSet#cardinality cardinality"/> > 1, otherwise {@code + /// false} </returns> + static bool allSubsetsConflict(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if any single alternative subset in {@code altsets} contains + /// exactly one alternative. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if {@code altsets} contains a <seealso + /// cref="BitSet"/> with + /// <seealso cref="BitSet#cardinality cardinality"/> 1, otherwise {@code false} + /// </returns> + static bool hasNonConflictingAltSet(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if any single alternative subset in {@code altsets} contains + /// more than one alternative. 
+ /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if {@code altsets} contains a <seealso + /// cref="BitSet"/> with + /// <seealso cref="BitSet#cardinality cardinality"/> > 1, otherwise {@code + /// false} </returns> + static bool hasConflictingAltSet(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if every alternative subset in {@code altsets} is equivalent. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if every member of {@code altsets} is equal to the + /// others, otherwise {@code false} </returns> + static bool allSubsetsEqual(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Returns the unique alternative predicted by all alternative subsets in + /// {@code altsets}. If no such alternative exists, this method returns + /// <seealso cref="ATN#INVALID_ALT_NUMBER"/>. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + static size_t getUniqueAlt(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Gets the complete set of represented alternatives for a collection of + /// alternative subsets. This method returns the union of each <seealso + /// cref="BitSet"/> + /// in {@code altsets}. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> the set of represented alternatives in {@code altsets} </returns> + static antlrcpp::BitSet getAlts(const std::vector<antlrcpp::BitSet> &altsets); + + /** Get union of all alts from configs. @since 4.5.1 */ + static antlrcpp::BitSet getAlts(ATNConfigSet *configs); + + /// <summary> + /// This function gets the conflicting alt subsets from a configuration set. 
+ /// For each configuration {@code c} in {@code configs}: + /// + /// <pre> + /// map[c] U= c.<seealso cref="ATNConfig#alt alt"/> # map hash/equals uses s and + /// x, not + /// alt and not pred + /// </pre> + /// </summary> + static std::vector<antlrcpp::BitSet> getConflictingAltSubsets(ATNConfigSet *configs); + + /// <summary> + /// Get a map from state to alt subset from a configuration set. For each + /// configuration {@code c} in {@code configs}: + /// + /// <pre> + /// map[c.<seealso cref="ATNConfig#state state"/>] U= c.<seealso + /// cref="ATNConfig#alt alt"/> + /// </pre> + /// </summary> + static std::map<ATNState*, antlrcpp::BitSet> getStateToAltMap(ATNConfigSet *configs); + + static bool hasStateAssociatedWithOneAlt(ATNConfigSet *configs); + + static size_t getSingleViableAlt(const std::vector<antlrcpp::BitSet> &altsets); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ProfilingATNSimulator.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ProfilingATNSimulator.cpp new file mode 100644 index 0000000..62fc12f --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ProfilingATNSimulator.cpp @@ -0,0 +1,179 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */
+
+#include "atn/PredicateEvalInfo.h"
+#include "atn/LookaheadEventInfo.h"
+#include "Parser.h"
+#include "atn/ATNConfigSet.h"
+#include "support/CPPUtils.h"
+
+#include "atn/ProfilingATNSimulator.h"
+
+using namespace antlr4;
+using namespace antlr4::atn;
+using namespace antlr4::dfa;
+using namespace antlrcpp;
+
+using namespace std::chrono;
+
+/// Builds a profiling simulator on top of the parser's current ParserATNSimulator: the ATN, the
+/// decision DFAs and the shared context cache are all taken from that interpreter. One DecisionInfo
+/// record is created per entry of atn.decisionToState, so _decisions can be indexed by decision number.
+ProfilingATNSimulator::ProfilingATNSimulator(Parser *parser)
+  : ParserATNSimulator(parser, parser->getInterpreter<ParserATNSimulator>()->atn,
+                       parser->getInterpreter<ParserATNSimulator>()->decisionToDFA,
+                       parser->getInterpreter<ParserATNSimulator>()->getSharedContextCache()) {
+  const size_t decisionCount = atn.decisionToState.size();
+  _decisions.reserve(decisionCount);
+  for (size_t i = 0; i < decisionCount; i++) {
+    _decisions.push_back(DecisionInfo(i));
+  }
+}
+
+/// Runs the regular adaptive prediction for `decision` and records, in _decisions[decision], the
+/// wall-clock time spent, the invocation count and the SLL (and, if used, LL) lookahead depth.
+///
+/// @return the alternative predicted by ParserATNSimulator::adaptivePredict.
+size_t ProfilingATNSimulator::adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext) {
+  // NOTE(review): `finally` comes from support/CPPUtils.h and is presumably a scope guard that runs
+  // the lambda when `onExit` is destroyed - confirm there.
+  auto onExit = finally([this](){
+    _currentDecision = 0; // Originally -1, but that makes no sense (index into a vector and init value is also 0).
+  });
+
+  // -1 means "no lookahead position recorded yet" for this prediction.
+  _sllStopIndex = -1;
+  _llStopIndex = -1;
+  _currentDecision = decision;
+  high_resolution_clock::time_point start = high_resolution_clock::now();
+  size_t alt = ParserATNSimulator::adaptivePredict(input, decision, outerContext);
+  high_resolution_clock::time_point stop = high_resolution_clock::now();
+
+  DecisionInfo &info = _decisions[decision];
+  info.timeInPrediction += duration_cast<nanoseconds>(stop - start).count();
+  info.invocations++;
+
+  long long SLL_k = _sllStopIndex - _startIndex + 1;
+  info.SLL_TotalLook += SLL_k;
+  // A stored minimum of 0 is treated as "not set yet".
+  info.SLL_MinLook = info.SLL_MinLook == 0 ? SLL_k : std::min(info.SLL_MinLook, SLL_k);
+  if (SLL_k > info.SLL_MaxLook) {
+    info.SLL_MaxLook = SLL_k;
+    info.SLL_MaxLookEvent = std::make_shared<LookaheadEventInfo>(decision, nullptr, alt, input, _startIndex, _sllStopIndex, false);
+  }
+
+  // _llStopIndex is only moved off -1 by computeReachSet() in full-context mode.
+  if (_llStopIndex >= 0) {
+    long long LL_k = _llStopIndex - _startIndex + 1;
+    info.LL_TotalLook += LL_k;
+    info.LL_MinLook = info.LL_MinLook == 0 ? LL_k : std::min(info.LL_MinLook, LL_k);
+    if (LL_k > info.LL_MaxLook) {
+      info.LL_MaxLook = LL_k;
+      info.LL_MaxLookEvent = std::make_shared<LookaheadEventInfo>(decision, nullptr, alt, input, _startIndex, _llStopIndex, true);
+    }
+  }
+
+  return alt;
+}
+
+/// Forwards to the base class and counts the lookup as an SLL DFA transition when an existing
+/// target state is found; a hit on the shared ERROR state is additionally logged as an SLL error.
+DFAState* ProfilingATNSimulator::getExistingTargetState(DFAState *previousD, size_t t) {
+  // this method is called after each time the input position advances
+  // during SLL prediction
+  _sllStopIndex = static_cast<int>(_input->index());
+
+  DFAState *existingTargetState = ParserATNSimulator::getExistingTargetState(previousD, t);
+  if (existingTargetState != nullptr) {
+    DecisionInfo &info = _decisions[_currentDecision];
+    info.SLL_DFATransitions++; // count only if we transition over a DFA state
+    if (existingTargetState == ERROR.get()) {
+      info.errors.push_back(
+        ErrorInfo(_currentDecision, previousD->configs.get(), _input, _startIndex, _sllStopIndex, false)
+      );
+    }
+  }
+
+  _currentState = existingTargetState;
+  return existingTargetState;
+}
+
+/// Forwards to the base class and remembers the computed state as the current state.
+DFAState* ProfilingATNSimulator::computeTargetState(DFA &dfa, DFAState *previousD, size_t t) {
+  DFAState *state = ParserATNSimulator::computeTargetState(dfa, previousD, t);
+  _currentState = state;
+  return state;
+}
+
+/// Forwards to the base class and counts the call as an LL or SLL ATN transition (depending on
+/// `fullCtx`). A null reach set is logged as an error for the current decision.
+std::unique_ptr<ATNConfigSet> ProfilingATNSimulator::computeReachSet(ATNConfigSet *closure, size_t t, bool fullCtx) {
+  if (fullCtx) {
+    // this method is called after each time the input position advances
+    // during full context prediction
+    _llStopIndex = static_cast<int>(_input->index());
+  }
+
+  std::unique_ptr<ATNConfigSet> reachConfigs = ParserATNSimulator::computeReachSet(closure, t, fullCtx);
+  DecisionInfo &info = _decisions[_currentDecision];
+  if (fullCtx) {
+    info.LL_ATNTransitions++; // count computation even if error
+    if (reachConfigs == nullptr) { // no reach on current lookahead symbol. ERROR.
+      // TODO: does not handle delayed errors per getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule()
+      info.errors.push_back(ErrorInfo(_currentDecision, closure, _input, _startIndex, _llStopIndex, true));
+    }
+  } else {
+    ++info.SLL_ATNTransitions;
+    if (reachConfigs == nullptr) { // no reach on current lookahead symbol. ERROR.
+      info.errors.push_back(ErrorInfo(_currentDecision, closure, _input, _startIndex, _sllStopIndex, false));
+    }
+  }
+  return reachConfigs;
+}
+
+/// Evaluates `pred` through the base class and, unless it is a PrecedencePredicate, records the
+/// evaluation (with its result) in the current decision's predicateEvals list.
+bool ProfilingATNSimulator::evalSemanticContext(Ref<SemanticContext> const& pred, ParserRuleContext *parserCallStack,
+                                                size_t alt, bool fullCtx) {
+  bool result = ParserATNSimulator::evalSemanticContext(pred, parserCallStack, alt, fullCtx);
+  if (std::dynamic_pointer_cast<SemanticContext::PrecedencePredicate>(pred) == nullptr) {
+    // Use the LL stop index once full-context prediction has advanced the input, else the SLL one.
+    bool fullContext = _llStopIndex >= 0;
+    int stopIndex = fullContext ? _llStopIndex : _sllStopIndex;
+    _decisions[_currentDecision].predicateEvals.push_back(
+      PredicateEvalInfo(_currentDecision, _input, _startIndex, stopIndex, pred, result, alt, fullCtx));
+  }
+
+  return result;
+}
+
+/// Remembers which alternative SLL would have picked (the lowest set bit of `conflictingAlts`, or of
+/// the configuration set's alternatives when that set is empty), counts the LL fallback and forwards
+/// the report to the base class.
+void ProfilingATNSimulator::reportAttemptingFullContext(DFA &dfa, const BitSet &conflictingAlts, ATNConfigSet *configs,
+                                                        size_t startIndex, size_t stopIndex) {
+  if (conflictingAlts.count() > 0) {
+    conflictingAltResolvedBySLL = conflictingAlts.nextSetBit(0);
+  } else {
+    conflictingAltResolvedBySLL = configs->getAlts().nextSetBit(0);
+  }
+  _decisions[_currentDecision].LL_Fallback++;
+  ParserATNSimulator::reportAttemptingFullContext(dfa, conflictingAlts, configs, startIndex, stopIndex);
+}
+
+/// Records a context sensitivity when the LL prediction differs from the alternative SLL would have
+/// chosen, then forwards the report to the base class.
+void ProfilingATNSimulator::reportContextSensitivity(DFA &dfa, size_t prediction, ATNConfigSet *configs,
+                                                     size_t startIndex, size_t stopIndex) {
+  if (prediction != conflictingAltResolvedBySLL) {
+    _decisions[_currentDecision].contextSensitivities.push_back(
+      ContextSensitivityInfo(_currentDecision, configs, _input, startIndex, stopIndex)
+    );
+  }
+  ParserATNSimulator::reportContextSensitivity(dfa, prediction, configs, startIndex, stopIndex);
+}
+
+/// Records the ambiguity for the current decision (and, for full-context configuration sets whose
+/// minimum alternative differs from the SLL choice, a context sensitivity as well), then forwards
+/// the report to the base class.
+void ProfilingATNSimulator::reportAmbiguity(DFA &dfa, DFAState *D, size_t startIndex, size_t stopIndex, bool exact,
+                                            const BitSet &ambigAlts, ATNConfigSet *configs) {
+  size_t prediction;
+  if (ambigAlts.count() > 0) {
+    prediction = ambigAlts.nextSetBit(0);
+  } else {
+    prediction = configs->getAlts().nextSetBit(0);
+  }
+  DecisionInfo &info = _decisions[_currentDecision];
+  if (configs->fullCtx && prediction != conflictingAltResolvedBySLL) {
+    // Even though this is an ambiguity we are reporting, we can
+    // still detect some context sensitivities. Both SLL and LL
+    // are showing a conflict, hence an ambiguity, but if they resolve
+    // to different minimum alternatives we have also identified a
+    // context sensitivity.
+    info.contextSensitivities.push_back(
+      ContextSensitivityInfo(_currentDecision, configs, _input, startIndex, stopIndex)
+    );
+  }
+  info.ambiguities.push_back(
+    AmbiguityInfo(_currentDecision, configs, ambigAlts, _input, startIndex, stopIndex, configs->fullCtx)
+  );
+  ParserATNSimulator::reportAmbiguity(dfa, D, startIndex, stopIndex, exact, ambigAlts, configs);
+}
+
+/// Returns a copy of the per-decision profiling records.
+std::vector<DecisionInfo> ProfilingATNSimulator::getDecisionInfo() const {
+  return _decisions;
+}
+
+/// Returns the DFA state stored by the most recent getExistingTargetState()/computeTargetState()
+/// call; this may be null.
+DFAState* ProfilingATNSimulator::getCurrentState() const {
+  return _currentState;
+}
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ProfilingATNSimulator.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ProfilingATNSimulator.h
new file mode 100644
index 0000000..79ecd00
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/ProfilingATNSimulator.h
@@ -0,0 +1,60 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/ParserATNSimulator.h"
+#include "atn/DecisionInfo.h"
+
+namespace antlr4 {
+namespace atn {
+
+  /// A ParserATNSimulator that additionally collects per-decision statistics (timing, lookahead
+  /// depth, DFA/ATN transition counts, errors, ambiguities, context sensitivities and predicate
+  /// evaluations) into a vector of DecisionInfo records.
+  class ANTLR4CPP_PUBLIC ProfilingATNSimulator : public ParserATNSimulator {
+  public:
+    ProfilingATNSimulator(Parser *parser);
+
+    virtual size_t adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext) override;
+
+    /// Returns a copy of the collected records, indexed by decision number.
+    virtual std::vector<DecisionInfo> getDecisionInfo() const;
+    /// Returns the DFA state most recently reached during prediction, or null if there is none.
+    virtual dfa::DFAState* getCurrentState() const;
+
+  protected:
+    /// One record per ATN decision, created in the constructor.
+    std::vector<DecisionInfo> _decisions;
+
+    /// Input index of the last SLL / LL lookahead step; both are reset to -1 at the start of
+    /// every adaptivePredict() call.
+    int _sllStopIndex = 0;
+    int _llStopIndex = 0;
+
+    size_t _currentDecision = 0;
+    /// Initialized to null so getCurrentState() is well-defined before the first prediction.
+    dfa::DFAState *_currentState = nullptr;
+
+    /// <summary>
+    /// At the point of LL failover, we record how SLL would resolve the conflict so that
+    ///  we can determine whether or not a decision / input pair is context-sensitive.
+    ///  If LL gives a different result than SLL's predicted alternative, we have a
+    ///  context sensitivity for sure. The converse is not necessarily true, however.
+    ///  It's possible that after conflict resolution chooses minimum alternatives,
+    ///  SLL could get the same answer as LL. Regardless of whether or not the result indicates
+    ///  an ambiguity, it is not treated as a context sensitivity because LL prediction
+    ///  was not required in order to produce a correct prediction for this decision and input sequence.
+    ///  It may in fact still be a context sensitivity but we don't know by looking at the
+    ///  minimum alternatives for the current input.
+    /// </summary>
+    size_t conflictingAltResolvedBySLL = 0;
+
+    virtual dfa::DFAState* getExistingTargetState(dfa::DFAState *previousD, size_t t) override;
+    virtual dfa::DFAState* computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t) override;
+    virtual std::unique_ptr<ATNConfigSet> computeReachSet(ATNConfigSet *closure, size_t t, bool fullCtx) override;
+    virtual bool evalSemanticContext(Ref<SemanticContext> const& pred, ParserRuleContext *parserCallStack,
+                                     size_t alt, bool fullCtx) override;
+    virtual void reportAttemptingFullContext(dfa::DFA &dfa, const antlrcpp::BitSet &conflictingAlts, ATNConfigSet *configs,
+                                             size_t startIndex, size_t stopIndex) override;
+    virtual void reportContextSensitivity(dfa::DFA &dfa, size_t prediction, ATNConfigSet *configs,
+                                          size_t startIndex, size_t stopIndex) override;
+    virtual void reportAmbiguity(dfa::DFA &dfa, dfa::DFAState *D, size_t startIndex, size_t stopIndex, bool exact,
+                                 const antlrcpp::BitSet &ambigAlts, ATNConfigSet *configs) override;
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RangeTransition.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RangeTransition.cpp
new file mode 100644
index 0000000..58d668c
--- /dev/null
+++
b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RangeTransition.cpp
@@ -0,0 +1,30 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "misc/IntervalSet.h"
+
+#include "atn/RangeTransition.h"
+
+using namespace antlr4;
+using namespace antlr4::atn;
+
+// Stores both bounds of the range; the target state is handed to the Transition base class.
+RangeTransition::RangeTransition(ATNState *target, size_t from, size_t to)
+  : Transition(target), from(from), to(to) {
+}
+
+// Reports this transition as a RANGE transition.
+Transition::SerializationType RangeTransition::getSerializationType() const {
+  return RANGE;
+}
+
+// The label is the interval built from both bounds, each converted to int.
+misc::IntervalSet RangeTransition::label() const {
+  const int lower = static_cast<int>(from);
+  const int upper = static_cast<int>(to);
+  return misc::IntervalSet::of(lower, upper);
+}
+
+// A symbol matches when it lies within [from, to], both ends included; the vocabulary bounds are unused.
+bool RangeTransition::matches(size_t symbol, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const {
+  if (symbol < from) {
+    return false;
+  }
+  return symbol <= to;
+}
+
+// Renders "RANGE <base description> { from: <from>, to: <to> }".
+std::string RangeTransition::toString() const {
+  std::string text = "RANGE ";
+  text += Transition::toString();
+  text += " { from: ";
+  text += std::to_string(from);
+  text += ", to: ";
+  text += std::to_string(to);
+  text += " }";
+  return text;
+}
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RangeTransition.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RangeTransition.h
new file mode 100644
index 0000000..14093e2
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RangeTransition.h
@@ -0,0 +1,29 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/Transition.h"
+
+namespace antlr4 {
+namespace atn {
+
+  /// A transition that carries an immutable pair of bounds, `from` and `to`.
+  class ANTLR4CPP_PUBLIC RangeTransition final : public Transition {
+  public:
+    /// First bound of the range, fixed at construction.
+    const size_t from;
+    /// Second bound of the range, fixed at construction.
+    const size_t to;
+
+    RangeTransition(ATNState *target, size_t from, size_t to);
+
+    SerializationType getSerializationType() const override;
+
+    misc::IntervalSet label() const override;
+    bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override;
+
+    std::string toString() const override;
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleStartState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleStartState.cpp
new file mode 100644
index 0000000..555f8c2
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleStartState.cpp
@@ -0,0 +1,16 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "atn/RuleStartState.h"
+
+using namespace antlr4::atn;
+
+// A freshly created start state is not marked as belonging to a left-recursive rule.
+RuleStartState::RuleStartState() : isLeftRecursiveRule(false) {
+}
+
+// Reports the RULE_START state type.
+size_t RuleStartState::getStateType() {
+  return RULE_START;
+}
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleStartState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleStartState.h
new file mode 100644
index 0000000..94ab0e4
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleStartState.h
@@ -0,0 +1,25 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/ATNState.h"
+
+namespace antlr4 {
+namespace atn {
+
+  /// ATN state whose getStateType() reports RULE_START.
+  class ANTLR4CPP_PUBLIC RuleStartState final : public ATNState {
+  public:
+    RuleStartState();
+
+    /// Stop state associated with this start state; null until assigned.
+    RuleStopState *stopState = nullptr;
+    /// Flag marking a left-recursive rule; false by default.
+    bool isLeftRecursiveRule = false;
+
+    size_t getStateType() override;
+
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleStopState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleStopState.cpp
new file mode 100644
index 0000000..3ceece4
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleStopState.cpp
@@ -0,0 +1,12 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "atn/RuleStopState.h"
+
+using namespace antlr4::atn;
+
+// Reports the RULE_STOP state type.
+size_t RuleStopState::getStateType() {
+  return RULE_STOP;
+}
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleStopState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleStopState.h
new file mode 100644
index 0000000..8a4a580
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleStopState.h
@@ -0,0 +1,25 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/ATNState.h"
+
+namespace antlr4 {
+namespace atn {
+
+  /// The last node in the ATN for a rule, unless that rule is the start symbol.
+  /// In that case, there is one transition to EOF. Later, we might encode
+  /// references to all calls to this rule to compute FOLLOW sets for
+  /// error handling.
+  class ANTLR4CPP_PUBLIC RuleStopState final : public ATNState {
+
+  public:
+    size_t getStateType() override;
+
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleTransition.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleTransition.cpp
new file mode 100644
index 0000000..c52f16d
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleTransition.cpp
@@ -0,0 +1,37 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "atn/RuleStartState.h"
+#include "atn/RuleTransition.h"
+
+using namespace antlr4::atn;
+
+// Convenience form: delegates to the four-argument constructor with precedence 0.
+RuleTransition::RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, ATNState *followState)
+  : RuleTransition(ruleStart, ruleIndex, 0, followState) {
+}
+
+// The rule's start state becomes the transition target; index, precedence and follow state are stored.
+RuleTransition::RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, int precedence, ATNState *followState)
+  : Transition(ruleStart), ruleIndex(ruleIndex), precedence(precedence), followState(followState) {
+}
+
+// Reports this transition as a RULE transition.
+Transition::SerializationType RuleTransition::getSerializationType() const {
+  return RULE;
+}
+
+// A rule transition is always epsilon.
+bool RuleTransition::isEpsilon() const {
+  return true;
+}
+
+// A rule transition never matches an input symbol; all three arguments are ignored.
+bool RuleTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const {
+  return false;
+}
+
+// Renders "RULE <base description> { ruleIndex: .., precedence: .., followState: <pointer> }".
+std::string RuleTransition::toString() const {
+  std::stringstream ss;
+  ss << "RULE " << Transition::toString();
+  ss << " { ruleIndex: " << ruleIndex;
+  ss << ", precedence: " << precedence;
+  ss << ", followState: " << std::hex << followState << " }";
+  return ss.str();
+}
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleTransition.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleTransition.h
new file mode 100644
index 0000000..50d3d29
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/RuleTransition.h
@@
-0,0 +1,40 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/Transition.h"
+
+namespace antlr4 {
+namespace atn {
+
+  /// Transition for a reference to a rule. It is non-copyable.
+  class ANTLR4CPP_PUBLIC RuleTransition : public Transition {
+  public:
+    /// Index of the referenced rule (there is no Rule object at runtime).
+    const size_t ruleIndex;
+
+    /// Precedence given at construction; the three-argument constructor uses 0.
+    const int precedence;
+
+    /// What node to begin computations following ref to rule.
+    ATNState *followState;
+
+    /// @deprecated Use
+    /// <seealso cref="#RuleTransition(RuleStartState, size_t, int, ATNState)"/> instead.
+    RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, ATNState *followState);
+
+    RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, int precedence, ATNState *followState);
+    RuleTransition(const RuleTransition &) = delete;
+    RuleTransition& operator=(const RuleTransition &) = delete;
+
+    SerializationType getSerializationType() const override;
+
+    bool isEpsilon() const override;
+    bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override;
+
+    std::string toString() const override;
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SemanticContext.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SemanticContext.cpp
new file mode 100644
index 0000000..0531e37
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SemanticContext.cpp
@@ -0,0 +1,377 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "misc/MurmurHash.h"
+#include "support/CPPUtils.h"
+#include "support/Arrays.h"
+
+#include "SemanticContext.h"
+
+using namespace antlr4;
+using namespace antlr4::atn;
+using namespace antlrcpp;
+
+//------------------ Predicate -----------------------------------------------------------------------------------------
+
+SemanticContext::Predicate::Predicate() : Predicate(INVALID_INDEX, INVALID_INDEX, false) {
+}
+
+SemanticContext::Predicate::Predicate(size_t ruleIndex, size_t predIndex, bool isCtxDependent)
+: ruleIndex(ruleIndex), predIndex(predIndex), isCtxDependent(isCtxDependent) {
+}
+
+
+/// Evaluates the predicate through Recognizer::sempred. The call stack is only passed on as local
+/// context when the predicate is context dependent; otherwise a null context is used.
+bool SemanticContext::Predicate::eval(Recognizer *parser, RuleContext *parserCallStack) {
+  RuleContext *localctx = nullptr;
+  if (isCtxDependent)
+    localctx = parserCallStack;
+  return parser->sempred(localctx, ruleIndex, predIndex);
+}
+
+/// Hash over ruleIndex, predIndex and isCtxDependent - the same three fields operator== compares.
+size_t SemanticContext::Predicate::hashCode() const {
+  size_t hashCode = misc::MurmurHash::initialize();
+  hashCode = misc::MurmurHash::update(hashCode, ruleIndex);
+  hashCode = misc::MurmurHash::update(hashCode, predIndex);
+  hashCode = misc::MurmurHash::update(hashCode, isCtxDependent ? 1 : 0);
+  hashCode = misc::MurmurHash::finish(hashCode, 3);
+  return hashCode;
+}
+
+bool SemanticContext::Predicate::operator == (const SemanticContext &other) const {
+  if (this == &other)
+    return true;
+
+  const Predicate *p = dynamic_cast<const Predicate*>(&other);
+  if (p == nullptr)
+    return false;
+
+  return ruleIndex == p->ruleIndex && predIndex == p->predIndex && isCtxDependent == p->isCtxDependent;
+}
+
+std::string SemanticContext::Predicate::toString() const {
+  return std::string("{") + std::to_string(ruleIndex) + std::string(":") + std::to_string(predIndex) + std::string("}?");
+}
+
+//------------------ PrecedencePredicate -------------------------------------------------------------------------------
+
+SemanticContext::PrecedencePredicate::PrecedencePredicate() : precedence(0) {
+}
+
+SemanticContext::PrecedencePredicate::PrecedencePredicate(int precedence) : precedence(precedence) {
+}
+
+bool SemanticContext::PrecedencePredicate::eval(Recognizer *parser, RuleContext *parserCallStack) {
+  return parser->precpred(parserCallStack, precedence);
+}
+
+/// Reduces the predicate to NONE when Recognizer::precpred accepts it, and to null otherwise.
+Ref<SemanticContext> SemanticContext::PrecedencePredicate::evalPrecedence(Recognizer *parser,
+  RuleContext *parserCallStack) {
+  if (parser->precpred(parserCallStack, precedence)) {
+    return SemanticContext::NONE;
+  }
+  else {
+    return nullptr;
+  }
+}
+
+/// Returns the difference of the two precedences (negative, zero or positive).
+int SemanticContext::PrecedencePredicate::compareTo(PrecedencePredicate *o) {
+  return precedence - o->precedence;
+}
+
+size_t SemanticContext::PrecedencePredicate::hashCode() const {
+  size_t hashCode = 1;
+  hashCode = 31 * hashCode + static_cast<size_t>(precedence);
+  return hashCode;
+}
+
+bool SemanticContext::PrecedencePredicate::operator == (const SemanticContext &other) const {
+  if (this == &other)
+    return true;
+
+  const PrecedencePredicate *predicate = dynamic_cast<const PrecedencePredicate *>(&other);
+  if (predicate == nullptr)
+    return false;
+
+  return precedence == predicate->precedence;
+}
+
+std::string SemanticContext::PrecedencePredicate::toString() const {
+  return "{" + std::to_string(precedence) + ">=prec}?";
+}
+
+//------------------ AND -----------------------------------------------------------------------------------------------
+
+/// Builds the conjunction of `a` and `b`. Operands that are themselves AND contexts are flattened,
+/// duplicates are dropped by the set, and all precedence predicates are collapsed into the single
+/// one with the lowest precedence.
+SemanticContext::AND::AND(Ref<SemanticContext> const& a, Ref<SemanticContext> const& b) {
+  Set operands;
+
+  if (is<AND>(a)) {
+    for (const auto &operand : std::dynamic_pointer_cast<AND>(a)->opnds) {
+      operands.insert(operand);
+    }
+  } else {
+    operands.insert(a);
+  }
+
+  if (is<AND>(b)) {
+    for (const auto &operand : std::dynamic_pointer_cast<AND>(b)->opnds) {
+      operands.insert(operand);
+    }
+  } else {
+    operands.insert(b);
+  }
+
+  std::vector<Ref<PrecedencePredicate>> precedencePredicates = filterPrecedencePredicates(operands);
+
+  if (!precedencePredicates.empty()) {
+    // interested in the transition with the lowest precedence
+    auto predicate = [](Ref<PrecedencePredicate> const& lhs, Ref<PrecedencePredicate> const& rhs) {
+      return lhs->precedence < rhs->precedence;
+    };
+
+    auto reduced = std::min_element(precedencePredicates.begin(), precedencePredicates.end(), predicate);
+
+    // filterPrecedencePredicates() only copies the precedence predicates and leaves `operands`
+    // untouched, so they have to be removed here; otherwise re-inserting the reduced one
+    // changes nothing and no reduction takes place.
+    for (const auto &precedencePredicate : precedencePredicates) {
+      operands.erase(Ref<SemanticContext>(precedencePredicate));
+    }
+    operands.insert(*reduced);
+  }
+
+  std::copy(operands.begin(), operands.end(), std::back_inserter(opnds));
+}
+
+std::vector<Ref<SemanticContext>> SemanticContext::AND::getOperands() const {
+  return opnds;
+}
+
+bool SemanticContext::AND::operator == (const SemanticContext &other) const {
+  if (this == &other)
+    return true;
+
+  const AND *context = dynamic_cast<const AND *>(&other);
+  if (context == nullptr)
+    return false;
+
+  return Arrays::equals(opnds, context->opnds);
+}
+
+size_t SemanticContext::AND::hashCode() const {
+  return misc::MurmurHash::hashCode(opnds, typeid(AND).hash_code());
+}
+
+/// True only if every operand evaluates to true; evaluation stops at the first false operand.
+bool SemanticContext::AND::eval(Recognizer *parser, RuleContext *parserCallStack) {
+  for (const auto &opnd : opnds) {
+    if (!opnd->eval(parser, parserCallStack)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/// Evaluates the precedence predicates of all operands and returns the simplified context:
+/// null if any operand reduced to false, NONE if all reduced to true, this context if nothing
+/// changed, otherwise the conjunction of the remaining operands.
+Ref<SemanticContext> SemanticContext::AND::evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) {
+  bool differs = false;
+  std::vector<Ref<SemanticContext>> operands;
+  for (const auto &context : opnds) {
+    Ref<SemanticContext> evaluated = context->evalPrecedence(parser, parserCallStack);
+    differs |= (evaluated != context);
+    if (evaluated == nullptr) {
+      // The AND context is false if any element is false.
+      return nullptr;
+    } else if (evaluated != NONE) {
+      // Reduce the result by skipping true elements.
+      operands.push_back(evaluated);
+    }
+  }
+
+  if (!differs) {
+    return shared_from_this();
+  }
+
+  if (operands.empty()) {
+    // All elements were true, so the AND context is true.
+    return NONE;
+  }
+
+  Ref<SemanticContext> result = operands[0];
+  for (size_t i = 1; i < operands.size(); ++i) {
+    result = SemanticContext::And(result, operands[i]);
+  }
+
+  return result;
+}
+
+/// Joins the operands' string forms with " && " (no separator after the last operand).
+std::string SemanticContext::AND::toString() const {
+  std::string tmp;
+  bool first = true;
+  for (const auto &var : opnds) {
+    if (!first) {
+      tmp += " && ";
+    }
+    tmp += var->toString();
+    first = false;
+  }
+  return tmp;
+}
+
+//------------------ OR ------------------------------------------------------------------------------------------------
+
+/// Builds the disjunction of `a` and `b`. Operands that are themselves OR contexts are flattened,
+/// duplicates are dropped by the set, and all precedence predicates are collapsed into the single
+/// one with the highest precedence.
+SemanticContext::OR::OR(Ref<SemanticContext> const& a, Ref<SemanticContext> const& b) {
+  Set operands;
+
+  if (is<OR>(a)) {
+    for (const auto &operand : std::dynamic_pointer_cast<OR>(a)->opnds) {
+      operands.insert(operand);
+    }
+  } else {
+    operands.insert(a);
+  }
+
+  if (is<OR>(b)) {
+    for (const auto &operand : std::dynamic_pointer_cast<OR>(b)->opnds) {
+      operands.insert(operand);
+    }
+  } else {
+    operands.insert(b);
+  }
+
+  std::vector<Ref<PrecedencePredicate>> precedencePredicates = filterPrecedencePredicates(operands);
+  if (!precedencePredicates.empty()) {
+    // interested in the transition with the highest precedence
+    auto predicate = [](Ref<PrecedencePredicate> const& lhs, Ref<PrecedencePredicate> const& rhs) {
+      return lhs->precedence < rhs->precedence;
+    };
+    auto reduced = std::max_element(precedencePredicates.begin(), precedencePredicates.end(), predicate);
+
+    // filterPrecedencePredicates() only copies the precedence predicates and leaves `operands`
+    // untouched, so they have to be removed here; otherwise re-inserting the reduced one
+    // changes nothing and no reduction takes place.
+    for (const auto &precedencePredicate : precedencePredicates) {
+      operands.erase(Ref<SemanticContext>(precedencePredicate));
+    }
+    operands.insert(*reduced);
+  }
+
+  std::copy(operands.begin(), operands.end(), std::back_inserter(opnds));
+}
+
+std::vector<Ref<SemanticContext>> SemanticContext::OR::getOperands() const {
+  return opnds;
+}
+
+bool SemanticContext::OR::operator == (const SemanticContext &other) const {
+  if (this == &other)
+    return true;
+
+  const OR *context = dynamic_cast<const OR *>(&other);
+  if (context == nullptr)
+    return false;
+
+  return Arrays::equals(opnds, context->opnds);
+}
+
+size_t SemanticContext::OR::hashCode() const {
+  return misc::MurmurHash::hashCode(opnds, typeid(OR).hash_code());
+}
+
+/// True as soon as one operand evaluates to true; false if none does.
+bool SemanticContext::OR::eval(Recognizer *parser, RuleContext *parserCallStack) {
+  for (const auto &opnd : opnds) {
+    if (opnd->eval(parser, parserCallStack)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/// Evaluates the precedence predicates of all operands and returns the simplified context:
+/// NONE if any operand reduced to true, null if all reduced to false, this context if nothing
+/// changed, otherwise the disjunction of the remaining operands.
+Ref<SemanticContext> SemanticContext::OR::evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) {
+  bool differs = false;
+  std::vector<Ref<SemanticContext>> operands;
+  for (const auto &context : opnds) {
+    Ref<SemanticContext> evaluated = context->evalPrecedence(parser, parserCallStack);
+    differs |= (evaluated != context);
+    if (evaluated == NONE) {
+      // The OR context is true if any element is true.
+      return NONE;
+    } else if (evaluated != nullptr) {
+      // Reduce the result by skipping false elements.
+      operands.push_back(evaluated);
+    }
+  }
+
+  if (!differs) {
+    return shared_from_this();
+  }
+
+  if (operands.empty()) {
+    // All elements were false, so the OR context is false.
+    return nullptr;
+  }
+
+  Ref<SemanticContext> result = operands[0];
+  for (size_t i = 1; i < operands.size(); ++i) {
+    result = SemanticContext::Or(result, operands[i]);
+  }
+
+  return result;
+}
+
+/// Joins the operands' string forms with " || " (no separator after the last operand).
+std::string SemanticContext::OR::toString() const {
+  std::string tmp;
+  bool first = true;
+  for (const auto &var : opnds) {
+    if (!first) {
+      tmp += " || ";
+    }
+    tmp += var->toString();
+    first = false;
+  }
+  return tmp;
+}
+
+//------------------ SemanticContext -----------------------------------------------------------------------------------
+
+const Ref<SemanticContext> SemanticContext::NONE = std::make_shared<Predicate>(INVALID_INDEX, INVALID_INDEX, false);
+
+SemanticContext::~SemanticContext() {
+}
+
+bool SemanticContext::operator != (const SemanticContext &other) const {
+  return !(*this == other);
+}
+
+/// Default implementation: there is nothing to reduce, so the context itself is returned.
+Ref<SemanticContext> SemanticContext::evalPrecedence(Recognizer * /*parser*/, RuleContext * /*parserCallStack*/) {
+  return shared_from_this();
+}
+
+/// Conjunction of `a` and `b`. A null or NONE operand is neutral, so the other operand is returned
+/// unchanged; an AND that collapses to a single operand is replaced by that operand.
+Ref<SemanticContext> SemanticContext::And(Ref<SemanticContext> const& a, Ref<SemanticContext> const& b) {
+  if (!a || a == NONE) {
+    return b;
+  }
+
+  if (!b || b == NONE) {
+    return a;
+  }
+
+  Ref<AND> result = std::make_shared<AND>(a, b);
+  if (result->opnds.size() == 1) {
+    return result->opnds[0];
+  }
+
+  return result;
+}
+
+/// Disjunction of `a` and `b`. A null operand is neutral, NONE on either side makes the whole
+/// result NONE, and an OR that collapses to a single operand is replaced by that operand.
+Ref<SemanticContext> SemanticContext::Or(Ref<SemanticContext> const& a, Ref<SemanticContext> const& b) {
+  if (!a) {
+    return b;
+  }
+  if (!b) {
+    return a;
+  }
+
+  if (a == NONE || b == NONE) {
+    return NONE;
+  }
+
+  Ref<OR> result = std::make_shared<OR>(a, b);
+  if (result->opnds.size() == 1) {
+    return result->opnds[0];
+  }
+
+  return result;
+}
+
+/// Returns every PrecedencePredicate found in `collection`. The collection itself is not modified.
+std::vector<Ref<SemanticContext::PrecedencePredicate>> SemanticContext::filterPrecedencePredicates(const Set &collection) {
+  std::vector<Ref<SemanticContext::PrecedencePredicate>> result;
+  for (const auto &context : collection) {
+    if (antlrcpp::is<PrecedencePredicate>(context)) {
+      result.push_back(std::dynamic_pointer_cast<PrecedencePredicate>(context));
+    }
+  }
+
+  return result;
+}
+
+
+//------------------
Operator ----------------------------------------------------------------------------------------- + +SemanticContext::Operator::~Operator() { +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SemanticContext.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SemanticContext.h new file mode 100644 index 0000000..7ccc16c --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SemanticContext.h @@ -0,0 +1,222 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Recognizer.h" +#include "support/CPPUtils.h" + +namespace antlr4 { +namespace atn { + + /// A tree structure used to record the semantic context in which + /// an ATN configuration is valid. It's either a single predicate, + /// a conjunction "p1 && p2", or a sum of products "p1||p2". + /// + /// I have scoped the AND, OR, and Predicate subclasses of + /// SemanticContext within the scope of this outer class. + class ANTLR4CPP_PUBLIC SemanticContext : public std::enable_shared_from_this<SemanticContext> { + public: + struct Hasher + { + size_t operator()(Ref<SemanticContext> const& k) const { + return k->hashCode(); + } + }; + + struct Comparer { + bool operator()(Ref<SemanticContext> const& lhs, Ref<SemanticContext> const& rhs) const { + if (lhs == rhs) + return true; + return (lhs->hashCode() == rhs->hashCode()) && (*lhs == *rhs); + } + }; + + + using Set = std::unordered_set<Ref<SemanticContext>, Hasher, Comparer>; + + /** + * The default {@link SemanticContext}, which is semantically equivalent to + * a predicate of the form {@code {true}?}. 
+ */ + static const Ref<SemanticContext> NONE; + + virtual ~SemanticContext(); + + virtual size_t hashCode() const = 0; + virtual std::string toString() const = 0; + virtual bool operator == (const SemanticContext &other) const = 0; + virtual bool operator != (const SemanticContext &other) const; + + /// <summary> + /// For context independent predicates, we evaluate them without a local + /// context (i.e., null context). That way, we can evaluate them without + /// having to create proper rule-specific context during prediction (as + /// opposed to the parser, which creates them naturally). In a practical + /// sense, this avoids a cast exception from RuleContext to myruleContext. + /// <p/> + /// For context dependent predicates, we must pass in a local context so that + /// references such as $arg evaluate properly as _localctx.arg. We only + /// capture context dependent predicates in the context in which we begin + /// prediction, so we passed in the outer context here in case of context + /// dependent predicate evaluation. + /// </summary> + virtual bool eval(Recognizer *parser, RuleContext *parserCallStack) = 0; + + /** + * Evaluate the precedence predicates for the context and reduce the result. + * + * @param parser The parser instance. + * @param parserCallStack + * @return The simplified semantic context after precedence predicates are + * evaluated, which will be one of the following values. 
+ * <ul> + * <li>{@link #NONE}: if the predicate simplifies to {@code true} after + * precedence predicates are evaluated.</li> + * <li>{@code null}: if the predicate simplifies to {@code false} after + * precedence predicates are evaluated.</li> + * <li>{@code this}: if the semantic context is not changed as a result of + * precedence predicate evaluation.</li> + * <li>A non-{@code null} {@link SemanticContext}: the new simplified + * semantic context after precedence predicates are evaluated.</li> + * </ul> + */ + virtual Ref<SemanticContext> evalPrecedence(Recognizer *parser, RuleContext *parserCallStack); + + static Ref<SemanticContext> And(Ref<SemanticContext> const& a, Ref<SemanticContext> const& b); + + /// See also: ParserATNSimulator::getPredsForAmbigAlts. + static Ref<SemanticContext> Or(Ref<SemanticContext> const& a, Ref<SemanticContext> const& b); + + class Predicate; + class PrecedencePredicate; + class Operator; + class AND; + class OR; + + private: + static std::vector<Ref<PrecedencePredicate>> filterPrecedencePredicates(const Set &collection); + }; + + class ANTLR4CPP_PUBLIC SemanticContext::Predicate : public SemanticContext { + public: + const size_t ruleIndex; + const size_t predIndex; + const bool isCtxDependent; // e.g., $i ref in pred + + protected: + Predicate(); + + public: + Predicate(size_t ruleIndex, size_t predIndex, bool isCtxDependent); + + virtual bool eval(Recognizer *parser, RuleContext *parserCallStack) override; + virtual size_t hashCode() const override; + virtual bool operator == (const SemanticContext &other) const override; + virtual std::string toString() const override; + }; + + class ANTLR4CPP_PUBLIC SemanticContext::PrecedencePredicate : public SemanticContext { + public: + const int precedence; + + protected: + PrecedencePredicate(); + + public: + PrecedencePredicate(int precedence); + + virtual bool eval(Recognizer *parser, RuleContext *parserCallStack) override; + virtual Ref<SemanticContext> evalPrecedence(Recognizer 
*parser, RuleContext *parserCallStack) override; + virtual int compareTo(PrecedencePredicate *o); + virtual size_t hashCode() const override; + virtual bool operator == (const SemanticContext &other) const override; + virtual std::string toString() const override; + }; + + /** + * This is the base class for semantic context "operators", which operate on + * a collection of semantic context "operands". + * + * @since 4.3 + */ + class ANTLR4CPP_PUBLIC SemanticContext::Operator : public SemanticContext { + public: + virtual ~Operator() override; + + /** + * Gets the operands for the semantic context operator. + * + * @return a collection of {@link SemanticContext} operands for the + * operator. + * + * @since 4.3 + */ + + virtual std::vector<Ref<SemanticContext>> getOperands() const = 0; + }; + + /** + * A semantic context which is true whenever none of the contained contexts + * is false. + */ + class ANTLR4CPP_PUBLIC SemanticContext::AND : public SemanticContext::Operator { + public: + std::vector<Ref<SemanticContext>> opnds; + + AND(Ref<SemanticContext> const& a, Ref<SemanticContext> const& b) ; + + virtual std::vector<Ref<SemanticContext>> getOperands() const override; + virtual bool operator == (const SemanticContext &other) const override; + virtual size_t hashCode() const override; + + /** + * The evaluation of predicates by this context is short-circuiting, but + * unordered. + */ + virtual bool eval(Recognizer *parser, RuleContext *parserCallStack) override; + virtual Ref<SemanticContext> evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) override; + virtual std::string toString() const override; + }; + + /** + * A semantic context which is true whenever at least one of the contained + * contexts is true. 
+ */ + class ANTLR4CPP_PUBLIC SemanticContext::OR : public SemanticContext::Operator { + public: + std::vector<Ref<SemanticContext>> opnds; + + OR(Ref<SemanticContext> const& a, Ref<SemanticContext> const& b); + + virtual std::vector<Ref<SemanticContext>> getOperands() const override; + virtual bool operator == (const SemanticContext &other) const override; + virtual size_t hashCode() const override; + + /** + * The evaluation of predicates by this context is short-circuiting, but + * unordered. + */ + virtual bool eval(Recognizer *parser, RuleContext *parserCallStack) override; + virtual Ref<SemanticContext> evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) override; + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 + +// Hash function for SemanticContext, used in the MurmurHash::update function + +namespace std { + using antlr4::atn::SemanticContext; + + template <> struct hash<SemanticContext> + { + size_t operator () (SemanticContext &x) const + { + return x.hashCode(); + } + }; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SetTransition.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SetTransition.cpp new file mode 100644 index 0000000..35d6905 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SetTransition.cpp @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Token.h" +#include "misc/IntervalSet.h" + +#include "atn/SetTransition.h" + +using namespace antlr4; +using namespace antlr4::atn; + +SetTransition::SetTransition(ATNState *target, const misc::IntervalSet &aSet) + : Transition(target), set(aSet.isEmpty() ? 
misc::IntervalSet::of(Token::INVALID_TYPE) : aSet) { +} + +Transition::SerializationType SetTransition::getSerializationType() const { + return SET; +} + +misc::IntervalSet SetTransition::label() const { + return set; +} + +bool SetTransition::matches(size_t symbol, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return set.contains(symbol); +} + +std::string SetTransition::toString() const { + return "SET " + Transition::toString() + " { set: " + set.toString() + "}"; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SetTransition.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SetTransition.h new file mode 100644 index 0000000..044d41a --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SetTransition.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// A transition containing a set of values. 
</summary> + class ANTLR4CPP_PUBLIC SetTransition : public Transition { + public: + const misc::IntervalSet set; + + SetTransition(ATNState *target, const misc::IntervalSet &set); + + virtual SerializationType getSerializationType() const override; + + virtual misc::IntervalSet label() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SingletonPredictionContext.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SingletonPredictionContext.cpp new file mode 100644 index 0000000..39ad9fb --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SingletonPredictionContext.cpp @@ -0,0 +1,81 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/EmptyPredictionContext.h" + +#include "atn/SingletonPredictionContext.h" + +using namespace antlr4::atn; + +SingletonPredictionContext::SingletonPredictionContext(Ref<PredictionContext> const& parent, size_t returnState) + : PredictionContext(parent ? 
calculateHashCode(parent, returnState) : calculateEmptyHashCode()),
+    parent(parent), returnState(returnState) {
+  assert(returnState != ATNState::INVALID_STATE_NUMBER);
+}
+
+SingletonPredictionContext::~SingletonPredictionContext() {
+}
+
+/// Factory for singleton contexts.
+/// When `returnState` is EMPTY_RETURN_STATE and there is no parent, the shared EMPTY
+/// context is returned (cast to SingletonPredictionContext) instead of allocating a
+/// new object. In every other case a new context holding (parent, returnState) is created.
+Ref<SingletonPredictionContext> SingletonPredictionContext::create(Ref<PredictionContext> const& parent, size_t returnState) {
+
+  // The empty context is identified by EMPTY_RETURN_STATE together with a null parent.
+  // A context that has a parent must keep it, so it is never replaced by EMPTY.
+  if (returnState == EMPTY_RETURN_STATE && !parent) {
+    // someone can pass in the bits of an array ctx that mean $
+    return std::dynamic_pointer_cast<SingletonPredictionContext>(EMPTY);
+  }
+  return std::make_shared<SingletonPredictionContext>(parent, returnState);
+}
+
+/// A singleton context always holds exactly one (parent, returnState) pair.
+size_t SingletonPredictionContext::size() const {
+  return 1;
+}
+
+/// Returns the parent; `index` must be 0 (checked by assert only).
+Ref<PredictionContext> SingletonPredictionContext::getParent(size_t index) const {
+  assert(index == 0);
+  ((void)(index)); // Make Release build happy.
+  return parent;
+}
+
+/// Returns the return state; `index` must be 0 (checked by assert only).
+size_t SingletonPredictionContext::getReturnState(size_t index) const {
+  assert(index == 0);
+  ((void)(index)); // Make Release build happy.
+  return returnState;
+}
+
+/// Two singleton contexts are equal when they are the same object, or when their hash
+/// codes and return states match and their parents are both null or compare equal.
+bool SingletonPredictionContext::operator == (const PredictionContext &o) const {
+  if (this == &o) {
+    return true;
+  }
+
+  const SingletonPredictionContext *other = dynamic_cast<const SingletonPredictionContext*>(&o);
+  if (other == nullptr) {
+    return false;
+  }
+
+  if (this->hashCode() != other->hashCode()) {
+    return false; // can't be same if hash is different
+  }
+
+  if (returnState != other->returnState)
+    return false;
+
+  if (!parent && !other->parent)
+    return true;
+  if (!parent || !other->parent)
+    return false;
+
+  return *parent == *other->parent;
+}
+
+std::string SingletonPredictionContext::toString() const {
+  //std::string up = !parent.expired() ? parent.lock()->toString() : "";
+  std::string up = parent != nullptr ? 
parent->toString() : ""; + if (up.length() == 0) { + if (returnState == EMPTY_RETURN_STATE) { + return "$"; + } + return std::to_string(returnState); + } + return std::to_string(returnState) + " " + up; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SingletonPredictionContext.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SingletonPredictionContext.h new file mode 100644 index 0000000..f1e993b --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/SingletonPredictionContext.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/PredictionContext.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC SingletonPredictionContext : public PredictionContext { + public: + // Usually a parent is linked via a weak ptr. Not so here, as we have a kind of reverse reference chain. + // There are no child contexts stored here and often the parent context is left dangling when its + // owning ATNState is released. In order to avoid having this context released as well (leaving all other contexts + // which have this one as parent with a null reference) we use a shared_ptr here instead, to keep those orphaned + // parent contexts alive. 
+ const Ref<PredictionContext> parent; + const size_t returnState; + + SingletonPredictionContext(Ref<PredictionContext> const& parent, size_t returnState); + virtual ~SingletonPredictionContext(); + + static Ref<SingletonPredictionContext> create(Ref<PredictionContext> const& parent, size_t returnState); + + virtual size_t size() const override; + virtual Ref<PredictionContext> getParent(size_t index) const override; + virtual size_t getReturnState(size_t index) const override; + virtual bool operator == (const PredictionContext &o) const override; + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarBlockStartState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarBlockStartState.cpp new file mode 100644 index 0000000..e62c0de --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarBlockStartState.cpp @@ -0,0 +1,12 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/StarBlockStartState.h" + +using namespace antlr4::atn; + +size_t StarBlockStartState::getStateType() { + return STAR_BLOCK_START; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarBlockStartState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarBlockStartState.h new file mode 100644 index 0000000..8fae316 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarBlockStartState.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/BlockStartState.h" + +namespace antlr4 { +namespace atn { + + /// The block that begins a closure loop. 
+ class ANTLR4CPP_PUBLIC StarBlockStartState final : public BlockStartState { + + public: + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarLoopEntryState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarLoopEntryState.cpp new file mode 100644 index 0000000..766a858 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarLoopEntryState.cpp @@ -0,0 +1,15 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/StarLoopEntryState.h" + +using namespace antlr4::atn; + +StarLoopEntryState::StarLoopEntryState() : DecisionState(), isPrecedenceDecision(false) { +} + +size_t StarLoopEntryState::getStateType() { + return STAR_LOOP_ENTRY; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarLoopEntryState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarLoopEntryState.h new file mode 100644 index 0000000..a062c58 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarLoopEntryState.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC StarLoopEntryState final : public DecisionState { + public: + StarLoopEntryState(); + + /** + * Indicates whether this state can benefit from a precedence DFA during SLL + * decision making. 
+ * + * <p>This is a computed property that is calculated during ATN deserialization + * and stored for use in {@link ParserATNSimulator} and + * {@link ParserInterpreter}.</p> + * + * @see DFA#isPrecedenceDfa() + */ + bool isPrecedenceDecision = false; + + StarLoopbackState *loopBackState = nullptr; + + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarLoopbackState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarLoopbackState.cpp new file mode 100644 index 0000000..f510589 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarLoopbackState.cpp @@ -0,0 +1,19 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/StarLoopEntryState.h" +#include "atn/Transition.h" + +#include "atn/StarLoopbackState.h" + +using namespace antlr4::atn; + +StarLoopEntryState *StarLoopbackState::getLoopEntryState() { + return dynamic_cast<StarLoopEntryState *>(transitions[0]->target); +} + +size_t StarLoopbackState::getStateType() { + return STAR_LOOP_BACK; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarLoopbackState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarLoopbackState.h new file mode 100644 index 0000000..f5db3ef --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/StarLoopbackState.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC StarLoopbackState final : public ATNState { + public: + StarLoopEntryState *getLoopEntryState(); + + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/TokensStartState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/TokensStartState.cpp new file mode 100644 index 0000000..a764278 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/TokensStartState.cpp @@ -0,0 +1,12 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/TokensStartState.h" + +using namespace antlr4::atn; + +size_t TokensStartState::getStateType() { + return TOKEN_START; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/TokensStartState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/TokensStartState.h new file mode 100644 index 0000000..e534d04 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/TokensStartState.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + /// The Tokens rule start state linking to each lexer rule start state. 
+ class ANTLR4CPP_PUBLIC TokensStartState final : public DecisionState { + + public: + virtual size_t getStateType() override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/Transition.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/Transition.cpp new file mode 100644 index 0000000..15922a3 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/Transition.cpp @@ -0,0 +1,44 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Exceptions.h" +#include "support/Arrays.h" + +#include "atn/Transition.h" + +using namespace antlr4; +using namespace antlr4::atn; + +using namespace antlrcpp; + +const std::vector<std::string> Transition::serializationNames = { + "INVALID", "EPSILON", "RANGE", "RULE", "PREDICATE", "ATOM", "ACTION", "SET", "NOT_SET", "WILDCARD", "PRECEDENCE" +}; + +Transition::Transition(ATNState *target) { + if (target == nullptr) { + throw NullPointerException("target cannot be null."); + } + + this->target = target; +} + +Transition::~Transition() { +} + +bool Transition::isEpsilon() const { + return false; +} + +misc::IntervalSet Transition::label() const { + return misc::IntervalSet::EMPTY_SET; +} + +std::string Transition::toString() const { + std::stringstream ss; + ss << "(Transition " << std::hex << this << ", target: " << std::hex << target << ')'; + + return ss.str(); +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/Transition.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/Transition.h new file mode 100644 index 0000000..ffed2f5 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/Transition.h @@ -0,0 +1,76 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "misc/IntervalSet.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// An ATN transition between any two ATN states. Subclasses define + /// atom, set, epsilon, action, predicate, rule transitions. + /// <p/> + /// This is a one way link. It emanates from a state (usually via a list of + /// transitions) and has a target state. + /// <p/> + /// Since we never have to change the ATN transitions once we construct it, + /// we can fix these transitions as specific classes. The DFA transitions + /// on the other hand need to update the labels as it adds transitions to + /// the states. We'll use the term Edge for the DFA to distinguish them from + /// ATN transitions. + /// </summary> + class ANTLR4CPP_PUBLIC Transition { + public: + // constants for serialization + enum SerializationType { + EPSILON = 1, + RANGE = 2, + RULE = 3, + PREDICATE = 4, // e.g., {isType(input.LT(1))}? + ATOM = 5, + ACTION = 6, + SET = 7, // ~(A|B) or ~atom, wildcard, which convert to next 2 + NOT_SET = 8, + WILDCARD = 9, + PRECEDENCE = 10, + }; + + static const std::vector<std::string> serializationNames; + + /// The target of this transition. + // ml: this is a reference into the ATN. + ATNState *target; + + virtual ~Transition(); + + protected: + Transition(ATNState *target); + + public: + virtual SerializationType getSerializationType() const = 0; + + /** + * Determines if the transition is an "epsilon" transition. + * + * <p>The default implementation returns {@code false}.</p> + * + * @return {@code true} if traversing this transition in the ATN does not + * consume an input symbol; otherwise, {@code false} if traversing this + * transition consumes (matches) an input symbol. 
+ */ + virtual bool isEpsilon() const; + virtual misc::IntervalSet label() const; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const = 0; + + virtual std::string toString() const; + + Transition(Transition const&) = delete; + Transition& operator=(Transition const&) = delete; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/WildcardTransition.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/WildcardTransition.cpp new file mode 100644 index 0000000..dc47413 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/WildcardTransition.cpp @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATNState.h" + +#include "atn/WildcardTransition.h" + +using namespace antlr4::atn; + +WildcardTransition::WildcardTransition(ATNState *target) : Transition(target) { +} + +Transition::SerializationType WildcardTransition::getSerializationType() const { + return WILDCARD; +} + +bool WildcardTransition::matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const { + return symbol >= minVocabSymbol && symbol <= maxVocabSymbol; +} + +std::string WildcardTransition::toString() const { + return "WILDCARD " + Transition::toString() + " {}"; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/WildcardTransition.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/WildcardTransition.h new file mode 100644 index 0000000..c47c717 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/WildcardTransition.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC WildcardTransition final : public Transition { + public: + WildcardTransition(ATNState *target); + + virtual SerializationType getSerializationType() const override; + + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 |
