From 38ae5ac7a1591747b81d9a133dc44a5396249c30 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Thu, 15 Sep 2022 22:46:44 -0400 Subject: [PATCH] Split Token class into Token/Pattern --- lib/propane.rb | 1 + lib/propane/generator.rb | 2 +- lib/propane/grammar.rb | 11 ++++--- lib/propane/lexer.rb | 6 ++-- lib/propane/lexer/dfa.rb | 8 ++--- lib/propane/pattern.rb | 53 ++++++++++++++++++++++++++++++++++ lib/propane/token.rb | 24 --------------- spec/propane/lexer/dfa_spec.rb | 5 ++-- 8 files changed, 72 insertions(+), 38 deletions(-) create mode 100644 lib/propane/pattern.rb diff --git a/lib/propane.rb b/lib/propane.rb index 37236d3..2de0f54 100644 --- a/lib/propane.rb +++ b/lib/propane.rb @@ -13,6 +13,7 @@ require_relative "propane/lexer/dfa" require_relative "propane/parser" require_relative "propane/parser/item" require_relative "propane/parser/item_set" +require_relative "propane/pattern" require_relative "propane/regex" require_relative "propane/regex/nfa" require_relative "propane/regex/unit" diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb index 0b3456b..c086cf5 100644 --- a/lib/propane/generator.rb +++ b/lib/propane/generator.rb @@ -61,7 +61,7 @@ class Propane end end determine_possibly_empty_rulesets!(rule_sets) - @lexer = Lexer.new(@grammar.tokens, @grammar.drop_tokens) + @lexer = Lexer.new(@grammar.patterns) @parser = Parser.new(@grammar, rule_sets, rule_sets["Start"], @log) end diff --git a/lib/propane/grammar.rb b/lib/propane/grammar.rb index 9015cc6..60a5bd3 100644 --- a/lib/propane/grammar.rb +++ b/lib/propane/grammar.rb @@ -3,14 +3,14 @@ class Propane class Grammar attr_reader :classname - attr_reader :drop_tokens attr_reader :modulename + attr_reader :patterns attr_reader :rules attr_reader :tokens def initialize(input) + @patterns = [] @tokens = [] - @drop_tokens = [] @rules = [] input = input.gsub("\r\n", "\n") parse_grammar(input) @@ -37,10 +37,13 @@ class Propane unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/ raise Error.new("Invalid 
token name #{name.inspect}") end - @tokens << Token.new(name: name, pattern: pattern, id: @tokens.size, line_number: line_number) + token = Token.new(name: name, id: @tokens.size, line_number: line_number) + @tokens << token + pattern = Pattern.new(pattern: pattern, token: token, line_number: line_number) + @patterns << pattern elsif sliced = input.slice!(/\Adrop\s+(\S+)\s*;/) pattern = $1 - @drop_tokens << Token.new(pattern: pattern, line_number: line_number) + @patterns << Pattern.new(pattern: pattern, line_number: line_number, drop: true) elsif sliced = input.slice!(/\A(\S+)\s*->\s*(.*?)(?:;|<<\n(.*?)^>>\n)/m) rule_name, components, code = $1, $2, $3 unless rule_name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/ diff --git a/lib/propane/lexer.rb b/lib/propane/lexer.rb index 7dc5921..0968308 100644 --- a/lib/propane/lexer.rb +++ b/lib/propane/lexer.rb @@ -5,8 +5,8 @@ class Propane # Lexer DFA. attr_accessor :dfa - def initialize(tokens, drop_tokens) - @dfa = DFA.new(tokens, drop_tokens) + def initialize(patterns) + @dfa = DFA.new(patterns) end def build_tables @@ -20,7 +20,7 @@ class Propane elsif state.accepts.drop? 
TOKEN_DROP else - state.accepts.id + state.accepts.token.id end state_table << { transition_table_index: transition_table.size, diff --git a/lib/propane/lexer/dfa.rb b/lib/propane/lexer/dfa.rb index b10dee7..68a8bc2 100644 --- a/lib/propane/lexer/dfa.rb +++ b/lib/propane/lexer/dfa.rb @@ -3,11 +3,11 @@ class Propane class DFA < FA - def initialize(tokens, drop_tokens) + def initialize(patterns) super() start_nfa = Regex::NFA.new - (tokens + drop_tokens).each do |token| - start_nfa.start_state.add_transition(nil, token.nfa.start_state) + patterns.each do |pattern| + start_nfa.start_state.add_transition(nil, pattern.nfa.start_state) end @nfa_state_sets = {} @states = [] @@ -40,7 +40,7 @@ class Propane nfa_state_set.each do |nfa_state| if nfa_state.accepts if state.accepts - if nfa_state.accepts.id < state.accepts.id + if nfa_state.accepts.line_number < state.accepts.line_number state.accepts = nfa_state.accepts end else diff --git a/lib/propane/pattern.rb b/lib/propane/pattern.rb new file mode 100644 index 0000000..2e9ebae --- /dev/null +++ b/lib/propane/pattern.rb @@ -0,0 +1,53 @@ +class Propane + + class Pattern + + # @return [String, nil] + # Pattern. + attr_reader :pattern + + # @return [Token, nil] + # Token to be returned by this pattern. + attr_reader :token + + # @return [Integer, nil] + # Line number where the pattern was defined in the input grammar. + attr_reader :line_number + + # @return [Regex::NFA, nil] + # Regex NFA for matching the pattern. + attr_reader :nfa + + # Construct a Pattern. + # + # @param options [Hash] + # Optional parameters. + # @option options [Boolean] :drop + # Whether this is a drop pattern. + # @option options [String, nil] :pattern + # Pattern. + # @option options [Token, nil] :token + # Token to be returned by this pattern. + # @option options [Integer, nil] :line_number + # Line number where the pattern was defined in the input grammar. 
+ def initialize(options) + @drop = options[:drop] + @pattern = options[:pattern] + @token = options[:token] + @line_number = options[:line_number] + regex = Regex.new(@pattern) + regex.nfa.end_state.accepts = self + @nfa = regex.nfa + end + + # Whether the pattern is a drop pattern. + # + # @return [Boolean] + # Whether the pattern is a drop pattern. + def drop? + @drop + end + + end + +end diff --git a/lib/propane/token.rb b/lib/propane/token.rb index b40af45..52e6774 100644 --- a/lib/propane/token.rb +++ b/lib/propane/token.rb @@ -6,10 +6,6 @@ class Propane # Token name. attr_reader :name - # @return [String, nil] - # Token pattern. - attr_reader :pattern - # @return [Integer, nil] # Token ID. attr_reader :id @@ -18,46 +14,26 @@ class Propane # Line number where the token was defined in the input grammar. attr_reader :line_number - # @return [Regex::NFA, nil] - # Regex NFA for matching the token. - attr_reader :nfa - # Construct a Token. # # @param options [Hash] # Optional parameters. # @option options [String, nil] :name # Token name. - # @option options [String, nil] :pattern - # Token pattern. # @option options [Integer, nil] :id # Token ID. # @option options [Integer, nil] :line_number # Line number where the token was defined in the input grammar. def initialize(options) @name = options[:name] - @pattern = options[:pattern] @id = options[:id] @line_number = options[:line_number] - unless @pattern.nil? - regex = Regex.new(@pattern) - regex.nfa.end_state.accepts = self - @nfa = regex.nfa - end end def c_name @name.upcase end - # Whether the token is a drop token. - # - # @return [Boolean] - # Whether the token is a drop token. - def drop? - @name.nil? 
- end - def to_s @name end diff --git a/spec/propane/lexer/dfa_spec.rb b/spec/propane/lexer/dfa_spec.rb index bb8b177..5fab585 100644 --- a/spec/propane/lexer/dfa_spec.rb +++ b/spec/propane/lexer/dfa_spec.rb @@ -35,7 +35,8 @@ class TestLexer end end if last_accepts - [last_accepts.name, last_s] + name = last_accepts.token ? last_accepts.token.name : nil + [name, last_s] end end @@ -51,7 +52,7 @@ end def run(grammar, input) grammar = Propane::Grammar.new(grammar) - token_dfa = Propane::Lexer::DFA.new(grammar.tokens, grammar.drop_tokens) + token_dfa = Propane::Lexer::DFA.new(grammar.patterns) test_lexer = TestLexer.new(token_dfa) test_lexer.lex(input) end