Split Token class into Token/Pattern

This commit is contained in:
Josh Holtrop 2022-09-15 22:46:44 -04:00
parent bf075a69f6
commit 38ae5ac7a1
8 changed files with 72 additions and 38 deletions

View File

@ -13,6 +13,7 @@ require_relative "propane/lexer/dfa"
require_relative "propane/parser" require_relative "propane/parser"
require_relative "propane/parser/item" require_relative "propane/parser/item"
require_relative "propane/parser/item_set" require_relative "propane/parser/item_set"
require_relative "propane/pattern"
require_relative "propane/regex" require_relative "propane/regex"
require_relative "propane/regex/nfa" require_relative "propane/regex/nfa"
require_relative "propane/regex/unit" require_relative "propane/regex/unit"

View File

@ -61,7 +61,7 @@ class Propane
end end
end end
determine_possibly_empty_rulesets!(rule_sets) determine_possibly_empty_rulesets!(rule_sets)
@lexer = Lexer.new(@grammar.tokens, @grammar.drop_tokens) @lexer = Lexer.new(@grammar.patterns)
@parser = Parser.new(@grammar, rule_sets, rule_sets["Start"], @log) @parser = Parser.new(@grammar, rule_sets, rule_sets["Start"], @log)
end end

View File

@ -3,14 +3,14 @@ class Propane
class Grammar class Grammar
attr_reader :classname attr_reader :classname
attr_reader :drop_tokens
attr_reader :modulename attr_reader :modulename
attr_reader :patterns
attr_reader :rules attr_reader :rules
attr_reader :tokens attr_reader :tokens
def initialize(input) def initialize(input)
@patterns = []
@tokens = [] @tokens = []
@drop_tokens = []
@rules = [] @rules = []
input = input.gsub("\r\n", "\n") input = input.gsub("\r\n", "\n")
parse_grammar(input) parse_grammar(input)
@ -37,10 +37,13 @@ class Propane
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/ unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
raise Error.new("Invalid token name #{name.inspect}") raise Error.new("Invalid token name #{name.inspect}")
end end
@tokens << Token.new(name: name, pattern: pattern, id: @tokens.size, line_number: line_number) token = Token.new(name: name, id: @tokens.size, line_number: line_number)
@tokens << token
pattern = Pattern.new(pattern: pattern, token: token, line_number: line_number)
@patterns << pattern
elsif sliced = input.slice!(/\Adrop\s+(\S+)\s*;/) elsif sliced = input.slice!(/\Adrop\s+(\S+)\s*;/)
pattern = $1 pattern = $1
@drop_tokens << Token.new(pattern: pattern, line_number: line_number) @patterns << Pattern.new(pattern: pattern, line_number: line_number, drop: true)
elsif sliced = input.slice!(/\A(\S+)\s*->\s*(.*?)(?:;|<<\n(.*?)^>>\n)/m) elsif sliced = input.slice!(/\A(\S+)\s*->\s*(.*?)(?:;|<<\n(.*?)^>>\n)/m)
rule_name, components, code = $1, $2, $3 rule_name, components, code = $1, $2, $3
unless rule_name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/ unless rule_name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/

View File

@ -5,8 +5,8 @@ class Propane
# Lexer DFA. # Lexer DFA.
attr_accessor :dfa attr_accessor :dfa
def initialize(tokens, drop_tokens) def initialize(patterns)
@dfa = DFA.new(tokens, drop_tokens) @dfa = DFA.new(patterns)
end end
def build_tables def build_tables
@ -20,7 +20,7 @@ class Propane
elsif state.accepts.drop? elsif state.accepts.drop?
TOKEN_DROP TOKEN_DROP
else else
state.accepts.id state.accepts.token.id
end end
state_table << { state_table << {
transition_table_index: transition_table.size, transition_table_index: transition_table.size,

View File

@ -3,11 +3,11 @@ class Propane
class DFA < FA class DFA < FA
def initialize(tokens, drop_tokens) def initialize(patterns)
super() super()
start_nfa = Regex::NFA.new start_nfa = Regex::NFA.new
(tokens + drop_tokens).each do |token| patterns.each do |pattern|
start_nfa.start_state.add_transition(nil, token.nfa.start_state) start_nfa.start_state.add_transition(nil, pattern.nfa.start_state)
end end
@nfa_state_sets = {} @nfa_state_sets = {}
@states = [] @states = []
@ -40,7 +40,7 @@ class Propane
nfa_state_set.each do |nfa_state| nfa_state_set.each do |nfa_state|
if nfa_state.accepts if nfa_state.accepts
if state.accepts if state.accepts
if nfa_state.accepts.id < state.accepts.id if nfa_state.accepts.line_number < state.accepts.line_number
state.accepts = nfa_state.accepts state.accepts = nfa_state.accepts
end end
else else

53
lib/propane/pattern.rb Normal file
View File

@ -0,0 +1,53 @@
class Propane

  # A lexer pattern entry: ties a regular expression to either a Token to
  # emit on a match, or — for drop patterns — to nothing at all.
  class Pattern

    # @return [String, nil]
    #   Pattern string as written in the input grammar.
    attr_reader :pattern

    # @return [Token, nil]
    #   Token to be returned by this pattern (nil for drop patterns).
    attr_reader :token

    # @return [Integer, nil]
    #   Line number where the pattern was defined in the input grammar.
    attr_reader :line_number

    # @return [Regex::NFA, nil]
    #   Regex NFA for matching the pattern.
    attr_reader :nfa

    # Construct a Pattern.
    #
    # @param options [Hash]
    #   Optional parameters.
    # @option options [Boolean] :drop
    #   Whether this is a drop pattern.
    # @option options [String, nil] :pattern
    #   Pattern string.
    # @option options [Token, nil] :token
    #   Token to be returned by this pattern.
    # @option options [Integer, nil] :line_number
    #   Line number where the pattern was defined in the input grammar.
    def initialize(options)
      @drop = options[:drop]
      @pattern = options[:pattern]
      @token = options[:token]
      @line_number = options[:line_number]
      # Compile the pattern into an NFA and mark its accepting state as
      # accepting this Pattern, so the DFA builder can map matches back here.
      compiled = Regex.new(@pattern)
      compiled.nfa.end_state.accepts = self
      @nfa = compiled.nfa
    end

    # Whether the pattern is a drop pattern (matched input is discarded
    # rather than producing a token).
    #
    # @return [Boolean]
    #   Whether the pattern is a drop pattern.
    def drop?
      @drop
    end

  end

end

View File

@ -6,10 +6,6 @@ class Propane
# Token name. # Token name.
attr_reader :name attr_reader :name
# @return [String, nil]
# Token pattern.
attr_reader :pattern
# @return [Integer, nil] # @return [Integer, nil]
# Token ID. # Token ID.
attr_reader :id attr_reader :id
@ -18,46 +14,26 @@ class Propane
# Line number where the token was defined in the input grammar. # Line number where the token was defined in the input grammar.
attr_reader :line_number attr_reader :line_number
# @return [Regex::NFA, nil]
# Regex NFA for matching the token.
attr_reader :nfa
# Construct a Token. # Construct a Token.
# #
# @param options [Hash] # @param options [Hash]
# Optional parameters. # Optional parameters.
# @option options [String, nil] :name # @option options [String, nil] :name
# Token name. # Token name.
# @option options [String, nil] :pattern
# Token pattern.
# @option options [Integer, nil] :id # @option options [Integer, nil] :id
# Token ID. # Token ID.
# @option options [Integer, nil] :line_number # @option options [Integer, nil] :line_number
# Line number where the token was defined in the input grammar. # Line number where the token was defined in the input grammar.
def initialize(options) def initialize(options)
@name = options[:name] @name = options[:name]
@pattern = options[:pattern]
@id = options[:id] @id = options[:id]
@line_number = options[:line_number] @line_number = options[:line_number]
unless @pattern.nil?
regex = Regex.new(@pattern)
regex.nfa.end_state.accepts = self
@nfa = regex.nfa
end
end end
def c_name def c_name
@name.upcase @name.upcase
end end
# Whether the token is a drop token.
#
# @return [Boolean]
# Whether the token is a drop token.
def drop?
@name.nil?
end
def to_s def to_s
@name @name
end end

View File

@ -35,7 +35,8 @@ class TestLexer
end end
end end
if last_accepts if last_accepts
[last_accepts.name, last_s] name = last_accepts.token ? last_accepts.token.name : nil
[name, last_s]
end end
end end
@ -51,7 +52,7 @@ end
def run(grammar, input) def run(grammar, input)
grammar = Propane::Grammar.new(grammar) grammar = Propane::Grammar.new(grammar)
token_dfa = Propane::Lexer::DFA.new(grammar.tokens, grammar.drop_tokens) token_dfa = Propane::Lexer::DFA.new(grammar.patterns)
test_lexer = TestLexer.new(token_dfa) test_lexer = TestLexer.new(token_dfa)
test_lexer.lex(input) test_lexer.lex(input)
end end