Split Token class into Token/Pattern

This commit is contained in:
Josh Holtrop 2022-09-15 22:46:44 -04:00
parent bf075a69f6
commit 38ae5ac7a1
8 changed files with 72 additions and 38 deletions

View File

@ -13,6 +13,7 @@ require_relative "propane/lexer/dfa"
require_relative "propane/parser"
require_relative "propane/parser/item"
require_relative "propane/parser/item_set"
require_relative "propane/pattern"
require_relative "propane/regex"
require_relative "propane/regex/nfa"
require_relative "propane/regex/unit"

View File

@ -61,7 +61,7 @@ class Propane
end
end
determine_possibly_empty_rulesets!(rule_sets)
@lexer = Lexer.new(@grammar.tokens, @grammar.drop_tokens)
@lexer = Lexer.new(@grammar.patterns)
@parser = Parser.new(@grammar, rule_sets, rule_sets["Start"], @log)
end

View File

@ -3,14 +3,14 @@ class Propane
class Grammar
attr_reader :classname
attr_reader :drop_tokens
attr_reader :modulename
attr_reader :patterns
attr_reader :rules
attr_reader :tokens
def initialize(input)
@patterns = []
@tokens = []
@drop_tokens = []
@rules = []
input = input.gsub("\r\n", "\n")
parse_grammar(input)
@ -37,10 +37,13 @@ class Propane
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
raise Error.new("Invalid token name #{name.inspect}")
end
@tokens << Token.new(name: name, pattern: pattern, id: @tokens.size, line_number: line_number)
token = Token.new(name: name, id: @tokens.size, line_number: line_number)
@tokens << token
pattern = Pattern.new(pattern: pattern, token: token, line_number: line_number)
@patterns << pattern
elsif sliced = input.slice!(/\Adrop\s+(\S+)\s*;/)
pattern = $1
@drop_tokens << Token.new(pattern: pattern, line_number: line_number)
@patterns << Pattern.new(pattern: pattern, line_number: line_number, drop: true)
elsif sliced = input.slice!(/\A(\S+)\s*->\s*(.*?)(?:;|<<\n(.*?)^>>\n)/m)
rule_name, components, code = $1, $2, $3
unless rule_name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/

View File

@ -5,8 +5,8 @@ class Propane
# Lexer DFA.
attr_accessor :dfa
def initialize(tokens, drop_tokens)
@dfa = DFA.new(tokens, drop_tokens)
def initialize(patterns)
@dfa = DFA.new(patterns)
end
def build_tables
@ -20,7 +20,7 @@ class Propane
elsif state.accepts.drop?
TOKEN_DROP
else
state.accepts.id
state.accepts.token.id
end
state_table << {
transition_table_index: transition_table.size,

View File

@ -3,11 +3,11 @@ class Propane
class DFA < FA
def initialize(tokens, drop_tokens)
def initialize(patterns)
super()
start_nfa = Regex::NFA.new
(tokens + drop_tokens).each do |token|
start_nfa.start_state.add_transition(nil, token.nfa.start_state)
patterns.each do |pattern|
start_nfa.start_state.add_transition(nil, pattern.nfa.start_state)
end
@nfa_state_sets = {}
@states = []
@ -40,7 +40,7 @@ class Propane
nfa_state_set.each do |nfa_state|
if nfa_state.accepts
if state.accepts
if nfa_state.accepts.id < state.accepts.id
if nfa_state.accepts.line_number < state.accepts.line_number
state.accepts = nfa_state.accepts
end
else

53
lib/propane/pattern.rb Normal file
View File

@ -0,0 +1,53 @@
class Propane
# A lexer pattern: associates a regular expression with an optional
# Token to return when matched (or marks matched input to be dropped).
class Pattern
# @return [String, nil]
# Pattern.
attr_reader :pattern
# @return [Token, nil]
# Token to be returned by this pattern.
attr_reader :token
# @return [Integer, nil]
# Line number where the pattern was defined in the input grammar.
attr_reader :line_number
# @return [Regex::NFA, nil]
# Regex NFA for matching the pattern.
attr_reader :nfa
# Construct a Pattern.
#
# @param options [Hash]
# Optional parameters.
# @option options [Boolean] :drop
# Whether this is a drop pattern.
# @option options [String, nil] :pattern
# Pattern.
# @option options [Token, nil] :token
# Token to be returned by this pattern.
# @option options [Integer, nil] :line_number
# Line number where the pattern was defined in the input grammar.
def initialize(options)
@drop = options[:drop]
@pattern = options[:pattern]
@token = options[:token]
@line_number = options[:line_number]
# NOTE(review): unlike the old Token#initialize, there is no nil-guard
# before building the Regex — assumes :pattern is always provided; confirm.
regex = Regex.new(@pattern)
# Mark this Pattern as the accepting value on the regex NFA's end state,
# so the lexer DFA can report which pattern matched.
regex.nfa.end_state.accepts = self
@nfa = regex.nfa
end
# Whether the pattern is a drop pattern.
#
# @return [Boolean]
# Whether the pattern is a drop pattern.
def drop?
@drop
end
end
end

View File

@ -6,10 +6,6 @@ class Propane
# Token name.
attr_reader :name
# @return [String, nil]
# Token pattern.
attr_reader :pattern
# @return [Integer, nil]
# Token ID.
attr_reader :id
@ -18,46 +14,26 @@ class Propane
# Line number where the token was defined in the input grammar.
attr_reader :line_number
# @return [Regex::NFA, nil]
# Regex NFA for matching the token.
attr_reader :nfa
# Construct a Token.
#
# @param options [Hash]
# Optional parameters.
# @option options [String, nil] :name
# Token name.
# @option options [String, nil] :pattern
# Token pattern.
# @option options [Integer, nil] :id
# Token ID.
# @option options [Integer, nil] :line_number
# Line number where the token was defined in the input grammar.
def initialize(options)
@name = options[:name]
@pattern = options[:pattern]
@id = options[:id]
@line_number = options[:line_number]
unless @pattern.nil?
regex = Regex.new(@pattern)
regex.nfa.end_state.accepts = self
@nfa = regex.nfa
end
end
def c_name
@name.upcase
end
# Whether the token is a drop token.
#
# @return [Boolean]
# Whether the token is a drop token.
def drop?
@name.nil?
end
def to_s
@name
end

View File

@ -35,7 +35,8 @@ class TestLexer
end
end
if last_accepts
[last_accepts.name, last_s]
name = last_accepts.token ? last_accepts.token.name : nil
[name, last_s]
end
end
@ -51,7 +52,7 @@ end
def run(grammar, input)
grammar = Propane::Grammar.new(grammar)
token_dfa = Propane::Lexer::DFA.new(grammar.tokens, grammar.drop_tokens)
token_dfa = Propane::Lexer::DFA.new(grammar.patterns)
test_lexer = TestLexer.new(token_dfa)
test_lexer.lex(input)
end