Add Lexer class; Move LexerDFA to Lexer::DFA

This commit is contained in:
Josh Holtrop 2021-08-18 17:09:45 -04:00
parent 28591907c1
commit 9459883e74
6 changed files with 99 additions and 91 deletions

View File

@ -9,7 +9,8 @@ require_relative "imbecile/generator"
require_relative "imbecile/grammar"
require_relative "imbecile/grammar/rule"
require_relative "imbecile/grammar/token"
require_relative "imbecile/lexer_dfa"
require_relative "imbecile/lexer"
require_relative "imbecile/lexer/dfa"
require_relative "imbecile/regex"
require_relative "imbecile/regex/nfa"
require_relative "imbecile/regex/unit"

View File

@ -25,7 +25,7 @@ module Imbecile
unless rule_names["Start"]
raise Error.new("Start rule not found")
end
lexer_dfa = LexerDFA.new(@grammar.tokens)
lexer_dfa = Lexer::DFA.new(@grammar.tokens)
classname = @grammar.classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../../assets/parser.d.erb")), nil, "<>")
result = erb.result(binding.clone)

5
lib/imbecile/lexer.rb Normal file
View File

@ -0,0 +1,5 @@
module Imbecile
# Namespace class for lexer-related components. It has no behavior of its
# own here; Lexer::DFA (lib/imbecile/lexer/dfa.rb) is defined inside it.
class Lexer
end
end

89
lib/imbecile/lexer/dfa.rb Normal file
View File

@ -0,0 +1,89 @@
module Imbecile
  class Lexer
    # Finite automaton for the lexer, built from the NFAs of a list of tokens.
    #
    # Every state of this automaton stands for one set of NFA states. Sets are
    # discovered starting from the combined NFA's start state and processed
    # from a worklist until no unprocessed set remains.
    class DFA < FA
      # Build the automaton for the given tokens.
      #
      # @param tokens enumerable of token objects; each must respond to +nfa+,
      #   whose +start_state+ is linked from a fresh combined start state.
      def initialize(tokens)
        super()
        combined_nfa = Regex::NFA.new
        # A nil transition from the shared start state into each token's NFA.
        tokens.each { |token| combined_nfa.start_state.add_transition(nil, token.nfa.start_state) }

        @nfa_state_sets = {}     # NFA state set => index into @states
        @states = []
        @to_process = Set.new    # NFA state sets still waiting to be expanded
        register_nfa_state_set(combined_nfa.start_state.nil_transition_states)

        until @to_process.empty?
          pending = @to_process.first
          @to_process.delete(pending)
          process_nfa_state_set(pending)
        end

        @start_state = @states.first
      end

      private

      # Allocate a state for +nfa_state_set+ and queue the set for processing.
      # Does nothing when the set has already been registered.
      def register_nfa_state_set(nfa_state_set)
        return if @nfa_state_sets.key?(nfa_state_set)

        @nfa_state_sets[nfa_state_set] = @states.size
        @states << State.new
        @to_process << nfa_state_set
      end

      # Fill in the acceptance value and the outgoing transitions of the state
      # that was registered for +nfa_state_set+.
      def process_nfa_state_set(nfa_state_set)
        index = @nfa_state_sets[nfa_state_set]
        state = @states[index]

        # The state at index 0 (the start state) is never given an accepts
        # value. For all others, the accepted object with the lowest id wins.
        if index.positive?
          nfa_state_set.each do |nfa_state|
            candidate = nfa_state.accepts
            next unless candidate

            current = state.accepts
            state.accepts = candidate if !current || candidate.id < current.id
          end
        end

        remaining = transitions_for(nfa_state_set)
        until remaining.empty?
          subrange = CodePointRange.first_subrange(remaining.map(&:code_point_range))
          upper = subrange.last

          # NFA states reachable directly over the chosen subrange ...
          direct_targets = remaining.each_with_object(Set.new) do |transition, found|
            found << transition.destination if transition.code_point_range.include?(subrange)
          end
          # ... widened by everything reachable from them via nil transitions.
          target_set = direct_targets.each_with_object(Set.new) do |nfa_state, closure|
            closure.merge(nfa_state.nil_transition_states)
          end

          register_nfa_state_set(target_set)
          state.add_transition(subrange, @states[@nfa_state_sets[target_set]])

          # Drop transitions fully consumed by the subrange, then trim the
          # front of any transition that still overlaps it.
          remaining.reject! { |transition| transition.code_point_range.last <= upper }
          remaining.map! do |transition|
            next transition unless transition.code_point_range.first <= upper

            trimmed_range = CodePointRange.new(upper + 1, transition.code_point_range.last)
            Regex::NFA::State::Transition.new(trimmed_range, transition.destination)
          end
        end
      end

      # Collect the code point transitions of every NFA state in the set into
      # one freshly allocated array (the caller mutates it).
      def transitions_for(nfa_state_set)
        nfa_state_set.each_with_object([]) do |nfa_state, collected|
          collected.concat(nfa_state.cp_transitions)
        end
      end
    end
  end
end

View File

@ -1,87 +0,0 @@
module Imbecile
# Finite automaton for the lexer, built from the NFAs of a list of tokens.
# Each state of this automaton corresponds to one set of NFA states.
class LexerDFA < FA
# Build the automaton for the given tokens.
#
# @param tokens enumerable of token objects; each must respond to +nfa+.
def initialize(tokens)
super()
# Fresh NFA whose start state gets a nil transition (presumably an epsilon
# transition -- confirm in Regex::NFA) to each token's NFA start state.
start_nfa = Regex::NFA.new
tokens.each do |token|
start_nfa.start_state.add_transition(nil, token.nfa.start_state)
end
# Maps an NFA state set to the index of its state in @states.
@nfa_state_sets = {}
@states = []
# Worklist of NFA state sets that still need to be processed.
@to_process = Set.new
nil_transition_states = start_nfa.start_state.nil_transition_states
register_nfa_state_set(nil_transition_states)
# Processing a set may register further sets, so loop until none remain.
while @to_process.size > 0
state_set = @to_process.first
@to_process.delete(state_set)
process_nfa_state_set(state_set)
end
# The first registered set (index 0) is the start state.
@start_state = @states[0]
end
private
# Allocate a new State for the given NFA state set and queue the set for
# processing. Does nothing if the set was already registered.
def register_nfa_state_set(nfa_state_set)
unless @nfa_state_sets.include?(nfa_state_set)
state_id = @states.size
@nfa_state_sets[nfa_state_set] = state_id
@states << State.new
@to_process << nfa_state_set
end
end
# Fill in the acceptance value and outgoing transitions of the state that
# was registered for the given NFA state set.
def process_nfa_state_set(nfa_state_set)
state_id = @nfa_state_sets[nfa_state_set]
state = @states[state_id]
# State 0 (the start state) is never assigned an accepts value.
if state_id > 0
nfa_state_set.each do |nfa_state|
if nfa_state.accepts
if state.accepts
# Several NFA states accept: keep the accepted object with the lowest id.
if nfa_state.accepts.id < state.accepts.id
state.accepts = nfa_state.accepts
end
else
state.accepts = nfa_state.accepts
end
end
end
end
transitions = transitions_for(nfa_state_set)
# Consume the pending transitions one code point subrange at a time.
while transitions.size > 0
# NOTE(review): first_subrange presumably returns the lowest range that is
# not split by any boundary of the given ranges -- confirm in CodePointRange.
subrange = CodePointRange.first_subrange(transitions.map(&:code_point_range))
# Destinations of every transition whose range includes the subrange.
dest_nfa_states = transitions.reduce(Set.new) do |result, transition|
if transition.code_point_range.include?(subrange)
result << transition.destination
end
result
end
# Union in the states reachable from those destinations via nil transitions.
dest_nfa_states = dest_nfa_states.reduce(Set.new) do |result, dest_nfa_state|
result + dest_nfa_state.nil_transition_states
end
register_nfa_state_set(dest_nfa_states)
dest_state = @states[@nfa_state_sets[dest_nfa_states]]
state.add_transition(subrange, dest_state)
# Drop transitions that end at or before the end of the subrange.
transitions.delete_if do |transition|
transition.code_point_range.last <= subrange.last
end
# Transitions that still overlap the subrange are replaced by a copy whose
# range starts just past it.
transitions.map! do |transition|
if transition.code_point_range.first <= subrange.last
Regex::NFA::State::Transition.new(CodePointRange.new(subrange.last + 1, transition.code_point_range.last), transition.destination)
else
transition
end
end
end
end
# Concatenate the cp_transitions of every NFA state in the set into a new
# array.
def transitions_for(nfa_state_set)
nfa_state_set.reduce([]) do |result, state|
result + state.cp_transitions
end
end
end
end

View File

@ -51,12 +51,12 @@ end
# Spec helper: parse the grammar text, build the lexer DFA from its tokens,
# and return the result of lexing +input+ with a TestLexer driven by that DFA.
def run(grammar, input)
g = Imbecile::Grammar.new(grammar)
# NOTE(review): the next two assignments look like the removed/added pair of
# a diff hunk whose -/+ markers were lost; only the second (Lexer::DFA) is
# expected to remain after this commit -- confirm.
token_dfa = Imbecile::LexerDFA.new(g.tokens)
token_dfa = Imbecile::Lexer::DFA.new(g.tokens)
test_lexer = TestLexer.new(token_dfa)
test_lexer.lex(input)
end
describe Imbecile::LexerDFA do
describe Imbecile::Lexer::DFA do
it "lexes a simple token" do
expect(run(<<EOF, "foo")).to eq [["foo", "foo"]]
token foo