Add Lexer class; Move LexerDFA to Lexer::DFA

parent 28591907c1
commit 9459883e74
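
The visible API change is the namespacing: LexerDFA becomes Lexer::DFA, nested under a new (currently empty) Lexer class. Call sites update as in this minimal sketch (grammar stands in for a parsed Imbecile::Grammar, as in the spec change below):

    dfa = Imbecile::LexerDFA.new(grammar.tokens)      # before
    dfa = Imbecile::Lexer::DFA.new(grammar.tokens)    # after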
@@ -9,7 +9,8 @@ require_relative "imbecile/generator"
 require_relative "imbecile/grammar"
 require_relative "imbecile/grammar/rule"
 require_relative "imbecile/grammar/token"
-require_relative "imbecile/lexer_dfa"
+require_relative "imbecile/lexer"
+require_relative "imbecile/lexer/dfa"
 require_relative "imbecile/regex"
 require_relative "imbecile/regex/nfa"
 require_relative "imbecile/regex/unit"
@@ -25,7 +25,7 @@ module Imbecile
       unless rule_names["Start"]
         raise Error.new("Start rule not found")
       end
-      lexer_dfa = LexerDFA.new(@grammar.tokens)
+      lexer_dfa = Lexer::DFA.new(@grammar.tokens)
       classname = @grammar.classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
       erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../../assets/parser.d.erb")), nil, "<>")
       result = erb.result(binding.clone)
lib/imbecile/lexer.rb (new file, 5 lines)
@@ -0,0 +1,5 @@
+module Imbecile
+  class Lexer
+
+  end
+end
lib/imbecile/lexer/dfa.rb (new file, 89 lines)
@@ -0,0 +1,89 @@
+module Imbecile
+  class Lexer
+
+    class DFA < FA
+
+      def initialize(tokens)
+        super()
+        start_nfa = Regex::NFA.new
+        tokens.each do |token|
+          start_nfa.start_state.add_transition(nil, token.nfa.start_state)
+        end
+        @nfa_state_sets = {}
+        @states = []
+        @to_process = Set.new
+        nil_transition_states = start_nfa.start_state.nil_transition_states
+        register_nfa_state_set(nil_transition_states)
+        while @to_process.size > 0
+          state_set = @to_process.first
+          @to_process.delete(state_set)
+          process_nfa_state_set(state_set)
+        end
+        @start_state = @states[0]
+      end
+
+      private
+
+      def register_nfa_state_set(nfa_state_set)
+        unless @nfa_state_sets.include?(nfa_state_set)
+          state_id = @states.size
+          @nfa_state_sets[nfa_state_set] = state_id
+          @states << State.new
+          @to_process << nfa_state_set
+        end
+      end
+
+      def process_nfa_state_set(nfa_state_set)
+        state_id = @nfa_state_sets[nfa_state_set]
+        state = @states[state_id]
+        if state_id > 0
+          nfa_state_set.each do |nfa_state|
+            if nfa_state.accepts
+              if state.accepts
+                if nfa_state.accepts.id < state.accepts.id
+                  state.accepts = nfa_state.accepts
+                end
+              else
+                state.accepts = nfa_state.accepts
+              end
+            end
+          end
+        end
+        transitions = transitions_for(nfa_state_set)
+        while transitions.size > 0
+          subrange = CodePointRange.first_subrange(transitions.map(&:code_point_range))
+          dest_nfa_states = transitions.reduce(Set.new) do |result, transition|
+            if transition.code_point_range.include?(subrange)
+              result << transition.destination
+            end
+            result
+          end
+          dest_nfa_states = dest_nfa_states.reduce(Set.new) do |result, dest_nfa_state|
+            result + dest_nfa_state.nil_transition_states
+          end
+          register_nfa_state_set(dest_nfa_states)
+          dest_state = @states[@nfa_state_sets[dest_nfa_states]]
+          state.add_transition(subrange, dest_state)
+          transitions.delete_if do |transition|
+            transition.code_point_range.last <= subrange.last
+          end
+          transitions.map! do |transition|
+            if transition.code_point_range.first <= subrange.last
+              Regex::NFA::State::Transition.new(CodePointRange.new(subrange.last + 1, transition.code_point_range.last), transition.destination)
+            else
+              transition
+            end
+          end
+        end
+      end
+
+      def transitions_for(nfa_state_set)
+        nfa_state_set.reduce([]) do |result, state|
+          result + state.cp_transitions
+        end
+      end
+
+    end
+
+  end
+end
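
For context on the moved code (identical apart from the extra nesting level): DFA#initialize performs the standard subset construction, identifying each DFA state with a set of NFA states closed under nil (epsilon) transitions, while process_nfa_state_set repeatedly carves off the first disjoint code point subrange so that every DFA edge covers a non-overlapping range. A standalone sketch of that splitting step, using plain Ruby ranges instead of the gem's CodePointRange (first_subrange semantics are inferred from the call site above, not confirmed by this diff):

    # Hypothetical stand-in for CodePointRange.first_subrange: return the
    # lowest subrange over which the set of covering ranges stays constant.
    def first_subrange(ranges)
      lo = ranges.map(&:first).min
      # The subrange ends just before the next range boundary.
      hi = ranges.flat_map { |r| [r.first - 1, r.last] }.select { |b| b >= lo }.min
      lo..hi
    end

    first_subrange([97..109, 104..122])  # => 97..103 ("a".."g")
    first_subrange([104..109, 104..122]) # => 104..109 ("h".."m")

The loop in process_nfa_state_set then maps each such subrange to the nil-transition closure of the reachable NFA states, trims the consumed portion of each transition's range, and repeats until no transitions remain.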
lib/imbecile/lexer_dfa.rb (deleted file, 87 lines)
@@ -1,87 +0,0 @@
-module Imbecile
-
-  class LexerDFA < FA
-
-    def initialize(tokens)
-      super()
-      start_nfa = Regex::NFA.new
-      tokens.each do |token|
-        start_nfa.start_state.add_transition(nil, token.nfa.start_state)
-      end
-      @nfa_state_sets = {}
-      @states = []
-      @to_process = Set.new
-      nil_transition_states = start_nfa.start_state.nil_transition_states
-      register_nfa_state_set(nil_transition_states)
-      while @to_process.size > 0
-        state_set = @to_process.first
-        @to_process.delete(state_set)
-        process_nfa_state_set(state_set)
-      end
-      @start_state = @states[0]
-    end
-
-    private
-
-    def register_nfa_state_set(nfa_state_set)
-      unless @nfa_state_sets.include?(nfa_state_set)
-        state_id = @states.size
-        @nfa_state_sets[nfa_state_set] = state_id
-        @states << State.new
-        @to_process << nfa_state_set
-      end
-    end
-
-    def process_nfa_state_set(nfa_state_set)
-      state_id = @nfa_state_sets[nfa_state_set]
-      state = @states[state_id]
-      if state_id > 0
-        nfa_state_set.each do |nfa_state|
-          if nfa_state.accepts
-            if state.accepts
-              if nfa_state.accepts.id < state.accepts.id
-                state.accepts = nfa_state.accepts
-              end
-            else
-              state.accepts = nfa_state.accepts
-            end
-          end
-        end
-      end
-      transitions = transitions_for(nfa_state_set)
-      while transitions.size > 0
-        subrange = CodePointRange.first_subrange(transitions.map(&:code_point_range))
-        dest_nfa_states = transitions.reduce(Set.new) do |result, transition|
-          if transition.code_point_range.include?(subrange)
-            result << transition.destination
-          end
-          result
-        end
-        dest_nfa_states = dest_nfa_states.reduce(Set.new) do |result, dest_nfa_state|
-          result + dest_nfa_state.nil_transition_states
-        end
-        register_nfa_state_set(dest_nfa_states)
-        dest_state = @states[@nfa_state_sets[dest_nfa_states]]
-        state.add_transition(subrange, dest_state)
-        transitions.delete_if do |transition|
-          transition.code_point_range.last <= subrange.last
-        end
-        transitions.map! do |transition|
-          if transition.code_point_range.first <= subrange.last
-            Regex::NFA::State::Transition.new(CodePointRange.new(subrange.last + 1, transition.code_point_range.last), transition.destination)
-          else
-            transition
-          end
-        end
-      end
-    end
-
-    def transitions_for(nfa_state_set)
-      nfa_state_set.reduce([]) do |result, state|
-        result + state.cp_transitions
-      end
-    end
-
-  end
-
-end
@@ -51,12 +51,12 @@ end

 def run(grammar, input)
   g = Imbecile::Grammar.new(grammar)
-  token_dfa = Imbecile::LexerDFA.new(g.tokens)
+  token_dfa = Imbecile::Lexer::DFA.new(g.tokens)
   test_lexer = TestLexer.new(token_dfa)
   test_lexer.lex(input)
 end

-describe Imbecile::LexerDFA do
+describe Imbecile::Lexer::DFA do
   it "lexes a simple token" do
     expect(run(<<EOF, "foo")).to eq [["foo", "foo"]]
 token foo