From 2122ca02fe5b6c5eb372b01619e41c7d57d8553a Mon Sep 17 00:00:00 2001
From: Josh Holtrop
Date: Wed, 23 Jun 2021 23:15:02 -0400
Subject: [PATCH] Start generating lexer states and transitions

---
Review notes (free text between "---" and the diffstat is ignored by git am):

* What the patch does: the D template gains Transition and LexerState
  structs plus a lexer_states table with one entry per DFA state, and
  FA#enumerate now memoizes its state => id hash in @_enumerated so the
  template can call it repeatedly. Because of the memoization, changes made
  to the automaton after the first enumerate call are not reflected.

* Line structure and indentation were reconstructed from a copy in which all
  whitespace had been collapsed. If the context lines do not match the
  target tree exactly, apply with --ignore-whitespace.

* Transition.dest is emitted as the enumerate id minus 1. enumerate numbers
  states starting at 1 (id is incremented before it is assigned), while
  lexer_states is a zero-based array written in that same order, so the raw
  id would point one entry past the intended state. This assumes dest is
  used as an index into lexer_states; confirm against the lexer code.

* lexer_states is declared with the D-style array syntax
  (LexerState[] lexer_states) instead of the C-style postfix form.

* The "index" blob ids below are those of the original commit and no longer
  match the post-image after the two template edits above.

 assets/parser.d.erb      | 29 +++++++++++++++++++++++++++++
 lib/imbecile/regex/fa.rb | 25 ++++++++++++++-----------
 2 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/assets/parser.d.erb b/assets/parser.d.erb
index 4bcca7e..7c9e4fd 100644
--- a/assets/parser.d.erb
+++ b/assets/parser.d.erb
@@ -10,4 +10,33 @@ class <%= classname %>
         TOKEN_<%= token.c_name %>,
 <% end %>
     }
+
+    private struct Transition
+    {
+        uint first;
+        uint last;
+        uint dest;
+    }
+
+    private struct LexerState
+    {
+        Transition[] transitions;
+        size_t accepts;
+    }
+
+    private static const LexerState[] lexer_states = [
+<% lexer_dfa.enumerate.each do |state, index| %>
+        LexerState([
+<% state.transitions.each do |transition| %>
+            Transition(<%= transition.code_point_range.first %>, <%= transition.code_point_range.last %>, <%= lexer_dfa.enumerate[transition.destination] - 1 %>),
+<% end %>
+        ],
+<% if state.accepts %>
+        <%= state.accepts.id %>,
+<% else %>
+        cast(size_t)-1,
+<% end %>
+        ),
+<% end %>
+    ];
 }
diff --git a/lib/imbecile/regex/fa.rb b/lib/imbecile/regex/fa.rb
index 1115cac..afe2017 100644
--- a/lib/imbecile/regex/fa.rb
+++ b/lib/imbecile/regex/fa.rb
@@ -39,19 +39,22 @@ module Imbecile
       end
 
       def enumerate
-        id = 0
-        states = {}
-        visit = lambda do |state|
-          unless states.include?(state)
-            id += 1
-            states[state] = id
-            state.transitions.each do |transition|
-              visit[transition.destination]
+        @_enumerated ||=
+          begin
+            id = 0
+            states = {}
+            visit = lambda do |state|
+              unless states.include?(state)
+                id += 1
+                states[state] = id
+                state.transitions.each do |transition|
+                  visit[transition.destination]
+                end
+              end
             end
+            visit[@start_state]
+            states
           end
-        end
-        visit[@start_state]
-        states
       end
 
     end