Add lexer modes and $mode() code expansion
This commit is contained in:
parent
b2d11321fe
commit
02be6de48e
@ -124,14 +124,19 @@ class <%= @classname %>
|
|||||||
uint code_id;
|
uint code_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
<% transition_table, state_table = @lexer.build_tables %>
|
private struct Mode
|
||||||
|
{
|
||||||
|
uint state_table_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
<% transition_table, state_table, mode_table = @lexer.build_tables %>
|
||||||
private static immutable Transition transitions[] = [
|
private static immutable Transition transitions[] = [
|
||||||
<% transition_table.each do |transition_table_entry| %>
|
<% transition_table.each do |transition_table_entry| %>
|
||||||
Transition(<%= transition_table_entry[:first] %>u, <%= transition_table_entry[:last] %>u, <%= transition_table_entry[:destination] %>u),
|
Transition(<%= transition_table_entry[:first] %>u, <%= transition_table_entry[:last] %>u, <%= transition_table_entry[:destination] %>u),
|
||||||
<% end %>
|
<% end %>
|
||||||
];
|
];
|
||||||
|
|
||||||
private static const State states[] = [
|
private static immutable State states[] = [
|
||||||
<% state_table.each do |state_table_entry| %>
|
<% state_table.each do |state_table_entry| %>
|
||||||
State(<%= state_table_entry[:transition_table_index] %>u,
|
State(<%= state_table_entry[:transition_table_index] %>u,
|
||||||
<%= state_table_entry[:n_transitions] %>u,
|
<%= state_table_entry[:n_transitions] %>u,
|
||||||
@ -140,6 +145,12 @@ class <%= @classname %>
|
|||||||
<% end %>
|
<% end %>
|
||||||
];
|
];
|
||||||
|
|
||||||
|
private static immutable Mode modes[] = [
|
||||||
|
<% mode_table.each do |mode_table_entry| %>
|
||||||
|
Mode(<%= mode_table_entry[:state_table_offset] %>),
|
||||||
|
<% end %>
|
||||||
|
];
|
||||||
|
|
||||||
struct LexedToken
|
struct LexedToken
|
||||||
{
|
{
|
||||||
size_t row;
|
size_t row;
|
||||||
@ -153,11 +164,13 @@ class <%= @classname %>
|
|||||||
private size_t m_input_position;
|
private size_t m_input_position;
|
||||||
private size_t m_input_row;
|
private size_t m_input_row;
|
||||||
private size_t m_input_col;
|
private size_t m_input_col;
|
||||||
|
private size_t m_mode;
|
||||||
|
|
||||||
this(const(ubyte) * input, size_t input_length)
|
this(const(ubyte) * input, size_t input_length)
|
||||||
{
|
{
|
||||||
m_input = input;
|
m_input = input;
|
||||||
m_input_length = input_length;
|
m_input_length = input_length;
|
||||||
|
m_mode = <%= @lexer.mode_id("default") %>;
|
||||||
}
|
}
|
||||||
|
|
||||||
LexedToken lex_token()
|
LexedToken lex_token()
|
||||||
@ -211,7 +224,7 @@ class <%= @classname %>
|
|||||||
MatchInfo longest_match_info;
|
MatchInfo longest_match_info;
|
||||||
longest_match_info.token = _TOKEN_COUNT;
|
longest_match_info.token = _TOKEN_COUNT;
|
||||||
MatchInfo attempt_match_info;
|
MatchInfo attempt_match_info;
|
||||||
uint current_state;
|
uint current_state = modes[m_mode].state_table_offset;
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
auto decoded = Decoder.decode_code_point(&m_input[m_input_position + attempt_match_info.length], m_input_length - m_input_position - attempt_match_info.length);
|
auto decoded = Decoder.decode_code_point(&m_input[m_input_position + attempt_match_info.length], m_input_length - m_input_position - attempt_match_info.length);
|
||||||
|
@ -26,6 +26,17 @@ class Propane
|
|||||||
private
|
private
|
||||||
|
|
||||||
def process_grammar!
|
def process_grammar!
|
||||||
|
# Assign default pattern mode to patterns without a mode assigned.
|
||||||
|
found_default = false
|
||||||
|
@grammar.patterns.each do |pattern|
|
||||||
|
if pattern.mode.nil?
|
||||||
|
pattern.mode = "default"
|
||||||
|
found_default = true
|
||||||
|
end
|
||||||
|
end
|
||||||
|
unless found_default
|
||||||
|
raise Error.new("No patterns found for default mode")
|
||||||
|
end
|
||||||
# Add EOF token.
|
# Add EOF token.
|
||||||
@grammar.tokens << Token.new("$EOF", nil)
|
@grammar.tokens << Token.new("$EOF", nil)
|
||||||
tokens_by_name = {}
|
tokens_by_name = {}
|
||||||
@ -152,6 +163,13 @@ class Propane
|
|||||||
# Expand the grammar-file macros embedded in a user code block.
#
# Two macros are rewritten:
# - $token(name) becomes "TOKEN_" followed by Token.code_name(name).
# - $mode(name) becomes an assignment of that lexer mode's ID to m_mode.
#
# @param code [String]
#   User code block taken from the grammar.
#
# @return [String]
#   The code with every macro occurrence replaced.
#
# @raise [Error]
#   If $mode() names a mode for which the lexer has no ID.
def expand_code(code)
  code.gsub(/\$token\(([$\w]+)\)/) do
    "TOKEN_#{Token.code_name(Regexp.last_match(1))}"
  end.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do
    # Capture the name first; the lookup below must not depend on match state.
    mode_name = Regexp.last_match(1)
    mode_id = @lexer.mode_id(mode_name)
    raise Error, "Lexer mode '#{mode_name}' not found" unless mode_id
    "m_mode = #{mode_id}u"
  end
end
|
||||||
|
|
||||||
|
@ -1,51 +1,74 @@
|
|||||||
class Propane
|
class Propane
|
||||||
# Builds the tables that drive the generated lexer.
#
# The grammar's patterns are grouped by lexer mode and one DFA is built per
# mode. The states of all mode DFAs are appended to a single state table; the
# mode table records where each mode's states begin in that table.
class Lexer

  # @param grammar [Grammar]
  #   Grammar supplying the patterns and tokens.
  def initialize(grammar)
    @grammar = grammar
    @modes = nil
  end

  # Build the lexer tables.
  #
  # The per-mode DFAs are rebuilt from the grammar's current patterns on
  # every call.
  #
  # @return [Array<Array<Hash>>]
  #   Three arrays, in order: the transition table (:first, :last,
  #   :destination), the state table (:transition_table_index,
  #   :n_transitions, :token, :code_id) and the mode table
  #   (:state_table_offset).
  def build_tables
    @modes = build_modes
    transition_table = []
    state_table = []
    mode_table = []
    @modes.each_value do |mode_info|
      state_table_offset = state_table.size
      mode_table << {state_table_offset: state_table_offset}
      states = mode_info[:dfa].enumerate
      states.each do |state, _id|
        state_table << {
          transition_table_index: transition_table.size,
          n_transitions: state.transitions.size,
          token: token_for(state),
          code_id: code_id_for(state),
        }
        state.transitions.each do |transition|
          transition_table << {
            first: transition.code_point_range.first,
            last: transition.code_point_range.last,
            # The ID from enumerate is relative to this mode's DFA; shift it
            # to the mode's position in the combined state table.
            destination: states[transition.destination] + state_table_offset,
          }
        end
      end
    end
    [transition_table, state_table, mode_table]
  end

  # Get ID for a mode.
  #
  # May be called before build_tables; the mode map is built on demand.
  #
  # @param mode_name [String]
  #   Mode name.
  #
  # @return [Integer, nil]
  #   Mode ID, or nil if no pattern uses a mode with that name.
  def mode_id(mode_name)
    @modes ||= build_modes
    @modes.dig(mode_name, :id)
  end

  private

  # Group the grammar's patterns by mode, build one DFA per mode and number
  # the modes in order of first appearance.
  #
  # @return [Hash]
  #   Mode name => {dfa: DFA, id: Integer}.
  def build_modes
    modes = @grammar.patterns.group_by(&:mode).transform_values do |patterns|
      {dfa: DFA.new(patterns)}
    end
    modes.each_value.with_index do |mode_info, index|
      mode_info[:id] = index
    end
    modes
  end

  # Token value stored in the state table for a DFA state.
  #
  # @return [Integer]
  #   TOKEN_DROP for a drop pattern, the token's ID for a token pattern,
  #   otherwise the number of grammar tokens (non-accepting state, or a
  #   pattern that yields no token).
  def token_for(state)
    accepts = state.accepts
    return TOKEN_DROP if accepts&.drop?
    token = accepts&.token
    token ? token.id : @grammar.tokens.size
  end

  # Code ID stored in the state table for a DFA state.
  #
  # @return [Integer]
  #   The accepted pattern's code ID, or 0xFFFF_FFFF when there is none.
  def code_id_for(state)
    accepts = state.accepts
    (accepts && accepts.code_id) || 0xFFFF_FFFF
  end

end
|
||||||
|
@ -28,7 +28,7 @@ class Propane
|
|||||||
|
|
||||||
# @return [String, nil]
|
# @return [String, nil]
|
||||||
# Lexer mode for this pattern.
|
# Lexer mode for this pattern.
|
||||||
attr_reader :mode
|
attr_accessor :mode
|
||||||
|
|
||||||
# Construct a Pattern.
|
# Construct a Pattern.
|
||||||
#
|
#
|
||||||
|
@ -192,4 +192,37 @@ EOF
|
|||||||
"def!",
|
"def!",
|
||||||
])
|
])
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "supports lexer modes" do
|
||||||
|
write_grammar <<EOF
|
||||||
|
token abc;
|
||||||
|
token def;
|
||||||
|
tokenid string;
|
||||||
|
drop /\\s+/;
|
||||||
|
/"/ <<
|
||||||
|
writeln("begin string mode");
|
||||||
|
$mode(string);
|
||||||
|
>>
|
||||||
|
string: /[^"]+/ <<
|
||||||
|
writeln("captured string");
|
||||||
|
>>
|
||||||
|
string: /"/ <<
|
||||||
|
$mode(default);
|
||||||
|
return $token(string);
|
||||||
|
>>
|
||||||
|
Start -> abc string def;
|
||||||
|
EOF
|
||||||
|
build_parser
|
||||||
|
compile("spec/test_lexer_modes.d")
|
||||||
|
results = run
|
||||||
|
expect(results.status).to eq 0
|
||||||
|
verify_lines(results.stdout, [
|
||||||
|
"begin string mode",
|
||||||
|
"captured string",
|
||||||
|
"pass1",
|
||||||
|
"begin string mode",
|
||||||
|
"captured string",
|
||||||
|
"pass2",
|
||||||
|
])
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
20
spec/test_lexer_modes.d
Normal file
20
spec/test_lexer_modes.d
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
import testparser;
|
||||||
|
import std.stdio;
|
||||||
|
|
||||||
|
/// Program entry point. It only returns 0; the parser checks are in the
/// unittest block below.
int main()
{
    return 0;
}

/// Parses two inputs that each contain a double-quoted string between `abc`
/// and `def`, and prints "pass<N>" after each successful parse.
unittest
{
    string[] inputs = [
        `abc "a string" def`,
        `abc "abc def" def`,
    ];

    foreach (index, text; inputs)
    {
        auto parser = new Testparser.Parser(cast(const(ubyte) *)text.ptr, text.length);
        assert(parser.parse() == true);
        // index is zero-based; the reported pass number starts at 1.
        writeln("pass", index + 1);
    }
}
|
Loading…
x
Reference in New Issue
Block a user