Add lexer modes and $mode() code expansion
This commit is contained in:
parent
b2d11321fe
commit
02be6de48e
@ -124,14 +124,19 @@ class <%= @classname %>
|
||||
uint code_id;
|
||||
}
|
||||
|
||||
<% transition_table, state_table = @lexer.build_tables %>
|
||||
/// Lexer mode descriptor; one entry per mode is emitted into the modes[] table.
private struct Mode
|
||||
{
|
||||
/// Index into states[] of the state at which matching starts for this mode.
uint state_table_offset;
|
||||
}
|
||||
|
||||
<% transition_table, state_table, mode_table = @lexer.build_tables %>
|
||||
// Generated transition table: one Transition(first, last, destination) per
// entry of transition_table returned by @lexer.build_tables.
private static immutable Transition transitions[] = [
|
||||
<% transition_table.each do |transition_table_entry| %>
|
||||
Transition(<%= transition_table_entry[:first] %>u, <%= transition_table_entry[:last] %>u, <%= transition_table_entry[:destination] %>u),
|
||||
<% end %>
|
||||
];
|
||||
|
||||
private static const State states[] = [
|
||||
private static immutable State states[] = [
|
||||
<% state_table.each do |state_table_entry| %>
|
||||
State(<%= state_table_entry[:transition_table_index] %>u,
|
||||
<%= state_table_entry[:n_transitions] %>u,
|
||||
@ -140,6 +145,12 @@ class <%= @classname %>
|
||||
<% end %>
|
||||
];
|
||||
|
||||
// Generated mode table: one Mode per entry of mode_table returned by
// @lexer.build_tables. It is indexed by m_mode when lex_token picks its
// starting state.
// NOTE(review): entry order is assumed to match the IDs handed out by
// @lexer.mode_id (both iterate the same @modes hash) -- confirm.
private static immutable Mode modes[] = [
|
||||
<% mode_table.each do |mode_table_entry| %>
|
||||
Mode(<%= mode_table_entry[:state_table_offset] %>),
|
||||
<% end %>
|
||||
];
|
||||
|
||||
struct LexedToken
|
||||
{
|
||||
size_t row;
|
||||
@ -153,11 +164,13 @@ class <%= @classname %>
|
||||
private size_t m_input_position;
|
||||
private size_t m_input_row;
|
||||
private size_t m_input_col;
|
||||
private size_t m_mode;
|
||||
|
||||
/// Store the input buffer and its length, and select the initial lexer mode.
///
/// Params:
///   input = pointer to the first byte of the input to lex
///   input_length = number of bytes available at `input`
this(const(ubyte) * input, size_t input_length)
|
||||
{
|
||||
m_input = input;
|
||||
m_input_length = input_length;
|
||||
// Start in the "default" mode; the template substitutes its numeric ID here.
m_mode = <%= @lexer.mode_id("default") %>;
|
||||
}
|
||||
|
||||
LexedToken lex_token()
|
||||
@ -211,7 +224,7 @@ class <%= @classname %>
|
||||
MatchInfo longest_match_info;
|
||||
longest_match_info.token = _TOKEN_COUNT;
|
||||
MatchInfo attempt_match_info;
|
||||
uint current_state;
|
||||
uint current_state = modes[m_mode].state_table_offset;
|
||||
for (;;)
|
||||
{
|
||||
auto decoded = Decoder.decode_code_point(&m_input[m_input_position + attempt_match_info.length], m_input_length - m_input_position - attempt_match_info.length);
|
||||
|
@ -26,6 +26,17 @@ class Propane
|
||||
private
|
||||
|
||||
def process_grammar!
|
||||
# Assign default pattern mode to patterns without a mode assigned.
|
||||
found_default = false
|
||||
@grammar.patterns.each do |pattern|
|
||||
if pattern.mode.nil?
|
||||
pattern.mode = "default"
|
||||
found_default = true
|
||||
end
|
||||
end
|
||||
unless found_default
|
||||
raise Error.new("No patterns found for default mode")
|
||||
end
|
||||
# Add EOF token.
|
||||
@grammar.tokens << Token.new("$EOF", nil)
|
||||
tokens_by_name = {}
|
||||
@ -152,6 +163,13 @@ class Propane
|
||||
# Expand the `$token(...)` and `$mode(...)` placeholders in user code.
#
# `$token(NAME)` is replaced with `TOKEN_` followed by
# `Token.code_name(NAME)`. `$mode(NAME)` is replaced with an assignment of
# the mode's numeric ID to `m_mode` (for example `m_mode = 1u`).
#
# @param code [String]
#   User code containing zero or more placeholders.
#
# @return [String]
#   Code with all placeholders expanded.
#
# @raise [Error]
#   If a `$mode(...)` placeholder names a mode unknown to the lexer.
def expand_code(code)
  with_tokens = code.gsub(/\$token\(([$\w]+)\)/) do
    "TOKEN_#{Token.code_name(Regexp.last_match(1))}"
  end
  with_tokens.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do
    mode_name = Regexp.last_match(1)
    mode_id = @lexer.mode_id(mode_name)
    # mode_id returns nil for an unknown name; fail loudly instead of
    # emitting "m_mode = u" into the generated source.
    raise Error.new("Lexer mode '#{mode_name}' not found") unless mode_id
    "m_mode = #{mode_id}u"
  end
end
|
||||
|
||||
|
@ -1,51 +1,74 @@
|
||||
class Propane
|
||||
class Lexer
|
||||
|
||||
# @return [DFA]
|
||||
# Lexer DFA.
|
||||
attr_accessor :dfa
|
||||
|
||||
def initialize(grammar)
|
||||
@grammar = grammar
|
||||
@dfa = DFA.new(grammar.patterns)
|
||||
end
|
||||
|
||||
def build_tables
|
||||
@modes = @grammar.patterns.group_by do |pattern|
|
||||
pattern.mode
|
||||
end.transform_values do |patterns|
|
||||
{dfa: DFA.new(patterns)}
|
||||
end
|
||||
@modes.each_with_index do |(mode_name, mode_info), index|
|
||||
mode_info[:id] = index
|
||||
end
|
||||
transition_table = []
|
||||
state_table = []
|
||||
states = @dfa.enumerate
|
||||
states.each do |state, id|
|
||||
token =
|
||||
if state.accepts.nil?
|
||||
@grammar.tokens.size
|
||||
elsif state.accepts.drop?
|
||||
TOKEN_DROP
|
||||
elsif state.accepts.token
|
||||
state.accepts.token.id
|
||||
else
|
||||
@grammar.tokens.size
|
||||
end
|
||||
code_id =
|
||||
if state.accepts && state.accepts.code_id
|
||||
state.accepts.code_id
|
||||
else
|
||||
0xFFFF_FFFF
|
||||
end
|
||||
state_table << {
|
||||
transition_table_index: transition_table.size,
|
||||
n_transitions: state.transitions.size,
|
||||
token: token,
|
||||
code_id: code_id,
|
||||
mode_table = []
|
||||
@modes.each do |mode_name, mode_info|
|
||||
state_table_offset = state_table.size
|
||||
mode_table << {
|
||||
state_table_offset: state_table_offset,
|
||||
}
|
||||
state.transitions.each do |transition|
|
||||
transition_table << {
|
||||
first: transition.code_point_range.first,
|
||||
last: transition.code_point_range.last,
|
||||
destination: states[transition.destination],
|
||||
states = mode_info[:dfa].enumerate
|
||||
states.each do |state, id|
|
||||
token =
|
||||
if state.accepts.nil?
|
||||
@grammar.tokens.size
|
||||
elsif state.accepts.drop?
|
||||
TOKEN_DROP
|
||||
elsif state.accepts.token
|
||||
state.accepts.token.id
|
||||
else
|
||||
@grammar.tokens.size
|
||||
end
|
||||
code_id =
|
||||
if state.accepts && state.accepts.code_id
|
||||
state.accepts.code_id
|
||||
else
|
||||
0xFFFF_FFFF
|
||||
end
|
||||
state_table << {
|
||||
transition_table_index: transition_table.size,
|
||||
n_transitions: state.transitions.size,
|
||||
token: token,
|
||||
code_id: code_id,
|
||||
}
|
||||
state.transitions.each do |transition|
|
||||
transition_table << {
|
||||
first: transition.code_point_range.first,
|
||||
last: transition.code_point_range.last,
|
||||
destination: states[transition.destination] + state_table_offset,
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
[transition_table, state_table]
|
||||
[transition_table, state_table, mode_table]
|
||||
end
|
||||
|
||||
# Get ID for a mode.
#
# In the code visible here @modes is only assigned by #build_tables, so
# that method is expected to have run before this one is called.
#
# @param mode_name [String]
#   Mode name.
#
# @return [Integer, nil]
#   Mode ID, or nil if no mode with that name exists.
def mode_id(mode_name)
  @modes.dig(mode_name, :id)
end
|
||||
|
||||
end
|
||||
|
@ -28,7 +28,7 @@ class Propane
|
||||
|
||||
# @return [String, nil]
|
||||
# Lexer mode for this pattern.
|
||||
attr_reader :mode
|
||||
attr_accessor :mode
|
||||
|
||||
# Construct a Pattern.
|
||||
#
|
||||
|
@ -192,4 +192,37 @@ EOF
|
||||
"def!",
|
||||
])
|
||||
end
|
||||
|
||||
# End-to-end check of lexer modes: the grammar's `"` pattern switches into a
# "string" mode via $mode(string), and the closing `"` pattern in that mode
# switches back with $mode(default) and returns the `string` token.
it "supports lexer modes" do
|
||||
write_grammar <<EOF
|
||||
token abc;
|
||||
token def;
|
||||
tokenid string;
|
||||
drop /\\s+/;
|
||||
/"/ <<
|
||||
writeln("begin string mode");
|
||||
$mode(string);
|
||||
>>
|
||||
string: /[^"]+/ <<
|
||||
writeln("captured string");
|
||||
>>
|
||||
string: /"/ <<
|
||||
$mode(default);
|
||||
return $token(string);
|
||||
>>
|
||||
Start -> abc string def;
|
||||
EOF
|
||||
build_parser
|
||||
compile("spec/test_lexer_modes.d")
|
||||
results = run
|
||||
expect(results.status).to eq 0
|
||||
# spec/test_lexer_modes.d parses two inputs, each with one quoted string, so
# the mode-entry and capture messages appear once before each pass marker.
verify_lines(results.stdout, [
|
||||
"begin string mode",
|
||||
"captured string",
|
||||
"pass1",
|
||||
"begin string mode",
|
||||
"captured string",
|
||||
"pass2",
|
||||
])
|
||||
end
|
||||
end
|
||||
|
20
spec/test_lexer_modes.d
Normal file
20
spec/test_lexer_modes.d
Normal file
@ -0,0 +1,20 @@
|
||||
import testparser;
|
||||
import std.stdio;
|
||||
|
||||
// Entry point does no work itself; the checks live in the unittest block.
// NOTE(review): presumably the spec builds this file with unittests enabled
// so they run before main -- confirm against the spec's compile helper.
int main()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Parses two inputs that each contain one double-quoted string and prints a
// pass marker after each successful parse.
unittest
|
||||
{
|
||||
string input = `abc "a string" def`;
|
||||
auto parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length);
|
||||
assert(parser.parse() == true);
|
||||
writeln("pass1");
|
||||
|
||||
// The quoted text here contains "abc" and "def", which are also token names
// in the spec's grammar -- presumably to check that they are consumed as
// string content while the lexer is in string mode.
input = `abc "abc def" def`;
|
||||
parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length);
|
||||
assert(parser.parse() == true);
|
||||
writeln("pass2");
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user