Execute user code blocks assigned to tokens

This commit is contained in:
Josh Holtrop 2022-09-24 17:31:40 -04:00
parent 92ce30f354
commit 672098ad32
6 changed files with 122 additions and 8 deletions

View File

@@ -119,6 +119,7 @@ class <%= @classname %>
uint transition_table_index;
uint n_transitions;
uint accepts;
uint code_id;
}
<% transition_table, state_table = @lexer.build_tables %>
@@ -130,7 +131,10 @@ class <%= @classname %>
private static const State states[] = [
<% state_table.each do |state_table_entry| %>
State(<%= state_table_entry[:transition_table_index] %>u, <%= state_table_entry[:n_transitions] %>u, <%= state_table_entry[:accepts] %>u),
State(<%= state_table_entry[:transition_table_index] %>u,
<%= state_table_entry[:n_transitions] %>u,
<%= state_table_entry[:accepts] %>u,
<%= state_table_entry[:code_id] %>u),
<% end %>
];
@@ -166,6 +170,31 @@ class <%= @classname %>
}
}
/**
 * Execute user code associated with a lexer pattern.
 *
 * @param code_id The ID of the user code block to execute.
 *
 * @return Token ID to accept, or _TOKEN_NONE if the user code does
 * not explicitly return a token.
 */
private uint user_code(uint code_id)
{
switch (code_id)
{
/* ERB: emit one switch case per grammar pattern that carries a user
 * code block. Patterns without code have a nil code_id and generate
 * no case (see the `if pattern.code_id` guard). The user's code is
 * spliced in verbatim; if it returns, that return value wins. */
<% @grammar.patterns.each do |pattern| %>
<% if pattern.code_id %>
case <%= pattern.code_id %>u: {
<%= pattern.code %>
} break;
<% end %>
<% end %>
/* Unrecognized code_id: nothing to run, fall through to the
 * "no explicit token" return below. */
default: break;
}
return _TOKEN_NONE;
}
private LexedToken attempt_lex_token()
{
LexedToken lt = LexedToken(m_input_row, m_input_col, 0, _TOKEN_NONE);
@@ -175,6 +204,7 @@ class <%= @classname %>
size_t delta_row;
size_t delta_col;
uint token;
uint code_id;
}
LexedTokenState last_accepts_info;
last_accepts_info.token = _TOKEN_NONE;
@@ -209,6 +239,7 @@ class <%= @classname %>
if (states[current_state].accepts != _TOKEN_NONE)
{
attempt_info.token = states[current_state].accepts;
attempt_info.code_id = states[current_state].code_id;
last_accepts_info = attempt_info;
}
}
@@ -220,10 +251,24 @@ class <%= @classname %>
}
if (!lex_continue)
{
if (last_accepts_info.token != _TOKEN_NONE)
bool pattern_accepted = false;
uint token_to_accept = last_accepts_info.token;
if (last_accepts_info.code_id != 0xFFFF_FFFFu)
{
lt.token = last_accepts_info.token;
lt.length = last_accepts_info.length;
uint user_code_token = user_code(last_accepts_info.code_id);
/* A return of _TOKEN_NONE from user_code() means
* that the user code did not explicitly return a
* token. So only override the token to return if the
* user code does explicitly return a token. */
if (user_code_token != _TOKEN_NONE)
{
token_to_accept = user_code_token;
}
pattern_accepted = true;
}
if (pattern_accepted || (token_to_accept != _TOKEN_NONE))
{
/* Update the input position tracking. */
m_input_position += last_accepts_info.length;
m_input_row += last_accepts_info.delta_row;
if (last_accepts_info.delta_row != 0u)
@@ -235,7 +280,13 @@ class <%= @classname %>
m_input_col += last_accepts_info.delta_col;
}
}
break;
if (token_to_accept != _TOKEN_NONE)
{
/* We have a token to accept. */
lt.token = last_accepts_info.token;
lt.length = last_accepts_info.length;
break;
}
}
}
return lt;

View File

@@ -12,6 +12,7 @@ class Propane
@patterns = []
@tokens = []
@rules = []
@code_id = 0
input = input.gsub("\r\n", "\n")
parse_grammar(input)
end
@@ -29,7 +30,7 @@ class Propane
@modulename = $1
elsif sliced = input.slice!(/\Aclass\s+(\S+)\s*;/)
@classname = $1
elsif sliced = input.slice!(/\Atoken\s+(\S+?)(?:\s+(.+?))?\s*(?:;|<<\n(.*?)^>>\n)/m)
elsif sliced = input.slice!(/\Atoken\s+(\S+?)(?:\s+([^\n]+?))?\s*(?:;|<<\n(.*?)^>>\n)/m)
name, pattern, code = $1, $2, $3
if pattern.nil?
pattern = name
@@ -39,7 +40,13 @@ class Propane
end
token = Token.new(name: name, id: @tokens.size, line_number: line_number)
@tokens << token
pattern = Pattern.new(pattern: pattern, token: token, line_number: line_number)
if code
code_id = @code_id
@code_id += 1
else
code_id = nil
end
pattern = Pattern.new(pattern: pattern, token: token, line_number: line_number, code: code, code_id: code_id)
@patterns << pattern
elsif sliced = input.slice!(/\Atokenid\s+(\S+?)\s*;/m)
name = $1
@@ -51,7 +58,7 @@ class Propane
elsif sliced = input.slice!(/\Adrop\s+(\S+)\s*;/)
pattern = $1
@patterns << Pattern.new(pattern: pattern, line_number: line_number, drop: true)
elsif sliced = input.slice!(/\A(\S+)\s*->\s*(.*?)(?:;|<<\n(.*?)^>>\n)/m)
elsif sliced = input.slice!(/\A(\S+)\s*->\s*([^\n]*?)(?:;|<<\n(.*?)^>>\n)/m)
rule_name, components, code = $1, $2, $3
unless rule_name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
raise Error.new("Invalid rule name #{name.inspect}")

View File

@@ -22,10 +22,17 @@ class Propane
else
state.accepts.token.id
end
code_id =
if state.accepts && state.accepts.code_id
state.accepts.code_id
else
0xFFFF_FFFF
end
state_table << {
transition_table_index: transition_table.size,
n_transitions: state.transitions.size,
accepts: accepts,
code_id: code_id,
}
state.transitions.each do |transition|
transition_table << {

View File

@@ -2,6 +2,14 @@ class Propane
class Pattern
# @return [String, nil]
# Code block to execute when the pattern is matched.
attr_reader :code
# @option options [Integer, nil] :code_id
# Code block ID.
attr_reader :code_id
# @return [String, nil]
# Pattern.
attr_reader :pattern
@@ -22,6 +30,10 @@ class Propane
#
# @param options [Hash]
# Optional parameters.
# @option options [String, nil] :code
# Code block to execute when the pattern is matched.
# @option options [Integer, nil] :code_id
# Code block ID.
# @option options [Boolean] :drop
# Whether this is a drop pattern.
# @option options [String, nil] :pattern
@@ -31,6 +43,8 @@ class Propane
# @option options [Integer, nil] :line_number
# Line number where the token was defined in the input grammar.
def initialize(options)
@code = options[:code]
@code_id = options[:code_id]
@drop = options[:drop]
@pattern = options[:pattern]
@token = options[:token]

View File

@@ -95,4 +95,19 @@ EOF
compile("spec/test_d_lexer2.d")
run
end
it "executes user code when matching lexer token" do
# Grammar under test: token `abc` carries a user code block (a D
# `writeln` call), token `def` does not. The start rule accepts zero or
# more `abc` tokens followed by a single `def`.
write_grammar <<EOF
token abc <<
writeln("abc!");
>>
token def;
Start -> Abcs def;
Abcs -> ;
Abcs -> abc Abcs;
EOF
build_parser
# Compile the generated parser together with the companion D harness,
# whose unittest block feeds it "abcdef" and "abcabcdef", then run it.
compile("spec/test_user_code.d")
run
end
end

20
spec/test_user_code.d Normal file
View File

@@ -0,0 +1,20 @@
import testparser;
import std.stdio;
/* Entry point of the spec harness. All checks live in the unittest
 * block below (run before main when the build enables unit tests), so
 * main simply reports success. */
int main()
{
return 0;
}
unittest
{
    // First input: a single `abc` token followed by `def`.
    string text1 = "abcdef";
    auto p1 = new Testparser.Parser(cast(const(ubyte) *)text1.ptr, text1.length);
    assert(p1.parse() == true);
    writeln("pass1");

    // Second input: `abc` matched twice before `def`, so the pattern's
    // user code block (if any) should execute for each match.
    string text2 = "abcabcdef";
    auto p2 = new Testparser.Parser(cast(const(ubyte) *)text2.ptr, text2.length);
    assert(p2.parse() == true);
    writeln("pass2");
}