Execute user code blocks assigned to tokens
parent 92ce30f354
commit 672098ad32
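This change lets a token definition in the grammar carry a user code block. The generator assigns each block a numeric code_id, records it in the lexer state table, and the generated lexer runs the block through a new user_code() helper whenever the matching pattern is accepted; the value the block returns (a token ID, or _TOKEN_NONE when it does not explicitly return one) feeds into which token, if any, is accepted. A minimal grammar using the new syntax, lifted from the spec added at the bottom of this diff:

    token abc <<
      writeln("abc!");
    >>
    token def;
    Start -> Abcs def;
    Abcs -> ;
    Abcs -> abc Abcs;

The first group of hunks edits the ERB template that generates the D lexer class: the State table entry gains a code_id field, the per-attempt lexer state tracks the code_id of the last accepting state, and attempt_lex_token() calls user_code() when the accepted pattern has an attached block (code_id != 0xFFFF_FFFF).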
@@ -119,6 +119,7 @@ class <%= @classname %>
         uint transition_table_index;
         uint n_transitions;
         uint accepts;
+        uint code_id;
     }

 <% transition_table, state_table = @lexer.build_tables %>
@@ -130,7 +131,10 @@ class <%= @classname %>

     private static const State states[] = [
 <% state_table.each do |state_table_entry| %>
-        State(<%= state_table_entry[:transition_table_index] %>u, <%= state_table_entry[:n_transitions] %>u, <%= state_table_entry[:accepts] %>u),
+        State(<%= state_table_entry[:transition_table_index] %>u,
+              <%= state_table_entry[:n_transitions] %>u,
+              <%= state_table_entry[:accepts] %>u,
+              <%= state_table_entry[:code_id] %>u),
 <% end %>
     ];

@@ -166,6 +170,31 @@ class <%= @classname %>
         }
     }

+    /**
+     * Execute user code associated with a lexer pattern.
+     *
+     * @param code_id The ID of the user code block to execute.
+     *
+     * @return Token ID to accept, or _TOKEN_NONE if the user code does
+     *   not explicitly return a token.
+     */
+    private uint user_code(uint code_id)
+    {
+        switch (code_id)
+        {
+<% @grammar.patterns.each do |pattern| %>
+<% if pattern.code_id %>
+            case <%= pattern.code_id %>u: {
+                <%= pattern.code %>
+            } break;
+<% end %>
+<% end %>
+            default: break;
+        }
+
+        return _TOKEN_NONE;
+    }
+
     private LexedToken attempt_lex_token()
     {
         LexedToken lt = LexedToken(m_input_row, m_input_col, 0, _TOKEN_NONE);
@@ -175,6 +204,7 @@ class <%= @classname %>
             size_t delta_row;
             size_t delta_col;
             uint token;
+            uint code_id;
         }
         LexedTokenState last_accepts_info;
         last_accepts_info.token = _TOKEN_NONE;
@@ -209,6 +239,7 @@ class <%= @classname %>
                 if (states[current_state].accepts != _TOKEN_NONE)
                 {
                     attempt_info.token = states[current_state].accepts;
+                    attempt_info.code_id = states[current_state].code_id;
                     last_accepts_info = attempt_info;
                 }
             }
@@ -220,10 +251,24 @@ class <%= @classname %>
             }
             if (!lex_continue)
             {
-                if (last_accepts_info.token != _TOKEN_NONE)
+                bool pattern_accepted = false;
+                uint token_to_accept = last_accepts_info.token;
+                if (last_accepts_info.code_id != 0xFFFF_FFFFu)
                 {
-                    lt.token = last_accepts_info.token;
-                    lt.length = last_accepts_info.length;
+                    uint user_code_token = user_code(last_accepts_info.code_id);
+                    /* A return of _TOKEN_NONE from user_code() means
+                     * that the user code did not explicitly return a
+                     * token. So only override the token to return if the
+                     * user code does explicitly return a token. */
+                    if (user_code_token != _TOKEN_NONE)
+                    {
+                        token_to_accept = user_code_token;
+                    }
+                    pattern_accepted = true;
+                }
+                if (pattern_accepted || (token_to_accept != _TOKEN_NONE))
+                {
+                    /* Update the input position tracking. */
                     m_input_position += last_accepts_info.length;
                     m_input_row += last_accepts_info.delta_row;
                     if (last_accepts_info.delta_row != 0u)
@@ -235,9 +280,15 @@ class <%= @classname %>
                         m_input_col += last_accepts_info.delta_col;
                     }
                 }
+                if (token_to_accept != _TOKEN_NONE)
+                {
+                    /* We have a token to accept. */
+                    lt.token = last_accepts_info.token;
+                    lt.length = last_accepts_info.length;
                     break;
                 }
             }
+        }
         return lt;
     }

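The remaining hunks move to the Ruby side. In the grammar parser, a running @code_id counter is introduced, the token and rule regexes use [^\n] in place of . so that an inline pattern or rule body stops at the end of its own line (with the /m flag, . would otherwise be free to run across newlines), and when a token definition includes a code block its Pattern is built with the block text and a freshly assigned code_id.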
@@ -12,6 +12,7 @@ class Propane
       @patterns = []
       @tokens = []
       @rules = []
+      @code_id = 0
       input = input.gsub("\r\n", "\n")
       parse_grammar(input)
     end
@@ -29,7 +30,7 @@ class Propane
           @modulename = $1
         elsif sliced = input.slice!(/\Aclass\s+(\S+)\s*;/)
           @classname = $1
-        elsif sliced = input.slice!(/\Atoken\s+(\S+?)(?:\s+(.+?))?\s*(?:;|<<\n(.*?)^>>\n)/m)
+        elsif sliced = input.slice!(/\Atoken\s+(\S+?)(?:\s+([^\n]+?))?\s*(?:;|<<\n(.*?)^>>\n)/m)
           name, pattern, code = $1, $2, $3
           if pattern.nil?
             pattern = name
@@ -39,7 +40,13 @@ class Propane
           end
           token = Token.new(name: name, id: @tokens.size, line_number: line_number)
           @tokens << token
-          pattern = Pattern.new(pattern: pattern, token: token, line_number: line_number)
+          if code
+            code_id = @code_id
+            @code_id += 1
+          else
+            code_id = nil
+          end
+          pattern = Pattern.new(pattern: pattern, token: token, line_number: line_number, code: code, code_id: code_id)
           @patterns << pattern
         elsif sliced = input.slice!(/\Atokenid\s+(\S+?)\s*;/m)
           name = $1
@@ -51,7 +58,7 @@ class Propane
         elsif sliced = input.slice!(/\Adrop\s+(\S+)\s*;/)
           pattern = $1
           @patterns << Pattern.new(pattern: pattern, line_number: line_number, drop: true)
-        elsif sliced = input.slice!(/\A(\S+)\s*->\s*(.*?)(?:;|<<\n(.*?)^>>\n)/m)
+        elsif sliced = input.slice!(/\A(\S+)\s*->\s*([^\n]*?)(?:;|<<\n(.*?)^>>\n)/m)
           rule_name, components, code = $1, $2, $3
           unless rule_name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
             raise Error.new("Invalid rule name #{name.inspect}")
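In the lexer table builder, each state table entry gains a code_id column: the code_id of the accepting pattern, or the sentinel 0xFFFF_FFFF when that pattern has no code block. This is the same sentinel the generated D lexer checks before calling user_code().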
@@ -22,10 +22,17 @@ class Propane
           else
             state.accepts.token.id
           end
+        code_id =
+          if state.accepts && state.accepts.code_id
+            state.accepts.code_id
+          else
+            0xFFFF_FFFF
+          end
         state_table << {
           transition_table_index: transition_table.size,
           n_transitions: state.transitions.size,
           accepts: accepts,
+          code_id: code_id,
         }
         state.transitions.each do |transition|
           transition_table << {
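The Pattern class grows matching code and code_id attributes, along with the corresponding constructor options and documentation.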
@@ -2,6 +2,14 @@ class Propane

   class Pattern

+    # @return [String, nil]
+    #   Code block to execute when the pattern is matched.
+    attr_reader :code
+
+    # @return [Integer, nil]
+    #   Code block ID.
+    attr_reader :code_id
+
     # @return [String, nil]
     #   Pattern.
     attr_reader :pattern
@@ -22,6 +30,10 @@ class Propane
     #
     # @param options [Hash]
     #   Optional parameters.
+    # @option options [String, nil] :code
+    #   Code block to execute when the pattern is matched.
+    # @option options [Integer, nil] :code_id
+    #   Code block ID.
     # @option options [Boolean] :drop
     #   Whether this is a drop pattern.
     # @option options [String, nil] :pattern
@@ -31,6 +43,8 @@ class Propane
     # @option options [Integer, nil] :line_number
     #   Line number where the token was defined in the input grammar.
     def initialize(options)
+      @code = options[:code]
+      @code_id = options[:code_id]
       @drop = options[:drop]
       @pattern = options[:pattern]
       @token = options[:token]
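A new spec drives the feature end to end: it writes a grammar whose abc token carries a writeln("abc!") code block, builds the parser, and compiles and runs the D test program added below.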
@@ -95,4 +95,19 @@ EOF
     compile("spec/test_d_lexer2.d")
     run
   end
+
+  it "executes user code when matching lexer token" do
+    write_grammar <<EOF
+token abc <<
+  writeln("abc!");
+>>
+token def;
+Start -> Abcs def;
+Abcs -> ;
+Abcs -> abc Abcs;
+EOF
+    build_parser
+    compile("spec/test_user_code.d")
+    run
+  end
 end
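The new D test program parses two inputs, "abcdef" and "abcabcdef", and asserts that both succeed; each match of the abc token should also print "abc!" via the token's code block.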
spec/test_user_code.d (new file, 20 lines)
@@ -0,0 +1,20 @@
+import testparser;
+import std.stdio;
+
+int main()
+{
+    return 0;
+}
+
+unittest
+{
+    string input = "abcdef";
+    auto parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length);
+    assert(parser.parse() == true);
+    writeln("pass1");
+
+    input = "abcabcdef";
+    parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length);
+    assert(parser.parse() == true);
+    writeln("pass2");
+}