Execute user code blocks assigned to tokens
This commit is contained in:
parent
92ce30f354
commit
672098ad32
@ -119,6 +119,7 @@ class <%= @classname %>
|
||||
uint transition_table_index;
|
||||
uint n_transitions;
|
||||
uint accepts;
|
||||
uint code_id;
|
||||
}
|
||||
|
||||
<% transition_table, state_table = @lexer.build_tables %>
|
||||
@ -130,7 +131,10 @@ class <%= @classname %>
|
||||
|
||||
private static const State states[] = [
|
||||
<% state_table.each do |state_table_entry| %>
|
||||
State(<%= state_table_entry[:transition_table_index] %>u, <%= state_table_entry[:n_transitions] %>u, <%= state_table_entry[:accepts] %>u),
|
||||
State(<%= state_table_entry[:transition_table_index] %>u,
|
||||
<%= state_table_entry[:n_transitions] %>u,
|
||||
<%= state_table_entry[:accepts] %>u,
|
||||
<%= state_table_entry[:code_id] %>u),
|
||||
<% end %>
|
||||
];
|
||||
|
||||
@ -166,6 +170,31 @@ class <%= @classname %>
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute user code associated with a lexer pattern.
|
||||
*
|
||||
* The switch body is produced by the template: one case is emitted for
* each grammar pattern that has a code_id, and the pattern's code text is
* pasted inside that case's braces.
*
* @param code_id The ID of the user code block to execute.
|
||||
*
|
||||
* @return Token ID to accept, or _TOKEN_NONE if the user code does
|
||||
* not explicitly return a token.
|
||||
*/
|
||||
private uint user_code(uint code_id)
|
||||
{
|
||||
switch (code_id)
|
||||
{
|
||||
/* Template loop: patterns without a code_id generate no case. */
<% @grammar.patterns.each do |pattern| %>
|
||||
<% if pattern.code_id %>
|
||||
case <%= pattern.code_id %>u: {
|
||||
<%= pattern.code %>
|
||||
} break;
|
||||
<% end %>
|
||||
<% end %>
|
||||
/* A code_id with no generated case does nothing and falls through to
 * the _TOKEN_NONE return below. */
default: break;
|
||||
}
|
||||
|
||||
/* Reached whenever the selected case did not return on its own. */
return _TOKEN_NONE;
|
||||
}
|
||||
|
||||
private LexedToken attempt_lex_token()
|
||||
{
|
||||
LexedToken lt = LexedToken(m_input_row, m_input_col, 0, _TOKEN_NONE);
|
||||
@ -175,6 +204,7 @@ class <%= @classname %>
|
||||
size_t delta_row;
|
||||
size_t delta_col;
|
||||
uint token;
|
||||
uint code_id;
|
||||
}
|
||||
LexedTokenState last_accepts_info;
|
||||
last_accepts_info.token = _TOKEN_NONE;
|
||||
@ -209,6 +239,7 @@ class <%= @classname %>
|
||||
if (states[current_state].accepts != _TOKEN_NONE)
|
||||
{
|
||||
attempt_info.token = states[current_state].accepts;
|
||||
attempt_info.code_id = states[current_state].code_id;
|
||||
last_accepts_info = attempt_info;
|
||||
}
|
||||
}
|
||||
@ -220,10 +251,24 @@ class <%= @classname %>
|
||||
}
|
||||
if (!lex_continue)
|
||||
{
|
||||
if (last_accepts_info.token != _TOKEN_NONE)
|
||||
bool pattern_accepted = false;
|
||||
uint token_to_accept = last_accepts_info.token;
|
||||
if (last_accepts_info.code_id != 0xFFFF_FFFFu)
|
||||
{
|
||||
lt.token = last_accepts_info.token;
|
||||
lt.length = last_accepts_info.length;
|
||||
uint user_code_token = user_code(last_accepts_info.code_id);
|
||||
/* A return of _TOKEN_NONE from user_code() means
|
||||
* that the user code did not explicitly return a
|
||||
* token. So only override the token to return if the
|
||||
* user code does explicitly return a token. */
|
||||
if (user_code_token != _TOKEN_NONE)
|
||||
{
|
||||
token_to_accept = user_code_token;
|
||||
}
|
||||
pattern_accepted = true;
|
||||
}
|
||||
if (pattern_accepted || (token_to_accept != _TOKEN_NONE))
|
||||
{
|
||||
/* Update the input position tracking. */
|
||||
m_input_position += last_accepts_info.length;
|
||||
m_input_row += last_accepts_info.delta_row;
|
||||
if (last_accepts_info.delta_row != 0u)
|
||||
@ -235,7 +280,13 @@ class <%= @classname %>
|
||||
m_input_col += last_accepts_info.delta_col;
|
||||
}
|
||||
}
|
||||
break;
|
||||
if (token_to_accept != _TOKEN_NONE)
|
||||
{
|
||||
/* We have a token to accept. */
|
||||
/* Report the token selected above, so that a token explicitly returned
 * by user code overrides the token of the matched pattern. */
lt.token = token_to_accept;
|
||||
lt.length = last_accepts_info.length;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return lt;
|
||||
|
@ -12,6 +12,7 @@ class Propane
|
||||
@patterns = []
|
||||
@tokens = []
|
||||
@rules = []
|
||||
@code_id = 0
|
||||
input = input.gsub("\r\n", "\n")
|
||||
parse_grammar(input)
|
||||
end
|
||||
@ -29,7 +30,7 @@ class Propane
|
||||
@modulename = $1
|
||||
elsif sliced = input.slice!(/\Aclass\s+(\S+)\s*;/)
|
||||
@classname = $1
|
||||
elsif sliced = input.slice!(/\Atoken\s+(\S+?)(?:\s+(.+?))?\s*(?:;|<<\n(.*?)^>>\n)/m)
|
||||
elsif sliced = input.slice!(/\Atoken\s+(\S+?)(?:\s+([^\n]+?))?\s*(?:;|<<\n(.*?)^>>\n)/m)
|
||||
name, pattern, code = $1, $2, $3
|
||||
if pattern.nil?
|
||||
pattern = name
|
||||
@ -39,7 +40,13 @@ class Propane
|
||||
end
|
||||
token = Token.new(name: name, id: @tokens.size, line_number: line_number)
|
||||
@tokens << token
|
||||
pattern = Pattern.new(pattern: pattern, token: token, line_number: line_number)
|
||||
if code
|
||||
code_id = @code_id
|
||||
@code_id += 1
|
||||
else
|
||||
code_id = nil
|
||||
end
|
||||
pattern = Pattern.new(pattern: pattern, token: token, line_number: line_number, code: code, code_id: code_id)
|
||||
@patterns << pattern
|
||||
elsif sliced = input.slice!(/\Atokenid\s+(\S+?)\s*;/m)
|
||||
name = $1
|
||||
@ -51,7 +58,7 @@ class Propane
|
||||
elsif sliced = input.slice!(/\Adrop\s+(\S+)\s*;/)
|
||||
pattern = $1
|
||||
@patterns << Pattern.new(pattern: pattern, line_number: line_number, drop: true)
|
||||
elsif sliced = input.slice!(/\A(\S+)\s*->\s*(.*?)(?:;|<<\n(.*?)^>>\n)/m)
|
||||
elsif sliced = input.slice!(/\A(\S+)\s*->\s*([^\n]*?)(?:;|<<\n(.*?)^>>\n)/m)
|
||||
rule_name, components, code = $1, $2, $3
|
||||
unless rule_name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
|
||||
# Report the rule name that failed validation; `name` is only assigned in
# the token/tokenid branches and is not assigned in this branch.
raise Error.new("Invalid rule name #{rule_name.inspect}")
|
||||
|
@ -22,10 +22,17 @@ class Propane
|
||||
else
|
||||
state.accepts.token.id
|
||||
end
|
||||
code_id =
|
||||
if state.accepts && state.accepts.code_id
|
||||
state.accepts.code_id
|
||||
else
|
||||
0xFFFF_FFFF
|
||||
end
|
||||
state_table << {
|
||||
transition_table_index: transition_table.size,
|
||||
n_transitions: state.transitions.size,
|
||||
accepts: accepts,
|
||||
code_id: code_id,
|
||||
}
|
||||
state.transitions.each do |transition|
|
||||
transition_table << {
|
||||
|
@ -2,6 +2,14 @@ class Propane
|
||||
|
||||
class Pattern
|
||||
|
||||
# @return [String, nil]
|
||||
# Code block to execute when the pattern is matched.
|
||||
attr_reader :code
|
||||
|
||||
# @return [Integer, nil]
|
||||
# Code block ID.
|
||||
attr_reader :code_id
|
||||
|
||||
# @return [String, nil]
|
||||
# Pattern.
|
||||
attr_reader :pattern
|
||||
@ -22,6 +30,10 @@ class Propane
|
||||
#
|
||||
# @param options [Hash]
|
||||
# Optional parameters.
|
||||
# @option options [String, nil] :code
|
||||
# Code block to execute when the pattern is matched.
|
||||
# @option options [Integer, nil] :code_id
|
||||
# Code block ID.
|
||||
# @option options [Boolean] :drop
|
||||
# Whether this is a drop pattern.
|
||||
# @option options [String, nil] :pattern
|
||||
@ -31,6 +43,8 @@ class Propane
|
||||
# @option options [Integer, nil] :line_number
|
||||
# Line number where the token was defined in the input grammar.
|
||||
def initialize(options)
|
||||
@code = options[:code]
|
||||
@code_id = options[:code_id]
|
||||
@drop = options[:drop]
|
||||
@pattern = options[:pattern]
|
||||
@token = options[:token]
|
||||
|
@ -95,4 +95,19 @@ EOF
|
||||
compile("spec/test_d_lexer2.d")
|
||||
run
|
||||
end
|
||||
|
||||
it "executes user code when matching lexer token" do
|
||||
write_grammar <<EOF
|
||||
token abc <<
|
||||
writeln("abc!");
|
||||
>>
|
||||
token def;
|
||||
Start -> Abcs def;
|
||||
Abcs -> ;
|
||||
Abcs -> abc Abcs;
|
||||
EOF
|
||||
build_parser
|
||||
compile("spec/test_user_code.d")
|
||||
run
|
||||
end
|
||||
end
|
||||
|
20
spec/test_user_code.d
Normal file
20
spec/test_user_code.d
Normal file
@ -0,0 +1,20 @@
|
||||
import testparser;
|
||||
import std.stdio;
|
||||
|
||||
int main()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
unittest
|
||||
{
|
||||
string input = "abcdef";
|
||||
auto parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length);
|
||||
assert(parser.parse() == true);
|
||||
writeln("pass1");
|
||||
|
||||
input = "abcabcdef";
|
||||
parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length);
|
||||
assert(parser.parse() == true);
|
||||
writeln("pass2");
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user