Allow storing a result value for a token from a lexer code block

This commit is contained in:
Josh Holtrop 2022-10-16 21:40:25 -04:00
parent ca8a360c0e
commit bca0a14371
4 changed files with 74 additions and 19 deletions

View File

@ -156,6 +156,7 @@ class <%= @classname %>
size_t col; size_t col;
size_t length; size_t length;
uint token; uint token;
<%= @grammar.result_type %> result;
} }
private string m_input; private string m_input;
@ -187,18 +188,19 @@ class <%= @classname %>
* *
* @param code_id The ID of the user code block to execute. * @param code_id The ID of the user code block to execute.
* @param match Matched text for this pattern. * @param match Matched text for this pattern.
* @param lt LexedToken lexer result in progress.
* *
* @return Token ID to accept, or _TOKEN_COUNT if the user code does * @return Token ID to accept, or _TOKEN_COUNT if the user code does
* not explicitly return a token. * not explicitly return a token.
*/ */
private uint user_code(uint code_id, string match) private uint user_code(uint code_id, string match, LexedToken * lt)
{ {
switch (code_id) switch (code_id)
{ {
<% @grammar.patterns.each do |pattern| %> <% @grammar.patterns.each do |pattern| %>
<% if pattern.code_id %> <% if pattern.code_id %>
case <%= pattern.code_id %>u: { case <%= pattern.code_id %>u: {
<%= expand_code(pattern.code) %> <%= expand_code(pattern.code, false) %>
} break; } break;
<% end %> <% end %>
<% end %> <% end %>
@ -210,7 +212,10 @@ class <%= @classname %>
private LexedToken attempt_lex_token() private LexedToken attempt_lex_token()
{ {
LexedToken lt = LexedToken(m_input_row, m_input_col, 0, _TOKEN_COUNT); LexedToken lt;
lt.row = m_input_row;
lt.col = m_input_col;
lt.token = _TOKEN_COUNT;
struct MatchInfo struct MatchInfo
{ {
size_t length; size_t length;
@ -269,7 +274,7 @@ class <%= @classname %>
uint token_to_accept = longest_match_info.token; uint token_to_accept = longest_match_info.token;
if (longest_match_info.code_id != 0xFFFF_FFFFu) if (longest_match_info.code_id != 0xFFFF_FFFFu)
{ {
uint user_code_token = user_code(longest_match_info.code_id, m_input[m_input_position..(m_input_position + longest_match_info.length)]); uint user_code_token = user_code(longest_match_info.code_id, m_input[m_input_position..(m_input_position + longest_match_info.length)], &lt);
/* A return of _TOKEN_COUNT from user_code() means /* A return of _TOKEN_COUNT from user_code() means
* that the user code did not explicitly return a * that the user code did not explicitly return a
* token. So only override the token to return if the * token. So only override the token to return if the
@ -417,6 +422,7 @@ class <%= @classname %>
{ {
/* We shifted a token, mark it consumed. */ /* We shifted a token, mark it consumed. */
token = _TOKEN_COUNT; token = _TOKEN_COUNT;
stateresults[$-1].result = lexed_token.result;
} }
else else
{ {
@ -520,7 +526,7 @@ class <%= @classname %>
<% @grammar.rules.each do |rule| %> <% @grammar.rules.each do |rule| %>
<% if rule.code %> <% if rule.code %>
case <%= rule.id %>u: { case <%= rule.id %>u: {
<%= expand_code(rule.code) %> <%= expand_code(rule.code, true) %>
} break; } break;
<% end %> <% end %>
<% end %> <% end %>

View File

@ -157,25 +157,37 @@ class Propane
# #
# @param code [String] # @param code [String]
# User code block. # User code block.
# @param parser [Boolean]
# Whether the user code is for the parser or lexer.
# #
# @return [String] # @return [String]
# Expanded user code block. # Expanded user code block.
def expand_code(code) def expand_code(code, parser)
code.gsub(/\$token\(([$\w]+)\)/) do |match| code = code.gsub(/\$token\(([$\w]+)\)/) do |match|
"TOKEN_#{Token.code_name($1)}" "TOKEN_#{Token.code_name($1)}"
end.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match|
mode_name = $1
mode_id = @lexer.mode_id(mode_name)
unless mode_id
raise Error.new("Lexer mode '#{mode_name}' not found")
end
"m_mode = #{mode_id}u"
end.gsub(/\$\$/) do |match|
"_result"
end.gsub(/\$(\d+)/) do |match|
index = $1.to_i
"stateresults[$-1-n_states+#{index}].result"
end end
if parser
code = code.gsub(/\$\$/) do |match|
"_result"
end
code = code.gsub(/\$(\d+)/) do |match|
index = $1.to_i
"stateresults[$-1-n_states+#{index}].result"
end
else
code = code.gsub(/\$\$/) do |match|
"lt.result"
end
code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match|
mode_name = $1
mode_id = @lexer.mode_id(mode_name)
unless mode_id
raise Error.new("Lexer mode '#{mode_name}' not found")
end
"m_mode = #{mode_id}u"
end
end
code
end end
end end

View File

@ -317,4 +317,21 @@ EOF
"pass1", "pass1",
]) ])
end end
it "allows storing a result value for the lexer" do
# Exercises `$$` inside a lexer code block: the `word` token's code block
# assigns `match.length` to `$$`, and the `Start` rule then copies the
# token's value (`$1`) into its own result (`$$`).
write_grammar <<EOF
result_type ulong;
token word /[a-z]+/ <<
$$ = match.length;
>>
Start -> word <<
$$ = $1;
>>
EOF
# NOTE(review): write_grammar, build_parser, compile and run are helpers
# defined outside this view; presumably they write the grammar file,
# generate the parser, compile it together with the given D source, and
# execute the resulting binary - confirm against the spec helpers.
build_parser
compile("spec/test_lexer_result_value.d")
results = run
# The run must finish without anything on stderr and with exit status 0.
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
end end

View File

@ -0,0 +1,20 @@
import testparser;
import std.stdio;
/* Program entry point. It does no work of its own and always reports
 * success; the checks live in the unittest block below. */
int main()
{
    enum exit_success = 0;
    return exit_success;
}
/* Parse each sample input with the generated parser and verify that the
 * parse succeeds and that the parser's result equals the expected value. */
unittest
{
    /* One sample: the text to parse and the result expected afterwards. */
    struct Sample
    {
        string text;
        uint expected;
    }
    auto samples = [
        Sample(`x`, 1u),
        Sample(`fabulous`, 8u),
    ];
    foreach (sample; samples)
    {
        /* A fresh parser is constructed for every input. */
        auto p = new Testparser.Parser(sample.text);
        assert(p.parse() == true);
        assert(p.result == sample.expected);
    }
}