Allow storing a result value for a token from a lexer code block

This commit is contained in:
Josh Holtrop 2022-10-16 21:40:25 -04:00
parent ca8a360c0e
commit bca0a14371
4 changed files with 74 additions and 19 deletions

View File

@ -156,6 +156,7 @@ class <%= @classname %>
size_t col;
size_t length;
uint token;
<%= @grammar.result_type %> result;
}
private string m_input;
@ -187,18 +188,19 @@ class <%= @classname %>
*
* @param code_id The ID of the user code block to execute.
* @param match Matched text for this pattern.
* @param lt LexedToken lexer result in progress.
*
* @return Token ID to accept, or _TOKEN_COUNT if the user code does
* not explicitly return a token.
*/
private uint user_code(uint code_id, string match)
private uint user_code(uint code_id, string match, LexedToken * lt)
{
switch (code_id)
{
<% @grammar.patterns.each do |pattern| %>
<% if pattern.code_id %>
case <%= pattern.code_id %>u: {
<%= expand_code(pattern.code) %>
<%= expand_code(pattern.code, false) %>
} break;
<% end %>
<% end %>
@ -210,7 +212,10 @@ class <%= @classname %>
private LexedToken attempt_lex_token()
{
LexedToken lt = LexedToken(m_input_row, m_input_col, 0, _TOKEN_COUNT);
LexedToken lt;
lt.row = m_input_row;
lt.col = m_input_col;
lt.token = _TOKEN_COUNT;
struct MatchInfo
{
size_t length;
@ -269,7 +274,7 @@ class <%= @classname %>
uint token_to_accept = longest_match_info.token;
if (longest_match_info.code_id != 0xFFFF_FFFFu)
{
uint user_code_token = user_code(longest_match_info.code_id, m_input[m_input_position..(m_input_position + longest_match_info.length)]);
uint user_code_token = user_code(longest_match_info.code_id, m_input[m_input_position..(m_input_position + longest_match_info.length)], &lt);
/* A return of _TOKEN_COUNT from user_code() means
* that the user code did not explicitly return a
* token. So only override the token to return if the
@ -417,6 +422,7 @@ class <%= @classname %>
{
/* We shifted a token, mark it consumed. */
token = _TOKEN_COUNT;
stateresults[$-1].result = lexed_token.result;
}
else
{
@ -520,7 +526,7 @@ class <%= @classname %>
<% @grammar.rules.each do |rule| %>
<% if rule.code %>
case <%= rule.id %>u: {
<%= expand_code(rule.code) %>
<%= expand_code(rule.code, true) %>
} break;
<% end %>
<% end %>

View File

@ -157,25 +157,37 @@ class Propane
#
# @param code [String]
# User code block.
# @param parser [Boolean]
# Whether the user code is for the parser or lexer.
#
# @return [String]
# Expanded user code block.
def expand_code(code, parser)
  # $token(name) is expanded the same way for lexer and parser code blocks.
  code = code.gsub(/\$token\(([$\w]+)\)/) { "TOKEN_#{Token.code_name($1)}" }
  if parser
    # Parser code: $$ names the rule's own result value.
    code = code.gsub(/\$\$/) { "_result" }
    # Parser code: $N reads the result stored in the stateresults entry at
    # offset N from the start of the n_states entries on top of the stack.
    code = code.gsub(/\$(\d+)/) do
      index = $1.to_i
      "stateresults[$-1-n_states+#{index}].result"
    end
  else
    # Lexer code: $$ names the result field of the token being lexed (`lt`).
    code = code.gsub(/\$\$/) { "lt.result" }
    # Lexer code: $mode(name) becomes an assignment of that mode's ID to
    # m_mode; referencing a mode @lexer does not know is an error.
    code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do
      mode_name = $1
      mode_id = @lexer.mode_id(mode_name)
      raise Error, "Lexer mode '#{mode_name}' not found" unless mode_id

      "m_mode = #{mode_id}u"
    end
  end
  code
end
end

View File

@ -317,4 +317,21 @@ EOF
"pass1",
])
end
# End-to-end check that a lexer code block can store a token result via `$$`.
it "allows storing a result value for the lexer" do
# Grammar under test: the `word` token's code block assigns `match.length` to
# `$$`, and the `Start` rule copies `$1` into its own `$$`.
write_grammar <<EOF
result_type ulong;
token word /[a-z]+/ <<
$$ = match.length;
>>
Start -> word <<
$$ = $1;
>>
EOF
# NOTE(review): build_parser / compile / run are spec helpers defined outside
# this view; presumably they generate the parser, compile it with the given D
# test source, and execute the result -- confirm against their definitions.
build_parser
compile("spec/test_lexer_result_value.d")
results = run
# The run must write nothing to stderr and exit with status 0.
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
end

View File

@ -0,0 +1,20 @@
import testparser;
import std.stdio;
/* Program entry point: does no work and always returns 0. The actual checks
 * in this file are the assertions inside the unittest block. */
int main()
{
return 0;
}
unittest
{
    /* One input string paired with the parser result expected for it.
     * NOTE(review): the expected values equal the input lengths, which
     * presumably reflects a grammar that stores the match length as the
     * token result -- confirm against the grammar used to build testparser. */
    static struct Expectation
    {
        string input;
        uint result;
    }

    foreach (expectation; [Expectation(`x`, 1u), Expectation(`fabulous`, 8u)])
    {
        /* A fresh parser per input; parsing must succeed and yield the
         * expected result value. */
        auto parser = new Testparser.Parser(expectation.input);
        assert(parser.parse() == true);
        assert(parser.result == expectation.result);
    }
}