Allow storing a result value for a token from a lexer code block
parent ca8a360c0e
commit bca0a14371
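The change in brief, illustrated with the grammar from the spec added in this commit (result_type, the token's match text, and the $$ / $1 placeholders are all taken from that spec, nothing here is new):

    result_type ulong;
    token word /[a-z]+/ <<
      $$ = match.length;
    >>
    Start -> word <<
      $$ = $1;
    >>

In a lexer code block, $$ now expands to the in-progress LexedToken's result field; when that token is shifted, the parser copies the value onto its value stack, so the rule's $1 sees the length the lexer stored.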
@@ -156,6 +156,7 @@ class <%= @classname %>
         size_t col;
         size_t length;
         uint token;
+        <%= @grammar.result_type %> result;
     }
 
     private string m_input;
@@ -187,18 +188,19 @@ class <%= @classname %>
      *
      * @param code_id The ID of the user code block to execute.
      * @param match Matched text for this pattern.
+     * @param lt LexedToken lexer result in progress.
      *
      * @return Token ID to accept, or _TOKEN_COUNT if the user code does
      * not explicitly return a token.
      */
-    private uint user_code(uint code_id, string match)
+    private uint user_code(uint code_id, string match, LexedToken * lt)
     {
         switch (code_id)
         {
 <% @grammar.patterns.each do |pattern| %>
 <% if pattern.code_id %>
             case <%= pattern.code_id %>u: {
-                <%= expand_code(pattern.code) %>
+                <%= expand_code(pattern.code, false) %>
             } break;
 <% end %>
 <% end %>
@@ -210,7 +212,10 @@ class <%= @classname %>
 
     private LexedToken attempt_lex_token()
     {
-        LexedToken lt = LexedToken(m_input_row, m_input_col, 0, _TOKEN_COUNT);
+        LexedToken lt;
+        lt.row = m_input_row;
+        lt.col = m_input_col;
+        lt.token = _TOKEN_COUNT;
         struct MatchInfo
         {
             size_t length;
@@ -269,7 +274,7 @@ class <%= @classname %>
             uint token_to_accept = longest_match_info.token;
             if (longest_match_info.code_id != 0xFFFF_FFFFu)
             {
-                uint user_code_token = user_code(longest_match_info.code_id, m_input[m_input_position..(m_input_position + longest_match_info.length)]);
+                uint user_code_token = user_code(longest_match_info.code_id, m_input[m_input_position..(m_input_position + longest_match_info.length)], &lt);
                 /* A return of _TOKEN_COUNT from user_code() means
                  * that the user code did not explicitly return a
                  * token. So only override the token to return if the
@@ -417,6 +422,7 @@ class <%= @classname %>
             {
                 /* We shifted a token, mark it consumed. */
                 token = _TOKEN_COUNT;
+                stateresults[$-1].result = lexed_token.result;
             }
             else
             {
@@ -520,7 +526,7 @@ class <%= @classname %>
 <% @grammar.rules.each do |rule| %>
 <% if rule.code %>
             case <%= rule.id %>u: {
-                <%= expand_code(rule.code) %>
+                <%= expand_code(rule.code, true) %>
             } break;
 <% end %>
 <% end %>
@@ -157,25 +157,37 @@ class Propane
     #
     # @param code [String]
     #   User code block.
+    # @param parser [Boolean]
+    #   Whether the user code is for the parser or lexer.
     #
     # @return [String]
     #   Expanded user code block.
-    def expand_code(code)
-      code.gsub(/\$token\(([$\w]+)\)/) do |match|
+    def expand_code(code, parser)
+      code = code.gsub(/\$token\(([$\w]+)\)/) do |match|
         "TOKEN_#{Token.code_name($1)}"
-      end.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match|
-        mode_name = $1
-        mode_id = @lexer.mode_id(mode_name)
-        unless mode_id
-          raise Error.new("Lexer mode '#{mode_name}' not found")
-        end
-        "m_mode = #{mode_id}u"
-      end.gsub(/\$\$/) do |match|
-        "_result"
-      end.gsub(/\$(\d+)/) do |match|
-        index = $1.to_i
-        "stateresults[$-1-n_states+#{index}].result"
       end
+      if parser
+        code = code.gsub(/\$\$/) do |match|
+          "_result"
+        end
+        code = code.gsub(/\$(\d+)/) do |match|
+          index = $1.to_i
+          "stateresults[$-1-n_states+#{index}].result"
+        end
+      else
+        code = code.gsub(/\$\$/) do |match|
+          "lt.result"
+        end
+        code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match|
+          mode_name = $1
+          mode_id = @lexer.mode_id(mode_name)
+          unless mode_id
+            raise Error.new("Lexer mode '#{mode_name}' not found")
+          end
+          "m_mode = #{mode_id}u"
+        end
+      end
+      code
     end
 
   end
@@ -317,4 +317,21 @@ EOF
       "pass1",
     ])
   end
+
+  it "allows storing a result value for the lexer" do
+    write_grammar <<EOF
+result_type ulong;
+token word /[a-z]+/ <<
+  $$ = match.length;
+>>
+Start -> word <<
+  $$ = $1;
+>>
+EOF
+    build_parser
+    compile("spec/test_lexer_result_value.d")
+    results = run
+    expect(results.stderr).to eq ""
+    expect(results.status).to eq 0
+  end
 end
spec/test_lexer_result_value.d (new file, 20 lines)
@@ -0,0 +1,20 @@
+import testparser;
+import std.stdio;
+
+int main()
+{
+    return 0;
+}
+
+unittest
+{
+    string input = `x`;
+    auto parser = new Testparser.Parser(input);
+    assert(parser.parse() == true);
+    assert(parser.result == 1u);
+
+    input = `fabulous`;
+    parser = new Testparser.Parser(input);
+    assert(parser.parse() == true);
+    assert(parser.result == 8u);
+}