From bca0a1437140302cfc117834681ee2417f04d8cd Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Sun, 16 Oct 2022 21:40:25 -0400 Subject: [PATCH] Allow storing a result value for a token from a lexer code block --- assets/parser.d.erb | 16 +++++++++----- lib/propane/generator.rb | 40 ++++++++++++++++++++++------------ spec/propane_spec.rb | 17 +++++++++++++++ spec/test_lexer_result_value.d | 20 +++++++++++++++++ 4 files changed, 74 insertions(+), 19 deletions(-) create mode 100644 spec/test_lexer_result_value.d diff --git a/assets/parser.d.erb b/assets/parser.d.erb index e44b3a2..d397410 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -156,6 +156,7 @@ class <%= @classname %> size_t col; size_t length; uint token; + <%= @grammar.result_type %> result; } private string m_input; @@ -187,18 +188,19 @@ class <%= @classname %> * * @param code_id The ID of the user code block to execute. * @param match Matched text for this pattern. + * @param lt LexedToken lexer result in progress. * * @return Token ID to accept, or _TOKEN_COUNT if the user code does * not explicitly return a token. 
*/ - private uint user_code(uint code_id, string match) + private uint user_code(uint code_id, string match, LexedToken * lt) { switch (code_id) { <% @grammar.patterns.each do |pattern| %> <% if pattern.code_id %> case <%= pattern.code_id %>u: { -<%= expand_code(pattern.code) %> +<%= expand_code(pattern.code, false) %> } break; <% end %> <% end %> @@ -210,7 +212,10 @@ class <%= @classname %> private LexedToken attempt_lex_token() { - LexedToken lt = LexedToken(m_input_row, m_input_col, 0, _TOKEN_COUNT); + LexedToken lt; + lt.row = m_input_row; + lt.col = m_input_col; + lt.token = _TOKEN_COUNT; struct MatchInfo { size_t length; @@ -269,7 +274,7 @@ class <%= @classname %> uint token_to_accept = longest_match_info.token; if (longest_match_info.code_id != 0xFFFF_FFFFu) { - uint user_code_token = user_code(longest_match_info.code_id, m_input[m_input_position..(m_input_position + longest_match_info.length)]); + uint user_code_token = user_code(longest_match_info.code_id, m_input[m_input_position..(m_input_position + longest_match_info.length)], &lt); /* A return of _TOKEN_COUNT from user_code() means * that the user code did not explicitly return a * token. So only override the token to return if the @@ -417,6 +422,7 @@ class <%= @classname %> { /* We shifted a token, mark it consumed. */ token = _TOKEN_COUNT; + stateresults[$-1].result = lexed_token.result; } else { @@ -520,7 +526,7 @@ class <%= @classname %> <% @grammar.rules.each do |rule| %> <% if rule.code %> case <%= rule.id %>u: { -<%= expand_code(rule.code) %> +<%= expand_code(rule.code, true) %> } break; <% end %> <% end %> diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb index 106aeb0..d23b7a5 100644 --- a/lib/propane/generator.rb +++ b/lib/propane/generator.rb @@ -157,25 +157,37 @@ class Propane # # @param code [String] # User code block. + # @param parser [Boolean] + # Whether the user code is for the parser or lexer. # # @return [String] # Expanded user code block. 
- def expand_code(code) - code.gsub(/\$token\(([$\w]+)\)/) do |match| + def expand_code(code, parser) + code = code.gsub(/\$token\(([$\w]+)\)/) do |match| "TOKEN_#{Token.code_name($1)}" - end.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match| - mode_name = $1 - mode_id = @lexer.mode_id(mode_name) - unless mode_id - raise Error.new("Lexer mode '#{mode_name}' not found") - end - "m_mode = #{mode_id}u" - end.gsub(/\$\$/) do |match| - "_result" - end.gsub(/\$(\d+)/) do |match| - index = $1.to_i - "stateresults[$-1-n_states+#{index}].result" end + if parser + code = code.gsub(/\$\$/) do |match| + "_result" + end + code = code.gsub(/\$(\d+)/) do |match| + index = $1.to_i + "stateresults[$-1-n_states+#{index}].result" + end + else + code = code.gsub(/\$\$/) do |match| + "lt.result" + end + code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match| + mode_name = $1 + mode_id = @lexer.mode_id(mode_name) + unless mode_id + raise Error.new("Lexer mode '#{mode_name}' not found") + end + "m_mode = #{mode_id}u" + end + end + code end end diff --git a/spec/propane_spec.rb b/spec/propane_spec.rb index 0bed976..71914b8 100644 --- a/spec/propane_spec.rb +++ b/spec/propane_spec.rb @@ -317,4 +317,21 @@ EOF "pass1", ]) end + + it "allows storing a result value for the lexer" do + write_grammar <> +Start -> word << + $$ = $1; +>> +EOF + build_parser + compile("spec/test_lexer_result_value.d") + results = run + expect(results.stderr).to eq "" + expect(results.status).to eq 0 + end end diff --git a/spec/test_lexer_result_value.d b/spec/test_lexer_result_value.d new file mode 100644 index 0000000..7d2fa13 --- /dev/null +++ b/spec/test_lexer_result_value.d @@ -0,0 +1,20 @@ +import testparser; +import std.stdio; + +int main() +{ + return 0; +} + +unittest +{ + string input = `x`; + auto parser = new Testparser.Parser(input); + assert(parser.parse() == true); + assert(parser.result == 1u); + + input = `fabulous`; + parser = new Testparser.Parser(input); + assert(parser.parse() == 
true); + assert(parser.result == 8u); +}