Return an integer result code from Lexer.lex_token() and pass token details back through a TokenInfo out-parameter

This commit is contained in:
Josh Holtrop 2023-07-08 08:08:36 -04:00
parent 0d0da49cd5
commit 8a377b4950
3 changed files with 55 additions and 50 deletions

View File

@ -235,17 +235,16 @@ class <%= @classname %>
<% end %> <% end %>
]; ];
struct Result public enum : size_t
{ {
enum Type P_TOKEN,
{ P_UNEXPECTED_INPUT,
DECODE_ERROR, P_DECODE_ERROR,
DROP, P_DROP,
TOKEN, }
UNEXPECTED_INPUT,
}
Type type; public static struct TokenInfo
{
size_t row; size_t row;
size_t col; size_t col;
size_t length; size_t length;
@ -265,12 +264,12 @@ class <%= @classname %>
m_mode = <%= @lexer.mode_id("default") %>; m_mode = <%= @lexer.mode_id("default") %>;
} }
Result lex_token() size_t lex_token(TokenInfo * out_token_info)
{ {
for (;;) for (;;)
{ {
Result result = attempt_lex_token(); size_t result = attempt_lex_token(out_token_info);
if (result.token < _TOKEN_COUNT) if (out_token_info.token < _TOKEN_COUNT)
{ {
return result; return result;
} }
@ -282,12 +281,12 @@ class <%= @classname %>
* *
* @param code_id The ID of the user code block to execute. * @param code_id The ID of the user code block to execute.
* @param match Matched text for this pattern. * @param match Matched text for this pattern.
* @param result Result lexer result in progress. * @param out_token_info Lexer token info in progress.
* *
* @return Token to accept, or invalid token if the user code does * @return Token to accept, or invalid token if the user code does
* not explicitly return a token. * not explicitly return a token.
*/ */
private Token user_code(UserCodeID code_id, string match, Result * result) private Token user_code(UserCodeID code_id, string match, TokenInfo * out_token_info)
{ {
switch (code_id) switch (code_id)
{ {
@ -304,12 +303,13 @@ class <%= @classname %>
return Token.invalid(); return Token.invalid();
} }
private Result attempt_lex_token() private size_t attempt_lex_token(TokenInfo * out_token_info)
{ {
Result result; TokenInfo token_info;
result.row = m_input_row; token_info.row = m_input_row;
result.col = m_input_col; token_info.col = m_input_col;
result.token = _TOKEN_COUNT; token_info.token = _TOKEN_COUNT;
*out_token_info = token_info; // TODO: remove
MatchInfo match_info; MatchInfo match_info;
size_t unexpected_input_length; size_t unexpected_input_length;
switch (find_longest_match(match_info, unexpected_input_length)) switch (find_longest_match(match_info, unexpected_input_length))
@ -318,7 +318,7 @@ class <%= @classname %>
uint token_to_accept = match_info.accepting_state.token; uint token_to_accept = match_info.accepting_state.token;
if (match_info.accepting_state.code_id.is_valid()) if (match_info.accepting_state.code_id.is_valid())
{ {
Token user_code_token = user_code(match_info.accepting_state.code_id, m_input[m_input_position..(m_input_position + match_info.length)], &result); Token user_code_token = user_code(match_info.accepting_state.code_id, m_input[m_input_position..(m_input_position + match_info.length)], &token_info);
/* An invalid Token from user_code() means that the user /* An invalid Token from user_code() means that the user
* code did not explicitly return a token. So only override * code did not explicitly return a token. So only override
* the token to return if the user code does explicitly * the token to return if the user code does explicitly
@ -340,30 +340,25 @@ class <%= @classname %>
{ {
m_input_col += match_info.delta_col; m_input_col += match_info.delta_col;
} }
result.token = token_to_accept;
result.length = match_info.length;
if (match_info.accepting_state.drop) if (match_info.accepting_state.drop)
{ {
result.type = Result.Type.DROP; return P_DROP;
} }
else token_info.token = token_to_accept;
{ token_info.length = match_info.length;
result.type = Result.Type.TOKEN; *out_token_info = token_info;
} return P_TOKEN;
return result;
case FindLongestMatchResult.DECODE_ERROR: case FindLongestMatchResult.DECODE_ERROR:
result.type = Result.Type.DECODE_ERROR; return P_DECODE_ERROR;
return result;
case FindLongestMatchResult.EOF: case FindLongestMatchResult.EOF:
result.type = Result.Type.TOKEN; token_info.token = TOKEN___EOF;
result.token = TOKEN___EOF; *out_token_info = token_info;
return result; return P_TOKEN;
case FindLongestMatchResult.UNEXPECTED_INPUT: case FindLongestMatchResult.UNEXPECTED_INPUT:
result.type = Result.Type.UNEXPECTED_INPUT; return P_UNEXPECTED_INPUT;
return result;
default: default:
assert(false); assert(false);
@ -576,7 +571,7 @@ class <%= @classname %>
bool parse() bool parse()
{ {
Lexer.Result lexed_token; Lexer.TokenInfo token_info;
uint token = _TOKEN_COUNT; uint token = _TOKEN_COUNT;
StateValue[] statevalues = new StateValue[](1); StateValue[] statevalues = new StateValue[](1);
uint reduced_rule_set = 0xFFFFFFFFu; uint reduced_rule_set = 0xFFFFFFFFu;
@ -585,8 +580,8 @@ class <%= @classname %>
{ {
if (token == _TOKEN_COUNT) if (token == _TOKEN_COUNT)
{ {
lexed_token = m_lexer.lex_token(); size_t lexer_result = m_lexer.lex_token(&token_info);
token = lexed_token.token; token = token_info.token;
} }
uint shift_state = 0xFFFFFFFFu; uint shift_state = 0xFFFFFFFFu;
if (reduced_rule_set != 0xFFFFFFFFu) if (reduced_rule_set != 0xFFFFFFFFu)
@ -611,7 +606,7 @@ class <%= @classname %>
{ {
/* We shifted a token, mark it consumed. */ /* We shifted a token, mark it consumed. */
token = _TOKEN_COUNT; token = _TOKEN_COUNT;
statevalues[$-1].pvalue = lexed_token.pvalue; statevalues[$-1].pvalue = token_info.pvalue;
} }
else else
{ {

View File

@ -195,7 +195,7 @@ class Propane
end end
else else
code = code.gsub(/\$\$/) do |match| code = code.gsub(/\$\$/) do |match|
"result.pvalue.v_#{pattern.ptypename}" "out_token_info.pvalue.v_#{pattern.ptypename}"
end end
code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match| code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match|
mode_name = $1 mode_name = $1

View File

@ -43,18 +43,28 @@ unittest
unittest unittest
{ {
alias Result = Testparser.Lexer.Result; alias TokenInfo = Testparser.Lexer.TokenInfo;
TokenInfo token_info;
string input = "5 + 4 * \n677 + 567"; string input = "5 + 4 * \n677 + 567";
Testparser.Lexer lexer = new Testparser.Lexer(input); Testparser.Lexer lexer = new Testparser.Lexer(input);
assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 0, 1, Testparser.TOKEN_int)); assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 2, 1, Testparser.TOKEN_plus)); assert(token_info == TokenInfo(0, 0, 1, Testparser.TOKEN_int));
assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 4, 1, Testparser.TOKEN_int)); assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 6, 1, Testparser.TOKEN_times)); assert(token_info == TokenInfo(0, 2, 1, Testparser.TOKEN_plus));
assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 0, 3, Testparser.TOKEN_int)); assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 4, 1, Testparser.TOKEN_plus)); assert(token_info == TokenInfo(0, 4, 1, Testparser.TOKEN_int));
assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 6, 3, Testparser.TOKEN_int)); assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 9, 0, Testparser.TOKEN___EOF)); assert(token_info == TokenInfo(0, 6, 1, Testparser.TOKEN_times));
assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
assert(token_info == TokenInfo(1, 0, 3, Testparser.TOKEN_int));
assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
assert(token_info == TokenInfo(1, 4, 1, Testparser.TOKEN_plus));
assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
assert(token_info == TokenInfo(1, 6, 3, Testparser.TOKEN_int));
assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
assert(token_info == TokenInfo(1, 9, 0, Testparser.TOKEN___EOF));
lexer = new Testparser.Lexer(""); lexer = new Testparser.Lexer("");
assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 0, 0, Testparser.TOKEN___EOF)); assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
assert(token_info == TokenInfo(0, 0, 0, Testparser.TOKEN___EOF));
} }