Replace LexedToken with Result struct

This commit is contained in:
Josh Holtrop 2023-03-11 21:16:55 -05:00
parent 1af018b103
commit b92679e0c2
3 changed files with 48 additions and 31 deletions

View File

@ -190,8 +190,16 @@ class <%= @classname %>
<% end %> <% end %>
]; ];
struct LexedToken struct Result
{ {
enum Type
{
DECODE_ERROR,
DROP,
TOKEN,
}
Type type;
size_t row; size_t row;
size_t col; size_t col;
size_t length; size_t length;
@ -211,14 +219,14 @@ class <%= @classname %>
m_mode = <%= @lexer.mode_id("default") %>; m_mode = <%= @lexer.mode_id("default") %>;
} }
LexedToken lex_token() Result lex_token()
{ {
for (;;) for (;;)
{ {
LexedToken lt = attempt_lex_token(); Result result = attempt_lex_token();
if (lt.token < _TOKEN_COUNT) if (result.token < _TOKEN_COUNT)
{ {
return lt; return result;
} }
} }
} }
@ -228,12 +236,12 @@ class <%= @classname %>
* *
* @param code_id The ID of the user code block to execute. * @param code_id The ID of the user code block to execute.
* @param match Matched text for this pattern. * @param match Matched text for this pattern.
* @param lt LexedToken lexer result in progress. * @param result Result lexer result in progress.
* *
* @return Token ID to accept, or _TOKEN_COUNT if the user code does * @return Token ID to accept, or _TOKEN_COUNT if the user code does
* not explicitly return a token. * not explicitly return a token.
*/ */
private uint user_code(uint code_id, string match, LexedToken * lt) private uint user_code(uint code_id, string match, Result * result)
{ {
switch (code_id) switch (code_id)
{ {
@ -250,12 +258,12 @@ class <%= @classname %>
return _TOKEN_COUNT; return _TOKEN_COUNT;
} }
private LexedToken attempt_lex_token() private Result attempt_lex_token()
{ {
LexedToken lt; Result result;
lt.row = m_input_row; result.row = m_input_row;
lt.col = m_input_col; result.col = m_input_col;
lt.token = _TOKEN_COUNT; result.token = _TOKEN_COUNT;
struct MatchInfo struct MatchInfo
{ {
size_t length; size_t length;
@ -271,8 +279,9 @@ class <%= @classname %>
auto decoded = Decoder.decode_code_point(m_input[(m_input_position + attempt_match_info.length)..(m_input.length)]); auto decoded = Decoder.decode_code_point(m_input[(m_input_position + attempt_match_info.length)..(m_input.length)]);
if (decoded.is_decode_error()) if (decoded.is_decode_error())
{ {
lt.token = _TOKEN_DECODE_ERROR; result.type = Result.Type.DECODE_ERROR;
return lt; result.token = _TOKEN_DECODE_ERROR;
return result;
} }
bool lex_continue = false; bool lex_continue = false;
if (!decoded.is_eof()) if (!decoded.is_eof())
@ -302,7 +311,7 @@ class <%= @classname %>
} }
else if (attempt_match_info.length == 0u) else if (attempt_match_info.length == 0u)
{ {
lt.token = TOKEN_0EOF; result.token = TOKEN_0EOF;
break; break;
} }
if (!lex_continue && (longest_match_info.accepting_state != null)) if (!lex_continue && (longest_match_info.accepting_state != null))
@ -310,7 +319,7 @@ class <%= @classname %>
uint token_to_accept = longest_match_info.accepting_state.token; uint token_to_accept = longest_match_info.accepting_state.token;
if (longest_match_info.accepting_state.code_id != 0xFFFF_FFFFu) if (longest_match_info.accepting_state.code_id != 0xFFFF_FFFFu)
{ {
uint user_code_token = user_code(longest_match_info.accepting_state.code_id, m_input[m_input_position..(m_input_position + longest_match_info.length)], &lt); uint user_code_token = user_code(longest_match_info.accepting_state.code_id, m_input[m_input_position..(m_input_position + longest_match_info.length)], &result);
/* A return of _TOKEN_COUNT from user_code() means /* A return of _TOKEN_COUNT from user_code() means
* that the user code did not explicitly return a * that the user code did not explicitly return a
* token. So only override the token to return if the * token. So only override the token to return if the
@ -332,12 +341,20 @@ class <%= @classname %>
{ {
m_input_col += longest_match_info.delta_col; m_input_col += longest_match_info.delta_col;
} }
lt.token = token_to_accept; result.token = token_to_accept;
lt.length = longest_match_info.length; result.length = longest_match_info.length;
break; break;
} }
} }
return lt; if (result.token == _TOKEN_DROP)
{
result.type = Result.Type.DROP;
}
else
{
result.type = Result.Type.TOKEN;
}
return result;
} }
private uint transition(uint current_state, uint code_point) private uint transition(uint current_state, uint code_point)
@ -420,7 +437,7 @@ class <%= @classname %>
bool parse() bool parse()
{ {
Lexer.LexedToken lexed_token; Lexer.Result lexed_token;
uint token = _TOKEN_COUNT; uint token = _TOKEN_COUNT;
StateValue[] statevalues = new StateValue[](1); StateValue[] statevalues = new StateValue[](1);
uint reduced_rule_set = 0xFFFFFFFFu; uint reduced_rule_set = 0xFFFFFFFFu;

View File

@ -195,7 +195,7 @@ class Propane
end end
else else
code = code.gsub(/\$\$/) do |match| code = code.gsub(/\$\$/) do |match|
"lt.pvalue.v_#{pattern.ptypename}" "result.pvalue.v_#{pattern.ptypename}"
end end
code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match| code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match|
mode_name = $1 mode_name = $1

View File

@ -35,18 +35,18 @@ unittest
unittest unittest
{ {
alias LT = Testparser.Lexer.LexedToken; alias Result = Testparser.Lexer.Result;
string input = "5 + 4 * \n677 + 567"; string input = "5 + 4 * \n677 + 567";
Testparser.Lexer lexer = new Testparser.Lexer(input); Testparser.Lexer lexer = new Testparser.Lexer(input);
assert(lexer.lex_token() == LT(0, 0, 1, Testparser.TOKEN_int)); assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 0, 1, Testparser.TOKEN_int));
assert(lexer.lex_token() == LT(0, 2, 1, Testparser.TOKEN_plus)); assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 2, 1, Testparser.TOKEN_plus));
assert(lexer.lex_token() == LT(0, 4, 1, Testparser.TOKEN_int)); assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 4, 1, Testparser.TOKEN_int));
assert(lexer.lex_token() == LT(0, 6, 1, Testparser.TOKEN_times)); assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 6, 1, Testparser.TOKEN_times));
assert(lexer.lex_token() == LT(1, 0, 3, Testparser.TOKEN_int)); assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 0, 3, Testparser.TOKEN_int));
assert(lexer.lex_token() == LT(1, 4, 1, Testparser.TOKEN_plus)); assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 4, 1, Testparser.TOKEN_plus));
assert(lexer.lex_token() == LT(1, 6, 3, Testparser.TOKEN_int)); assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 6, 3, Testparser.TOKEN_int));
assert(lexer.lex_token() == LT(1, 9, 0, Testparser.TOKEN_0EOF)); assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 9, 0, Testparser.TOKEN_0EOF));
lexer = new Testparser.Lexer(""); lexer = new Testparser.Lexer("");
assert(lexer.lex_token() == LT(0, 0, 0, Testparser.TOKEN_0EOF)); assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 0, 0, Testparser.TOKEN_0EOF));
} }