Just return integer result code from Lexer.lex_token()
parent 0d0da49cd5
commit 8a377b4950
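This changes Lexer.lex_token() from returning a Result struct by value to returning a size_t status code (P_TOKEN, P_UNEXPECTED_INPUT, P_DECODE_ERROR, P_DROP) and filling a caller-supplied TokenInfo out-parameter. A minimal caller sketch under the new API, assuming the generated Testparser module exercised by the unit tests below; the lex_all() driver and its loop are illustrative, not part of this commit:

// Hypothetical driver showing the new calling convention.
void lex_all(string input)
{
    Testparser.Lexer lexer = new Testparser.Lexer(input);
    Testparser.Lexer.TokenInfo token_info;
    for (;;)
    {
        // New convention: integer status code returned, token details
        // (row, col, length, token) written through the out-parameter.
        size_t result = lexer.lex_token(&token_info);
        if (result != lexer.P_TOKEN)
        {
            break; // P_UNEXPECTED_INPUT or P_DECODE_ERROR
        }
        if (token_info.token == Testparser.TOKEN___EOF)
        {
            break; // EOF still arrives as a P_TOKEN result
        }
        // token_info.row, token_info.col, and token_info.length describe
        // the accepted token's position and extent.
    }
}
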
@@ -235,17 +235,16 @@ class <%= @classname %>
 <% end %>
     ];
 
-    struct Result
+    public enum : size_t
     {
-        enum Type
-        {
-            DECODE_ERROR,
-            DROP,
-            TOKEN,
-            UNEXPECTED_INPUT,
-        }
-
-        Type type;
+        P_TOKEN,
+        P_UNEXPECTED_INPUT,
+        P_DECODE_ERROR,
+        P_DROP,
+    }
+
+    public static struct TokenInfo
+    {
         size_t row;
         size_t col;
         size_t length;

@@ -265,12 +264,12 @@ class <%= @classname %>
         m_mode = <%= @lexer.mode_id("default") %>;
     }
 
-    Result lex_token()
+    size_t lex_token(TokenInfo * out_token_info)
     {
         for (;;)
         {
-            Result result = attempt_lex_token();
-            if (result.token < _TOKEN_COUNT)
+            size_t result = attempt_lex_token(out_token_info);
+            if (out_token_info.token < _TOKEN_COUNT)
             {
                 return result;
             }

@@ -282,12 +281,12 @@ class <%= @classname %>
     *
     * @param code_id The ID of the user code block to execute.
     * @param match Matched text for this pattern.
-    * @param result Result lexer result in progress.
+    * @param out_token_info Lexer token info in progress.
     *
     * @return Token to accept, or invalid token if the user code does
     * not explicitly return a token.
     */
-    private Token user_code(UserCodeID code_id, string match, Result * result)
+    private Token user_code(UserCodeID code_id, string match, TokenInfo * out_token_info)
     {
         switch (code_id)
         {

@@ -304,12 +303,13 @@ class <%= @classname %>
         return Token.invalid();
     }
 
-    private Result attempt_lex_token()
+    private size_t attempt_lex_token(TokenInfo * out_token_info)
     {
-        Result result;
-        result.row = m_input_row;
-        result.col = m_input_col;
-        result.token = _TOKEN_COUNT;
+        TokenInfo token_info;
+        token_info.row = m_input_row;
+        token_info.col = m_input_col;
+        token_info.token = _TOKEN_COUNT;
+        *out_token_info = token_info; // TODO: remove
         MatchInfo match_info;
         size_t unexpected_input_length;
         switch (find_longest_match(match_info, unexpected_input_length))

@@ -318,7 +318,7 @@ class <%= @classname %>
             uint token_to_accept = match_info.accepting_state.token;
             if (match_info.accepting_state.code_id.is_valid())
             {
-                Token user_code_token = user_code(match_info.accepting_state.code_id, m_input[m_input_position..(m_input_position + match_info.length)], &result);
+                Token user_code_token = user_code(match_info.accepting_state.code_id, m_input[m_input_position..(m_input_position + match_info.length)], &token_info);
                 /* An invalid Token from user_code() means that the user
                  * code did not explicitly return a token. So only override
                  * the token to return if the user code does explicitly

@@ -340,30 +340,25 @@ class <%= @classname %>
             {
                 m_input_col += match_info.delta_col;
             }
-            result.token = token_to_accept;
-            result.length = match_info.length;
             if (match_info.accepting_state.drop)
             {
-                result.type = Result.Type.DROP;
+                return P_DROP;
             }
-            else
-            {
-                result.type = Result.Type.TOKEN;
-            }
-            return result;
+            token_info.token = token_to_accept;
+            token_info.length = match_info.length;
+            *out_token_info = token_info;
+            return P_TOKEN;
 
         case FindLongestMatchResult.DECODE_ERROR:
-            result.type = Result.Type.DECODE_ERROR;
-            return result;
+            return P_DECODE_ERROR;
 
         case FindLongestMatchResult.EOF:
-            result.type = Result.Type.TOKEN;
-            result.token = TOKEN___EOF;
-            return result;
+            token_info.token = TOKEN___EOF;
+            *out_token_info = token_info;
+            return P_TOKEN;
 
         case FindLongestMatchResult.UNEXPECTED_INPUT:
-            result.type = Result.Type.UNEXPECTED_INPUT;
-            return result;
+            return P_UNEXPECTED_INPUT;
 
         default:
             assert(false);

@@ -576,7 +571,7 @@ class <%= @classname %>
 
     bool parse()
     {
-        Lexer.Result lexed_token;
+        Lexer.TokenInfo token_info;
         uint token = _TOKEN_COUNT;
         StateValue[] statevalues = new StateValue[](1);
         uint reduced_rule_set = 0xFFFFFFFFu;

@@ -585,8 +580,8 @@ class <%= @classname %>
         {
             if (token == _TOKEN_COUNT)
             {
-                lexed_token = m_lexer.lex_token();
-                token = lexed_token.token;
+                size_t lexer_result = m_lexer.lex_token(&token_info);
+                token = token_info.token;
             }
             uint shift_state = 0xFFFFFFFFu;
             if (reduced_rule_set != 0xFFFFFFFFu)

@@ -611,7 +606,7 @@ class <%= @classname %>
             {
                 /* We shifted a token, mark it consumed. */
                 token = _TOKEN_COUNT;
-                statevalues[$-1].pvalue = lexed_token.pvalue;
+                statevalues[$-1].pvalue = token_info.pvalue;
             }
             else
             {
@@ -195,7 +195,7 @@ class Propane
         end
       else
         code = code.gsub(/\$\$/) do |match|
-          "result.pvalue.v_#{pattern.ptypename}"
+          "out_token_info.pvalue.v_#{pattern.ptypename}"
         end
         code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match|
          mode_name = $1
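The Ruby hunk above is the matching generator-side rename for the `$$` placeholder in lexer user code blocks. Schematically, for a pattern whose ptype selects a hypothetical field v_int, the generated D substitution changes as follows:

// Before this commit, $$ expanded to:
//     result.pvalue.v_int
// After this commit, $$ expands to:
//     out_token_info.pvalue.v_int
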
@@ -43,18 +43,28 @@ unittest
 
 unittest
 {
-    alias Result = Testparser.Lexer.Result;
+    alias TokenInfo = Testparser.Lexer.TokenInfo;
+    TokenInfo token_info;
     string input = "5 + 4 * \n677 + 567";
     Testparser.Lexer lexer = new Testparser.Lexer(input);
-    assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 0, 1, Testparser.TOKEN_int));
-    assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 2, 1, Testparser.TOKEN_plus));
-    assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 4, 1, Testparser.TOKEN_int));
-    assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 6, 1, Testparser.TOKEN_times));
-    assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 0, 3, Testparser.TOKEN_int));
-    assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 4, 1, Testparser.TOKEN_plus));
-    assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 6, 3, Testparser.TOKEN_int));
-    assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 9, 0, Testparser.TOKEN___EOF));
+    assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
+    assert(token_info == TokenInfo(0, 0, 1, Testparser.TOKEN_int));
+    assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
+    assert(token_info == TokenInfo(0, 2, 1, Testparser.TOKEN_plus));
+    assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
+    assert(token_info == TokenInfo(0, 4, 1, Testparser.TOKEN_int));
+    assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
+    assert(token_info == TokenInfo(0, 6, 1, Testparser.TOKEN_times));
+    assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
+    assert(token_info == TokenInfo(1, 0, 3, Testparser.TOKEN_int));
+    assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
+    assert(token_info == TokenInfo(1, 4, 1, Testparser.TOKEN_plus));
+    assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
+    assert(token_info == TokenInfo(1, 6, 3, Testparser.TOKEN_int));
+    assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
+    assert(token_info == TokenInfo(1, 9, 0, Testparser.TOKEN___EOF));
 
     lexer = new Testparser.Lexer("");
-    assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 0, 0, Testparser.TOKEN___EOF));
+    assert(lexer.lex_token(&token_info) == lexer.P_TOKEN);
+    assert(token_info == TokenInfo(0, 0, 0, Testparser.TOKEN___EOF));
 }