Refactor some of Lexer.attempt_lex_token() into find_longest_match()
This commit is contained in:
parent
b92679e0c2
commit
64974cc1e2
@ -190,6 +190,14 @@ class <%= @classname %>
|
|||||||
<% end %>
|
<% end %>
|
||||||
];
|
];
|
||||||
|
|
||||||
|
struct MatchInfo
|
||||||
|
{
|
||||||
|
size_t length;
|
||||||
|
size_t delta_row;
|
||||||
|
size_t delta_col;
|
||||||
|
const(State) * accepting_state;
|
||||||
|
}
|
||||||
|
|
||||||
struct Result
|
struct Result
|
||||||
{
|
{
|
||||||
enum Type
|
enum Type
|
||||||
@ -264,14 +272,55 @@ class <%= @classname %>
|
|||||||
result.row = m_input_row;
|
result.row = m_input_row;
|
||||||
result.col = m_input_col;
|
result.col = m_input_col;
|
||||||
result.token = _TOKEN_COUNT;
|
result.token = _TOKEN_COUNT;
|
||||||
struct MatchInfo
|
MatchInfo match_info;
|
||||||
|
find_longest_match(&result, &match_info);
|
||||||
|
if (result.token != _TOKEN_COUNT)
|
||||||
{
|
{
|
||||||
size_t length;
|
return result;
|
||||||
size_t delta_row;
|
|
||||||
size_t delta_col;
|
|
||||||
const(State) * accepting_state;
|
|
||||||
}
|
}
|
||||||
MatchInfo longest_match_info;
|
if (match_info.accepting_state != null)
|
||||||
|
{
|
||||||
|
uint token_to_accept = match_info.accepting_state.token;
|
||||||
|
if (match_info.accepting_state.code_id != 0xFFFF_FFFFu)
|
||||||
|
{
|
||||||
|
uint user_code_token = user_code(match_info.accepting_state.code_id, m_input[m_input_position..(m_input_position + match_info.length)], &result);
|
||||||
|
/* A return of _TOKEN_COUNT from user_code() means
|
||||||
|
* that the user code did not explicitly return a
|
||||||
|
* token. So only override the token to return if the
|
||||||
|
* user code does explicitly return a token. */
|
||||||
|
if (user_code_token != _TOKEN_COUNT)
|
||||||
|
{
|
||||||
|
token_to_accept = user_code_token;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Update the input position tracking. */
|
||||||
|
m_input_position += match_info.length;
|
||||||
|
m_input_row += match_info.delta_row;
|
||||||
|
if (match_info.delta_row != 0u)
|
||||||
|
{
|
||||||
|
m_input_col = match_info.delta_col;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
m_input_col += match_info.delta_col;
|
||||||
|
}
|
||||||
|
result.token = token_to_accept;
|
||||||
|
result.length = match_info.length;
|
||||||
|
if (result.token == _TOKEN_DROP)
|
||||||
|
{
|
||||||
|
result.type = Result.Type.DROP;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
result.type = Result.Type.TOKEN;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void find_longest_match(Result * result, MatchInfo * match_info)
|
||||||
|
{
|
||||||
MatchInfo attempt_match_info;
|
MatchInfo attempt_match_info;
|
||||||
uint current_state = modes[m_mode].state_table_offset;
|
uint current_state = modes[m_mode].state_table_offset;
|
||||||
for (;;)
|
for (;;)
|
||||||
@ -281,7 +330,7 @@ class <%= @classname %>
|
|||||||
{
|
{
|
||||||
result.type = Result.Type.DECODE_ERROR;
|
result.type = Result.Type.DECODE_ERROR;
|
||||||
result.token = _TOKEN_DECODE_ERROR;
|
result.token = _TOKEN_DECODE_ERROR;
|
||||||
return result;
|
return;
|
||||||
}
|
}
|
||||||
bool lex_continue = false;
|
bool lex_continue = false;
|
||||||
if (!decoded.is_eof())
|
if (!decoded.is_eof())
|
||||||
@ -305,56 +354,21 @@ class <%= @classname %>
|
|||||||
(states[current_state].code_id != 0xFFFF_FFFFu))
|
(states[current_state].code_id != 0xFFFF_FFFFu))
|
||||||
{
|
{
|
||||||
attempt_match_info.accepting_state = &states[current_state];
|
attempt_match_info.accepting_state = &states[current_state];
|
||||||
longest_match_info = attempt_match_info;
|
*match_info = attempt_match_info;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (attempt_match_info.length == 0u)
|
else if (attempt_match_info.length == 0u)
|
||||||
{
|
{
|
||||||
result.token = TOKEN_0EOF;
|
result.token = TOKEN_0EOF;
|
||||||
break;
|
result.type = Result.Type.TOKEN;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
if (!lex_continue && (longest_match_info.accepting_state != null))
|
if (!lex_continue)
|
||||||
{
|
{
|
||||||
uint token_to_accept = longest_match_info.accepting_state.token;
|
return;
|
||||||
if (longest_match_info.accepting_state.code_id != 0xFFFF_FFFFu)
|
|
||||||
{
|
|
||||||
uint user_code_token = user_code(longest_match_info.accepting_state.code_id, m_input[m_input_position..(m_input_position + longest_match_info.length)], &result);
|
|
||||||
/* A return of _TOKEN_COUNT from user_code() means
|
|
||||||
* that the user code did not explicitly return a
|
|
||||||
* token. So only override the token to return if the
|
|
||||||
* user code does explicitly return a token. */
|
|
||||||
if (user_code_token != _TOKEN_COUNT)
|
|
||||||
{
|
|
||||||
token_to_accept = user_code_token;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Update the input position tracking. */
|
|
||||||
m_input_position += longest_match_info.length;
|
|
||||||
m_input_row += longest_match_info.delta_row;
|
|
||||||
if (longest_match_info.delta_row != 0u)
|
|
||||||
{
|
|
||||||
m_input_col = longest_match_info.delta_col;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
m_input_col += longest_match_info.delta_col;
|
|
||||||
}
|
|
||||||
result.token = token_to_accept;
|
|
||||||
result.length = longest_match_info.length;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (result.token == _TOKEN_DROP)
|
|
||||||
{
|
|
||||||
result.type = Result.Type.DROP;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
result.type = Result.Type.TOKEN;
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private uint transition(uint current_state, uint code_point)
|
private uint transition(uint current_state, uint code_point)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user