Add FindLongestMatchResult
This commit is contained in:
parent
6b0fb4cb12
commit
11348ca351
@ -267,14 +267,6 @@ class <%= @classname %>
|
|||||||
<% end %>
|
<% end %>
|
||||||
];
|
];
|
||||||
|
|
||||||
struct MatchInfo
|
|
||||||
{
|
|
||||||
size_t length;
|
|
||||||
size_t delta_row;
|
|
||||||
size_t delta_col;
|
|
||||||
const(State) * accepting_state;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct Result
|
struct Result
|
||||||
{
|
{
|
||||||
enum Type
|
enum Type
|
||||||
@ -282,6 +274,7 @@ class <%= @classname %>
|
|||||||
DECODE_ERROR,
|
DECODE_ERROR,
|
||||||
DROP,
|
DROP,
|
||||||
TOKEN,
|
TOKEN,
|
||||||
|
UNEXPECTED_INPUT,
|
||||||
}
|
}
|
||||||
|
|
||||||
Type type;
|
Type type;
|
||||||
@ -349,18 +342,27 @@ class <%= @classname %>
|
|||||||
result.row = m_input_row;
|
result.row = m_input_row;
|
||||||
result.col = m_input_col;
|
result.col = m_input_col;
|
||||||
result.token = _TOKEN_COUNT;
|
result.token = _TOKEN_COUNT;
|
||||||
MatchInfo match_info;
|
auto match_result = find_longest_match();
|
||||||
find_longest_match(&result, &match_info);
|
if (match_result.is_eof())
|
||||||
if (result.token != _TOKEN_COUNT)
|
|
||||||
{
|
{
|
||||||
|
result.type = Result.Type.TOKEN;
|
||||||
|
result.token = TOKEN___EOF;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
if (match_info.accepting_state != null)
|
else if (match_result.is_decode_error())
|
||||||
{
|
{
|
||||||
uint token_to_accept = match_info.accepting_state.token;
|
result.type = Result.Type.DECODE_ERROR;
|
||||||
if (match_info.accepting_state.code_id.is_valid())
|
return result;
|
||||||
|
}
|
||||||
|
else if (match_result.is_unexpected_input())
|
||||||
{
|
{
|
||||||
Token user_code_token = user_code(match_info.accepting_state.code_id, m_input[m_input_position..(m_input_position + match_info.length)], &result);
|
result.type = Result.Type.UNEXPECTED_INPUT;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
uint token_to_accept = match_result.accepting_state.token;
|
||||||
|
if (match_result.accepting_state.code_id.is_valid())
|
||||||
|
{
|
||||||
|
Token user_code_token = user_code(match_result.accepting_state.code_id, m_input[m_input_position..(m_input_position + match_result.length)], &result);
|
||||||
/* An invalid Token from user_code() means that the user
|
/* An invalid Token from user_code() means that the user
|
||||||
* code did not explicitly return a token. So only override
|
* code did not explicitly return a token. So only override
|
||||||
* the token to return if the user code does explicitly
|
* the token to return if the user code does explicitly
|
||||||
@ -372,19 +374,19 @@ class <%= @classname %>
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Update the input position tracking. */
|
/* Update the input position tracking. */
|
||||||
m_input_position += match_info.length;
|
m_input_position += match_result.length;
|
||||||
m_input_row += match_info.delta_row;
|
m_input_row += match_result.delta_row;
|
||||||
if (match_info.delta_row != 0u)
|
if (match_result.delta_row != 0u)
|
||||||
{
|
{
|
||||||
m_input_col = match_info.delta_col;
|
m_input_col = match_result.delta_col;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
m_input_col += match_info.delta_col;
|
m_input_col += match_result.delta_col;
|
||||||
}
|
}
|
||||||
result.token = token_to_accept;
|
result.token = token_to_accept;
|
||||||
result.length = match_info.length;
|
result.length = match_result.length;
|
||||||
if (match_info.accepting_state.drop)
|
if (match_result.accepting_state.drop)
|
||||||
{
|
{
|
||||||
result.type = Result.Type.DROP;
|
result.type = Result.Type.DROP;
|
||||||
}
|
}
|
||||||
@ -392,56 +394,166 @@ class <%= @classname %>
|
|||||||
{
|
{
|
||||||
result.type = Result.Type.TOKEN;
|
result.type = Result.Type.TOKEN;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void find_longest_match(Result * result, MatchInfo * match_info)
|
/**
|
||||||
|
* Result type for find_longest_match().
|
||||||
|
*
|
||||||
|
* Alternatives:
|
||||||
|
* - decode_error
|
||||||
|
* - eof
|
||||||
|
* - found_match(length, delta_row, delta_col, accepting_state)
|
||||||
|
* - unexpected_input(unexpected_input_length)
|
||||||
|
*/
|
||||||
|
struct FindLongestMatchResult
|
||||||
{
|
{
|
||||||
MatchInfo attempt_match_info;
|
enum : ubyte
|
||||||
|
{
|
||||||
|
FOUND_MATCH,
|
||||||
|
DECODE_ERROR,
|
||||||
|
EOF,
|
||||||
|
UNEXPECTED_INPUT,
|
||||||
|
}
|
||||||
|
|
||||||
|
ubyte type;
|
||||||
|
alias type this;
|
||||||
|
union
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
size_t length;
|
||||||
|
size_t delta_row;
|
||||||
|
size_t delta_col;
|
||||||
|
const(State) * accepting_state;
|
||||||
|
}
|
||||||
|
size_t unexpected_input_length;
|
||||||
|
}
|
||||||
|
|
||||||
|
this(ubyte type)
|
||||||
|
{
|
||||||
|
this.type = type;
|
||||||
|
}
|
||||||
|
|
||||||
|
this(ubyte type, size_t unexpected_input_length)
|
||||||
|
{
|
||||||
|
this.type = type;
|
||||||
|
this.unexpected_input_length = unexpected_input_length;
|
||||||
|
}
|
||||||
|
|
||||||
|
this(ubyte type, size_t length, size_t delta_row, size_t delta_col, const(State) * accepting_state)
|
||||||
|
{
|
||||||
|
this.type = type;
|
||||||
|
this.length = length;
|
||||||
|
this.delta_row = delta_row;
|
||||||
|
this.delta_col = delta_col;
|
||||||
|
this.accepting_state = accepting_state;
|
||||||
|
}
|
||||||
|
|
||||||
|
static FindLongestMatchResult found_match(size_t length, size_t delta_row, size_t delta_col, const(State) * accepting_state)
|
||||||
|
{
|
||||||
|
return FindLongestMatchResult(FOUND_MATCH, length, delta_row, delta_col, accepting_state);
|
||||||
|
}
|
||||||
|
|
||||||
|
static FindLongestMatchResult decode_error()
|
||||||
|
{
|
||||||
|
return FindLongestMatchResult(DECODE_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
static FindLongestMatchResult eof()
|
||||||
|
{
|
||||||
|
return FindLongestMatchResult(EOF);
|
||||||
|
}
|
||||||
|
|
||||||
|
static FindLongestMatchResult unexpected_input(size_t unexpected_input_length)
|
||||||
|
{
|
||||||
|
return FindLongestMatchResult(UNEXPECTED_INPUT, unexpected_input_length);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_found_match()
|
||||||
|
{
|
||||||
|
return type == FOUND_MATCH;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_decode_error()
|
||||||
|
{
|
||||||
|
return type == DECODE_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_eof()
|
||||||
|
{
|
||||||
|
return type == EOF;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_unexpected_input()
|
||||||
|
{
|
||||||
|
return type == UNEXPECTED_INPUT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private FindLongestMatchResult find_longest_match()
|
||||||
|
{
|
||||||
|
FindLongestMatchResult longest_match = FindLongestMatchResult.found_match(0, 0, 0, null);
|
||||||
|
FindLongestMatchResult attempt_match = longest_match;
|
||||||
uint current_state = modes[m_mode].state_table_offset;
|
uint current_state = modes[m_mode].state_table_offset;
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
auto decoded = Decoder.decode_code_point(m_input[(m_input_position + attempt_match_info.length)..(m_input.length)]);
|
auto decoded = Decoder.decode_code_point(m_input[(m_input_position + attempt_match.length)..(m_input.length)]);
|
||||||
if (decoded.is_decode_error())
|
if (decoded.is_decode_error())
|
||||||
{
|
{
|
||||||
result.type = Result.Type.DECODE_ERROR;
|
return FindLongestMatchResult.decode_error();
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
bool lex_continue = false;
|
bool lex_continue = false;
|
||||||
if (!decoded.is_eof())
|
if (decoded.is_eof())
|
||||||
|
{
|
||||||
|
/* We hit EOF. */
|
||||||
|
if (longest_match.length > 0)
|
||||||
|
{
|
||||||
|
/* We have a match, so use it. */
|
||||||
|
return longest_match;
|
||||||
|
}
|
||||||
|
else if (attempt_match.length != 0)
|
||||||
|
{
|
||||||
|
/* There is a partial match - error! */
|
||||||
|
return FindLongestMatchResult.unexpected_input(attempt_match.length);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Valid EOF return. */
|
||||||
|
return FindLongestMatchResult.eof();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
auto transition_result = transition(current_state, decoded.code_point);
|
auto transition_result = transition(current_state, decoded.code_point);
|
||||||
if (transition_result.found())
|
if (transition_result.found())
|
||||||
{
|
{
|
||||||
lex_continue = true;
|
lex_continue = true;
|
||||||
attempt_match_info.length += decoded.code_point_length;
|
attempt_match.length += decoded.code_point_length;
|
||||||
if (decoded.code_point == '\n')
|
if (decoded.code_point == '\n')
|
||||||
{
|
{
|
||||||
attempt_match_info.delta_row++;
|
attempt_match.delta_row++;
|
||||||
attempt_match_info.delta_col = 0u;
|
attempt_match.delta_col = 0u;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
attempt_match_info.delta_col++;
|
attempt_match.delta_col++;
|
||||||
}
|
}
|
||||||
current_state = transition_result.destination();
|
current_state = transition_result.destination();
|
||||||
if (states[current_state].accepts())
|
if (states[current_state].accepts())
|
||||||
{
|
{
|
||||||
attempt_match_info.accepting_state = &states[current_state];
|
attempt_match.accepting_state = &states[current_state];
|
||||||
*match_info = attempt_match_info;
|
longest_match = attempt_match;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
else if (longest_match.length > 0)
|
||||||
else if (attempt_match_info.length == 0u)
|
|
||||||
{
|
{
|
||||||
result.token = TOKEN___EOF;
|
return longest_match;
|
||||||
result.type = Result.Type.TOKEN;
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
if (!lex_continue)
|
else
|
||||||
{
|
{
|
||||||
return;
|
return FindLongestMatchResult.unexpected_input(attempt_match.length + decoded.code_point_length);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user