Add Position struct to track text positions
This commit is contained in:
parent
80ac6c17f0
commit
1c50d37a3e
@ -54,6 +54,20 @@ class <%= @classname %>
|
|||||||
<% end %>
|
<% end %>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A structure to keep track of a position (row and column) in the input text.
|
||||||
|
*
|
||||||
|
* This is useful for reporting errors, among other things.
|
||||||
|
*/
|
||||||
|
static struct Position
|
||||||
|
{
|
||||||
|
/** Input text row (0-based). */
|
||||||
|
uint row;
|
||||||
|
|
||||||
|
/** Input text column (0-based). */
|
||||||
|
uint col;
|
||||||
|
}
|
||||||
|
|
||||||
static class Decoder
|
static class Decoder
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
@ -196,17 +210,15 @@ class <%= @classname %>
|
|||||||
|
|
||||||
public static struct TokenInfo
|
public static struct TokenInfo
|
||||||
{
|
{
|
||||||
size_t row;
|
Position position;
|
||||||
size_t col;
|
|
||||||
size_t length;
|
size_t length;
|
||||||
Token token;
|
Token token;
|
||||||
ParserValue pvalue;
|
ParserValue pvalue;
|
||||||
}
|
}
|
||||||
|
|
||||||
private string m_input;
|
private string m_input;
|
||||||
private size_t m_input_position;
|
private size_t m_input_index;
|
||||||
private size_t m_input_row;
|
private Position m_input_position;
|
||||||
private size_t m_input_col;
|
|
||||||
private size_t m_mode;
|
private size_t m_mode;
|
||||||
|
|
||||||
this(string input)
|
this(string input)
|
||||||
@ -274,8 +286,7 @@ class <%= @classname %>
|
|||||||
private size_t attempt_lex_token(TokenInfo * out_token_info)
|
private size_t attempt_lex_token(TokenInfo * out_token_info)
|
||||||
{
|
{
|
||||||
TokenInfo token_info;
|
TokenInfo token_info;
|
||||||
token_info.row = m_input_row;
|
token_info.position = m_input_position;
|
||||||
token_info.col = m_input_col;
|
|
||||||
token_info.token = INVALID_TOKEN_ID;
|
token_info.token = INVALID_TOKEN_ID;
|
||||||
*out_token_info = token_info; // TODO: remove
|
*out_token_info = token_info; // TODO: remove
|
||||||
MatchInfo match_info;
|
MatchInfo match_info;
|
||||||
@ -287,7 +298,7 @@ class <%= @classname %>
|
|||||||
Token token_to_accept = match_info.accepting_state.token;
|
Token token_to_accept = match_info.accepting_state.token;
|
||||||
if (match_info.accepting_state.code_id != INVALID_USER_CODE_ID)
|
if (match_info.accepting_state.code_id != INVALID_USER_CODE_ID)
|
||||||
{
|
{
|
||||||
Token user_code_token = user_code(match_info.accepting_state.code_id, m_input[m_input_position..(m_input_position + match_info.length)], &token_info);
|
Token user_code_token = user_code(match_info.accepting_state.code_id, m_input[m_input_index..(m_input_index + match_info.length)], &token_info);
|
||||||
/* An invalid Token from user_code() means that the user
|
/* An invalid Token from user_code() means that the user
|
||||||
* code did not explicitly return a token. So only override
|
* code did not explicitly return a token. So only override
|
||||||
* the token to return if the user code does explicitly
|
* the token to return if the user code does explicitly
|
||||||
@ -299,15 +310,15 @@ class <%= @classname %>
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Update the input position tracking. */
|
/* Update the input position tracking. */
|
||||||
m_input_position += match_info.length;
|
m_input_index += match_info.length;
|
||||||
m_input_row += match_info.delta_row;
|
m_input_position.row += match_info.delta_position.row;
|
||||||
if (match_info.delta_row != 0u)
|
if (match_info.delta_position.row != 0u)
|
||||||
{
|
{
|
||||||
m_input_col = match_info.delta_col;
|
m_input_position.col = match_info.delta_position.col;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
m_input_col += match_info.delta_col;
|
m_input_position.col += match_info.delta_position.col;
|
||||||
}
|
}
|
||||||
if (token_to_accept == INVALID_TOKEN_ID)
|
if (token_to_accept == INVALID_TOKEN_ID)
|
||||||
{
|
{
|
||||||
@ -331,8 +342,7 @@ class <%= @classname %>
|
|||||||
struct MatchInfo
|
struct MatchInfo
|
||||||
{
|
{
|
||||||
size_t length;
|
size_t length;
|
||||||
size_t delta_row;
|
Position delta_position;
|
||||||
size_t delta_col;
|
|
||||||
const(State) * accepting_state;
|
const(State) * accepting_state;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -354,7 +364,7 @@ class <%= @classname %>
|
|||||||
uint current_state = modes[m_mode].state_table_offset;
|
uint current_state = modes[m_mode].state_table_offset;
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
string input = m_input[(m_input_position + attempt_match.length)..(m_input.length)];
|
string input = m_input[(m_input_index + attempt_match.length)..(m_input.length)];
|
||||||
CodePoint code_point;
|
CodePoint code_point;
|
||||||
ubyte code_point_length;
|
ubyte code_point_length;
|
||||||
size_t result = Decoder.decode_code_point(input, code_point, code_point_length);
|
size_t result = Decoder.decode_code_point(input, code_point, code_point_length);
|
||||||
@ -367,12 +377,12 @@ class <%= @classname %>
|
|||||||
attempt_match.length += code_point_length;
|
attempt_match.length += code_point_length;
|
||||||
if (code_point == '\n')
|
if (code_point == '\n')
|
||||||
{
|
{
|
||||||
attempt_match.delta_row++;
|
attempt_match.delta_position.row++;
|
||||||
attempt_match.delta_col = 0u;
|
attempt_match.delta_position.col = 0u;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
attempt_match.delta_col++;
|
attempt_match.delta_position.col++;
|
||||||
}
|
}
|
||||||
current_state = transition_state;
|
current_state = transition_state;
|
||||||
if (states[current_state].accepts)
|
if (states[current_state].accepts)
|
||||||
|
@ -47,23 +47,23 @@ unittest
|
|||||||
string input = "5 + 4 * \n677 + 567";
|
string input = "5 + 4 * \n677 + 567";
|
||||||
Testparser.Lexer lexer = new Testparser.Lexer(input);
|
Testparser.Lexer lexer = new Testparser.Lexer(input);
|
||||||
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
||||||
assert(token_info == TokenInfo(0, 0, 1, Testparser.TOKEN_int));
|
assert(token_info == TokenInfo(Testparser.Position(0, 0), 1, Testparser.TOKEN_int));
|
||||||
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
||||||
assert(token_info == TokenInfo(0, 2, 1, Testparser.TOKEN_plus));
|
assert(token_info == TokenInfo(Testparser.Position(0, 2), 1, Testparser.TOKEN_plus));
|
||||||
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
||||||
assert(token_info == TokenInfo(0, 4, 1, Testparser.TOKEN_int));
|
assert(token_info == TokenInfo(Testparser.Position(0, 4), 1, Testparser.TOKEN_int));
|
||||||
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
||||||
assert(token_info == TokenInfo(0, 6, 1, Testparser.TOKEN_times));
|
assert(token_info == TokenInfo(Testparser.Position(0, 6), 1, Testparser.TOKEN_times));
|
||||||
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
||||||
assert(token_info == TokenInfo(1, 0, 3, Testparser.TOKEN_int));
|
assert(token_info == TokenInfo(Testparser.Position(1, 0), 3, Testparser.TOKEN_int));
|
||||||
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
||||||
assert(token_info == TokenInfo(1, 4, 1, Testparser.TOKEN_plus));
|
assert(token_info == TokenInfo(Testparser.Position(1, 4), 1, Testparser.TOKEN_plus));
|
||||||
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
||||||
assert(token_info == TokenInfo(1, 6, 3, Testparser.TOKEN_int));
|
assert(token_info == TokenInfo(Testparser.Position(1, 6), 3, Testparser.TOKEN_int));
|
||||||
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
||||||
assert(token_info == TokenInfo(1, 9, 0, Testparser.TOKEN___EOF));
|
assert(token_info == TokenInfo(Testparser.Position(1, 9), 0, Testparser.TOKEN___EOF));
|
||||||
|
|
||||||
lexer = new Testparser.Lexer("");
|
lexer = new Testparser.Lexer("");
|
||||||
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN);
|
||||||
assert(token_info == TokenInfo(0, 0, 0, Testparser.TOKEN___EOF));
|
assert(token_info == TokenInfo(Testparser.Position(0, 0), 0, Testparser.TOKEN___EOF));
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user