Start on Parser.parse()

parent f17efe8c82
commit 84c4a16ce6
@@ -9,10 +9,11 @@ class <%= @classname %>
 <% @grammar.tokens.each_with_index do |token, index| %>
     TOKEN_<%= token.c_name %> = <%= index %>,
 <% end %>
-    TOKEN_EOF = <%= TOKEN_EOF %>,
-    TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>,
-    TOKEN_DROP = <%= TOKEN_DROP %>,
-    TOKEN_NONE = <%= TOKEN_NONE %>,
+    _TOKEN_COUNT = <%= @grammar.tokens.size %>,
+    _TOKEN_EOF = <%= TOKEN_EOF %>,
+    _TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>,
+    _TOKEN_DROP = <%= TOKEN_DROP %>,
+    _TOKEN_NONE = <%= TOKEN_NONE %>,
 }
 
 static immutable string TokenNames[] = [
@@ -155,7 +156,7 @@ class <%= @classname %>
         for (;;)
         {
             LexedToken lt = attempt_lex_token();
-            if (lt.token != TOKEN_DROP)
+            if (lt.token != _TOKEN_DROP)
             {
                 return lt;
             }
@@ -164,7 +165,7 @@ class <%= @classname %>
 
     private LexedToken attempt_lex_token()
     {
-        LexedToken lt = LexedToken(m_input_row, m_input_col, 0, TOKEN_NONE);
+        LexedToken lt = LexedToken(m_input_row, m_input_col, 0, _TOKEN_NONE);
         struct LexedTokenState
         {
             size_t length;
@@ -173,7 +174,7 @@ class <%= @classname %>
             uint token;
         }
         LexedTokenState last_accepts_info;
-        last_accepts_info.token = TOKEN_NONE;
+        last_accepts_info.token = _TOKEN_NONE;
         LexedTokenState attempt_info;
         uint current_state;
         for (;;)
@@ -181,7 +182,7 @@ class <%= @classname %>
             auto decoded = Decoder.decode_code_point(&m_input[m_input_position + attempt_info.length], m_input_length - m_input_position - attempt_info.length);
             if (decoded.code_point == Decoder.CODE_POINT_INVALID)
             {
-                lt.token = TOKEN_DECODE_ERROR;
+                lt.token = _TOKEN_DECODE_ERROR;
                 return lt;
             }
             bool lex_continue = false;
@@ -202,7 +203,7 @@ class <%= @classname %>
                     attempt_info.delta_col++;
                 }
                 current_state = dest;
-                if (states[current_state].accepts != TOKEN_NONE)
+                if (states[current_state].accepts != _TOKEN_NONE)
                 {
                     attempt_info.token = states[current_state].accepts;
                     last_accepts_info = attempt_info;
@@ -211,12 +212,12 @@ class <%= @classname %>
             }
             else if (attempt_info.length == 0u)
             {
-                lt.token = TOKEN_EOF;
+                lt.token = _TOKEN_EOF;
                 break;
             }
             if (!lex_continue)
             {
-                if (last_accepts_info.token != TOKEN_NONE)
+                if (last_accepts_info.token != _TOKEN_NONE)
                 {
                     lt.token = last_accepts_info.token;
                     lt.length = last_accepts_info.length;
@@ -256,15 +257,16 @@ class <%= @classname %>
 {
     private struct Shift
     {
-        uint token_id;
-        uint state_id;
+        uint symbol;
+        uint state;
     }
 
     private struct Reduce
     {
-        uint token_id;
-        uint rule_id;
-        uint rule_set_id;
+        uint token;
+        uint rule;
+        uint rule_set;
+        uint n_states;
     }
 
     private struct State
@@ -284,7 +286,7 @@ class <%= @classname %>
 
     private static immutable Reduce reduces[] = [
 <% reduce_table.each do |reduce| %>
-        Reduce(<%= reduce[:token_id] %>u, <%= reduce[:rule_id] %>u, <%= reduce[:rule_set_id] %>u),
+        Reduce(<%= reduce[:token_id] %>u, <%= reduce[:rule_id] %>u, <%= reduce[:rule_set_id] %>u, <%= reduce[:n_states] %>u),
 <% end %>
     ];
 
@@ -300,5 +302,82 @@ class <%= @classname %>
     {
         m_lexer = new Lexer(input, input_length);
     }
 
+    void parse()
+    {
+        Lexer.LexedToken lexed_token;
+        uint token = _TOKEN_NONE;
+        uint[] states = new uint[](1);
+        uint reduced_rule_set = 0xFFFFFFFFu;
+        for (;;)
+        {
+            if (token == _TOKEN_NONE)
+            {
+                lexed_token = m_lexer.lex_token();
+                token = lexed_token.token;
+            }
+            uint shift_state = 0xFFFFFFFFu;
+            if (reduced_rule_set != 0xFFFFFFFFu)
+            {
+                shift_state = check_shift(states[$-1], reduced_rule_set);
+                reduced_rule_set = 0xFFFFFFFFu;
+            }
+            if (shift_state == 0xFFFFFFFFu)
+            {
+                shift_state = check_shift(states[$-1], token);
+            }
+            if (shift_state != 0xFFFFFFFFu)
+            {
+                if (token == _TOKEN_EOF)
+                {
+                    /* Successful parse. */
+                    return;
+                }
+                states ~= shift_state;
+                token = _TOKEN_NONE;
+                continue;
+            }
+
+            uint reduce_index = check_reduce(states[$-1], token);
+            if (reduce_index != 0xFFFFFFFFu)
+            {
+                reduced_rule_set = reduces[reduce_index].rule_set;
+                states.length -= reduces[reduce_index].n_states;
+                continue;
+            }
+
+            /* Error, unexpected token. */
+            return;
+        }
+    }
+
+    private uint check_shift(uint state, uint token)
+    {
+        uint start = states[state].shift_table_index;
+        uint end = start + states[state].n_shift_entries;
+        for (uint i = start; i < end; i++)
+        {
+            if (shifts[i].symbol == token)
+            {
+                return shifts[i].state;
+            }
+        }
+        return 0xFFFFFFFFu;
+    }
+
+    private uint check_reduce(uint state, uint token)
+    {
+        uint start = states[state].reduce_table_index;
+        uint end = start + states[state].n_reduce_entries;
+        for (uint i = start; i < end; i++)
+        {
+            if ((reduces[i].token == token) ||
+                (reduces[i].token == _TOKEN_NONE))
+            {
+                return i;
+            }
+        }
+        return 0xFFFFFFFFu;
+    }
 }
 }
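Stripped of the template machinery, the parse() added above is a plain table-driven shift/reduce loop: keep a stack of state ids, first try to shift a just-reduced rule set (the "goto" case), then the lookahead token, and otherwise reduce by popping n_states entries and retrying. The standalone sketch below shows the same driver pattern over a hard-coded toy grammar (start: INT PLUS INT); the token ids, table contents, and the bool return value are illustrative assumptions, not part of the generated code.

/* Toy shift/reduce driver mirroring the generated parse()/check_shift()/check_reduce(). */
import std.stdio;

enum uint TOK_INT = 0, TOK_PLUS = 1, TOK_EOF = 2, TOK_NONE = 0xFFFFFFFFu;
enum uint RS_START = 3;            /* rule set (nonterminal) id */
enum uint INVALID = 0xFFFFFFFFu;

struct Shift  { uint symbol; uint state; }
struct Reduce { uint token; uint rule; uint rule_set; uint n_states; }
struct State  { uint shift_table_index, n_shift_entries, reduce_table_index, n_reduce_entries; }

immutable Shift[] shifts = [
    Shift(TOK_INT, 1), Shift(RS_START, 4),   /* state 0: shift INT, goto start */
    Shift(TOK_PLUS, 2),                      /* state 1 */
    Shift(TOK_INT, 3),                       /* state 2 */
];
immutable Reduce[] reduces = [
    Reduce(TOK_NONE, 0, RS_START, 3),        /* state 3: start -> INT PLUS INT */
];
immutable State[] states_table = [
    State(0, 2, 0, 0), State(2, 1, 0, 0), State(3, 1, 0, 0),
    State(0, 0, 0, 1), State(0, 0, 0, 0),
];

uint check_shift(uint state, uint symbol)
{
    auto s = states_table[state];
    foreach (i; s.shift_table_index .. s.shift_table_index + s.n_shift_entries)
        if (shifts[i].symbol == symbol)
            return shifts[i].state;
    return INVALID;
}

uint check_reduce(uint state, uint token)
{
    auto s = states_table[state];
    foreach (i; s.reduce_table_index .. s.reduce_table_index + s.n_reduce_entries)
        if (reduces[i].token == token || reduces[i].token == TOK_NONE)
            return i;
    return INVALID;
}

bool parse(const(uint)[] input)
{
    uint[] stack = [0u];                     /* parse stack of state ids */
    uint token = TOK_NONE;
    uint reduced_rule_set = INVALID;
    size_t pos = 0;
    for (;;)
    {
        if (token == TOK_NONE)
            token = input[pos++];            /* stand-in for m_lexer.lex_token() */
        uint shift_state = INVALID;
        if (reduced_rule_set != INVALID)
        {
            shift_state = check_shift(stack[$-1], reduced_rule_set);
            reduced_rule_set = INVALID;
        }
        if (shift_state == INVALID)
            shift_state = check_shift(stack[$-1], token);
        if (shift_state != INVALID)
        {
            if (token == TOK_EOF)
                return true;                 /* successful parse */
            stack ~= shift_state;
            token = TOK_NONE;
            continue;
        }
        uint r = check_reduce(stack[$-1], token);
        if (r != INVALID)
        {
            reduced_rule_set = reduces[r].rule_set;
            stack.length -= reduces[r].n_states;   /* pop one state per RHS symbol */
            continue;
        }
        return false;                        /* unexpected token */
    }
}

void main()
{
    writeln(parse([TOK_INT, TOK_PLUS, TOK_INT, TOK_EOF]));  /* true: "1 + 2" */
    writeln(parse([TOK_INT, TOK_PLUS, TOK_EOF]));           /* false */
}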

@@ -79,10 +79,12 @@ class Propane
       reduce_entries =
         case ra = item_set.reduce_actions
         when Rule
-          [{token_id: TOKEN_NONE, rule_id: ra.id, rule_set_id: ra.rule_set.id}]
+          [{token_id: TOKEN_NONE, rule_id: ra.id,
+            rule_set_id: ra.rule_set.id, n_states: ra.components.size}]
         when Hash
           ra.map do |token, rule|
-            {token_id: token.id, rule_id: rule.id, rule_set_id: rule.rule_set.id}
+            {token_id: token.id, rule_id: rule.id,
+             rule_set_id: rule.rule_set.id, n_states: rule.components.size}
           end
         else
           []

@@ -77,8 +77,8 @@ unittest
     assert(lexer.lex_token() == LT(1, 0, 3, Testparser.TOKEN_INT));
     assert(lexer.lex_token() == LT(1, 4, 1, Testparser.TOKEN_PLUS));
     assert(lexer.lex_token() == LT(1, 6, 3, Testparser.TOKEN_INT));
-    assert(lexer.lex_token() == LT(1, 9, 0, Testparser.TOKEN_EOF));
+    assert(lexer.lex_token() == LT(1, 9, 0, Testparser._TOKEN_EOF));
 
     lexer = new Testparser.Lexer(null, 0u);
-    assert(lexer.lex_token() == LT(0, 0, 0, Testparser.TOKEN_EOF));
+    assert(lexer.lex_token() == LT(0, 0, 0, Testparser._TOKEN_EOF));
 }
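A follow-up test for the new entry point could look roughly like the sketch below. The Testparser.Parser class name and its (input, length) constructor are assumptions read off the template hunk above (which forwards the pair to the Lexer); this commit does not add such a test.

unittest
{
    /* Hypothetical: drive the new table-based parser end to end.
     * parse() is void for now, so this only checks that it returns. */
    auto parser = new Testparser.Parser(null, 0u);
    parser.parse();
}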