Compare commits

..

2 Commits

Author SHA1 Message Date
140b2d8350 User guide: document user code blocks 2023-08-24 09:40:08 -04:00
3c8794058f Add C backend - close #4 2023-08-24 09:40:01 -04:00
30 changed files with 2375 additions and 190 deletions

936
assets/parser.c.erb Normal file
View File

@ -0,0 +1,936 @@
#include "<%= File.basename(output_file).sub(%r{\.[a-z]+$}, "") %>.h"
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
/**************************************************************************
* User code blocks
*************************************************************************/
<%= @grammar.code_blocks.fetch("", "") %>
/**************************************************************************
* Private types
*************************************************************************/
<% if @grammar.prefix.upcase != "P_" %>
/*
* Result codes.
*
* The generated header defines the result codes under the grammar's own
* upper-cased prefix. This file always refers to them by their P_ names, so
* when the grammar prefix is something other than "P_" the P_ names are
* defined here with the same numeric values as in the header.
*/
#define P_SUCCESS 0u
#define P_DECODE_ERROR 1u
#define P_UNEXPECTED_INPUT 2u
#define P_UNEXPECTED_TOKEN 3u
#define P_DROP 4u
#define P_EOF 5u
<% end %>
/* An invalid ID value (returned by check_shift()/check_reduce() when nothing matches). */
#define INVALID_ID ((size_t)-1)
/**************************************************************************
* State initialization
*************************************************************************/
/**
 * Set up a lexer/parser context for a new input text.
 *
 * Every field of the context is reset; fields not listed below end up
 * zero-initialized.
 *
 * @param[out] context
 *   Lexer/parser context structure to initialize.
 * @param input
 *   Text input.
 * @param input_length
 *   Text input length.
 */
void <%= @grammar.prefix %>context_init(<%= @grammar.prefix %>context_t * context, uint8_t const * input, size_t input_length)
{
    /* Members that are not named here are implicitly zero-initialized. */
    <%= @grammar.prefix %>context_t const fresh_context = {
        .input = input,
        .input_length = input_length,
        .mode = <%= @lexer.mode_id("default") %>,
    };
    /* Overwrite the caller's structure in one assignment. */
    *context = fresh_context;
}
/**************************************************************************
* Decoder
*************************************************************************/
/**
 * Decode one UTF-8 encoded code point from the start of the input.
 *
 * Lead bytes announcing one to six byte sequences are accepted; every
 * following byte must have the form 10xxxxxx.
 *
 * @param input
 *   Text input to decode.
 * @param input_length
 *   Input text length.
 * @param[out] out_code_point
 *   The decoded code point is stored here if the return value is P_SUCCESS.
 * @param[out] out_code_point_length
 *   The number of bytes the code point used is stored here if the return value
 *   is P_SUCCESS.
 *
 * @retval P_SUCCESS on a successful code point decode
 * @retval P_DECODE_ERROR when an encoding error is observed
 * @retval P_EOF when the end of the text input is reached
 */
size_t <%= @grammar.prefix %>decode_code_point(uint8_t const * input, size_t input_length,
    <%= @grammar.prefix %>code_point_t * out_code_point, uint8_t * out_code_point_length)
{
    if (input_length == 0u)
    {
        return P_EOF;
    }

    uint8_t const lead = input[0];
    uint8_t n_following;  /* Number of continuation bytes after the lead byte. */
    uint8_t payload_mask; /* Bits of the lead byte that carry code point data. */

    /* Classify the lead byte by value range. */
    if (lead < 0x80u)
    {
        /* 0xxxxxxx: single byte. */
        n_following = 0u;
        payload_mask = 0x7Fu;
    }
    else if (lead < 0xC0u)
    {
        /* 10xxxxxx: a continuation byte cannot start a sequence. */
        return P_DECODE_ERROR;
    }
    else if (lead < 0xE0u)
    {
        /* 110xxxxx */
        n_following = 1u;
        payload_mask = 0x1Fu;
    }
    else if (lead < 0xF0u)
    {
        /* 1110xxxx */
        n_following = 2u;
        payload_mask = 0x0Fu;
    }
    else if (lead < 0xF8u)
    {
        /* 11110xxx */
        n_following = 3u;
        payload_mask = 0x07u;
    }
    else if (lead < 0xFCu)
    {
        /* 111110xx */
        n_following = 4u;
        payload_mask = 0x03u;
    }
    else if (lead < 0xFEu)
    {
        /* 1111110x */
        n_following = 5u;
        payload_mask = 0x01u;
    }
    else
    {
        /* 0xFE and 0xFF are never valid lead bytes. */
        return P_DECODE_ERROR;
    }

    /* The whole sequence must be present in the remaining input. */
    if (input_length <= n_following)
    {
        return P_DECODE_ERROR;
    }

    <%= @grammar.prefix %>code_point_t decoded = lead & payload_mask;
    for (size_t offset = 1u; offset <= n_following; offset++)
    {
        uint8_t const continuation = input[offset];
        if ((continuation & 0xC0u) != 0x80u)
        {
            return P_DECODE_ERROR;
        }
        decoded = (decoded << 6u) | (continuation & 0x3Fu);
    }

    /* Outputs are only written once the whole sequence has been validated. */
    *out_code_point = decoded;
    *out_code_point_length = (uint8_t)(n_following + 1u);
    return P_SUCCESS;
}
/**************************************************************************
* Lexer
*************************************************************************/
/**
* Lexer state ID type.
*
* Sized by the generator from the lexer state table size, which is also the
* value used for INVALID_LEXER_STATE_ID.
*/
typedef <%= get_type_for(@lexer.state_table.size) %> lexer_state_id_t;
/** Invalid lexer state ID (equal to the number of lexer state table entries). */
#define INVALID_LEXER_STATE_ID <%= @lexer.state_table.size %>u
/** Lexer user code ID type. */
<% user_code_id_count = (@grammar.patterns.map(&:code_id).compact.max || 0) + 1 %>
typedef <%= get_type_for(user_code_id_count) %> lexer_user_code_id_t;
/**
* Invalid lexer user code ID.
*
* One greater than the largest pattern code ID, or 1 when no pattern has a
* code ID.
*/
#define INVALID_USER_CODE_ID <%= user_code_id_count %>u
/**
* Lexer transition table entry.
*
* An incoming code point matching the range for a transition entry will cause
* the lexer to progress to the destination state.
*
* check_lexer_transition() treats both ends of the range as inclusive.
*/
typedef struct
{
/** First code point in the range for this transition (inclusive). */
<%= @grammar.prefix %>code_point_t first;
/** Last code point in the range for this transition (inclusive). */
<%= @grammar.prefix %>code_point_t last;
/** Destination lexer state ID for this transition. */
lexer_state_id_t destination_state;
} lexer_transition_t;
/**
* Lexer state table entry.
*
* The transitions of a state occupy n_transitions consecutive entries of the
* lexer transition table, starting at transition_table_index.
*/
typedef struct
{
/** Index to the transition table for this state. */
<%= get_type_for(@lexer.transition_table.size - 1) %> transition_table_index;
/** Number of transition table entries for this state. */
<%= get_type_for(@lexer.state_table.map {|ste| ste[:n_transitions]}.max) %> n_transitions;
/** Lexer token formed at this state (INVALID_TOKEN_ID when the table entry has none). */
<%= @grammar.prefix %>token_t token;
/** Lexer user code ID to execute at this state (INVALID_USER_CODE_ID when the table entry has none). */
lexer_user_code_id_t code_id;
/** Whether this state matches a lexer pattern. */
bool accepts;
} lexer_state_t;
/**
* Lexer mode table entry.
*
* find_longest_match() starts every match attempt from the state at
* state_table_offset of the context's current mode.
*/
typedef struct
{
/** Offset in the state table to be used for this mode. */
uint32_t state_table_offset;
} lexer_mode_t;
/**
* Lexer match info structure.
*
* This structure holds output values from the lexer upon a successful pattern
* match.
*/
typedef struct
{
/** Number of bytes of input text used to match. */
size_t length;
/**
* Input text position delta.
*
* The row field counts the newline code points in the match. The col field
* counts the code points seen since the last newline in the match, so it is
* a column delta only when row is zero; otherwise callers use it as the new
* absolute column.
*/
<%= @grammar.prefix %>position_t delta_position;
/** Accepting lexer state from the match. */
lexer_state_t const * accepting_state;
} lexer_match_info_t;
/**
 * Lexer transition table.
 *
 * Generated data, one entry per transition: {first, last, destination_state}.
 * Read-only at run time, so it is const like the parser tables.
 */
static const lexer_transition_t lexer_transition_table[] = {
<% @lexer.transition_table.each do |transition_table_entry| %>
    {<%= transition_table_entry[:first] %>u, <%= transition_table_entry[:last] %>u, <%= transition_table_entry[:destination] %>u},
<% end %>
};
/**
 * Lexer state table.
 *
 * Generated data, one entry per lexer state:
 * {transition_table_index, n_transitions, token, code_id, accepts}.
 */
static const lexer_state_t lexer_state_table[] = {
<% @lexer.state_table.each do |state_table_entry| %>
    {<%= state_table_entry[:transition_table_index] %>u, <%= state_table_entry[:n_transitions] %>u, <%= state_table_entry[:token] || "INVALID_TOKEN_ID" %>, <%= state_table_entry[:code_id] || "INVALID_USER_CODE_ID" %>, <%= state_table_entry[:accepts] %>},
<% end %>
};
/**
 * Lexer mode table.
 *
 * Generated data, one entry per lexer mode: {state_table_offset}.
 */
static const lexer_mode_t lexer_mode_table[] = {
<% @lexer.mode_table.each do |mode_table_entry| %>
    {<%= mode_table_entry[:state_table_offset] %>},
<% end %>
};
/**
* Execute user code associated with a lexer pattern.
*
* The switch below is generated: one case per grammar pattern that has a
* code ID, containing that pattern's user code after expansion by the
* generator.
*
* @param context
* Lexer/parser context structure.
* @param code_id
* The ID of the user code block to execute.
* @param match
* Matched text for this pattern.
* @param match_length
* Matched text length.
* @param out_token_info
* Lexer token info in progress.
*
* @return Token to accept, or invalid token if the user code does
* not explicitly return a token.
*/
static <%= @grammar.prefix %>token_t lexer_user_code(<%= @grammar.prefix %>context_t * context,
lexer_user_code_id_t code_id, uint8_t const * match,
size_t match_length, <%= @grammar.prefix %>token_info_t * out_token_info)
{
switch (code_id)
{
<% @grammar.patterns.each do |pattern| %>
<% if pattern.code_id %>
case <%= pattern.code_id %>u: {
<%= expand_code(pattern.code, false, nil, pattern) %>
} break;
<% end %>
<% end %>
/* Unknown code IDs (including INVALID_USER_CODE_ID) execute nothing. */
default: break;
}
/* Reached when the user code did not return a token itself. */
return INVALID_TOKEN_ID;
}
/**
 * Look up the lexer transition taken from a state on a given code point.
 *
 * The transitions of the state are scanned in table order and the first
 * entry whose inclusive [first, last] range contains the code point wins.
 *
 * @param current_state
 *   Current lexer state.
 * @param code_point
 *   Input code point.
 *
 * @return Lexer state to transition to, or INVALID_LEXER_STATE_ID if none.
 */
static lexer_state_id_t check_lexer_transition(uint32_t current_state, uint32_t code_point)
{
    uint32_t const first_entry = lexer_state_table[current_state].transition_table_index;
    uint32_t const n_entries = lexer_state_table[current_state].n_transitions;
    uint32_t offset = 0u;
    while (offset < n_entries)
    {
        uint32_t const entry = first_entry + offset;
        bool const below_range = code_point < lexer_transition_table[entry].first;
        bool const above_range = code_point > lexer_transition_table[entry].last;
        if (!(below_range || above_range))
        {
            return lexer_transition_table[entry].destination_state;
        }
        offset++;
    }
    return INVALID_LEXER_STATE_ID;
}
/**
 * Find the longest lexer pattern match at the current position.
 *
 * Starting from the first state of the context's current lexer mode, code
 * points are decoded and fed through the lexer state machine until no
 * transition applies, remembering the most recent accepting state reached.
 *
 * @param context
 *   Lexer/parser context structure.
 * @param[out] out_match_info
 *   The longest match is stored here if the return value is P_SUCCESS. If the
 *   return value is P_DECODE_ERROR, the partially attempted match is stored
 *   here instead so that the caller can update the input text position.
 * @param[out] out_unexpected_input_length
 *   The number of input bytes that could not be matched is stored here if the
 *   return value is P_UNEXPECTED_INPUT.
 *
 * @retval P_SUCCESS
 *   A pattern was successfully matched.
 * @retval P_DECODE_ERROR
 *   The decoder encountered invalid text encoding.
 * @retval P_UNEXPECTED_INPUT
 *   Input text does not match any lexer pattern.
 * @retval P_EOF
 *   The end of the text input was reached.
 */
static size_t find_longest_match(<%= @grammar.prefix %>context_t * context,
    lexer_match_info_t * out_match_info, size_t * out_unexpected_input_length)
{
    lexer_match_info_t longest_match = {0};
    lexer_match_info_t attempt_match = {0};
    *out_match_info = longest_match;
    uint32_t current_state = lexer_mode_table[context->mode].state_table_offset;
    for (;;)
    {
        size_t const input_index = context->input_index + attempt_match.length;
        uint8_t const * input = &context->input[input_index];
        size_t input_length = context->input_length - input_index;
        <%= @grammar.prefix %>code_point_t code_point;
        uint8_t code_point_length;
        size_t result = <%= @grammar.prefix %>decode_code_point(input, input_length, &code_point, &code_point_length);
        switch (result)
        {
            case P_SUCCESS:
            {
                /* The braces are required: before C23 a case label must be
                 * followed by a statement, not by a declaration. */
                lexer_state_id_t transition_state = check_lexer_transition(current_state, code_point);
                if (transition_state != INVALID_LEXER_STATE_ID)
                {
                    attempt_match.length += code_point_length;
                    if (code_point == '\n')
                    {
                        attempt_match.delta_position.row++;
                        attempt_match.delta_position.col = 0u;
                    }
                    else
                    {
                        attempt_match.delta_position.col++;
                    }
                    current_state = transition_state;
                    if (lexer_state_table[current_state].accepts)
                    {
                        /* Remember this match; a longer one may still follow. */
                        attempt_match.accepting_state = &lexer_state_table[current_state];
                        longest_match = attempt_match;
                    }
                }
                else if (longest_match.length > 0)
                {
                    /* No further transition: fall back to the last accepting state. */
                    *out_match_info = longest_match;
                    return P_SUCCESS;
                }
                else
                {
                    /* Nothing was accepted; report the bytes consumed so far
                     * plus the code point that had no transition. */
                    *out_unexpected_input_length = attempt_match.length + code_point_length;
                    return P_UNEXPECTED_INPUT;
                }
                break;
            }

            case P_EOF:
                /* We hit EOF. */
                if (longest_match.length > 0)
                {
                    /* We have a match, so use it. */
                    *out_match_info = longest_match;
                    return P_SUCCESS;
                }
                else if (attempt_match.length != 0)
                {
                    /* There is a partial match - error! */
                    *out_unexpected_input_length = attempt_match.length;
                    return P_UNEXPECTED_INPUT;
                }
                else
                {
                    /* Valid EOF return. */
                    return P_EOF;
                }

            case P_DECODE_ERROR:
                /* If we see a decode error, we may be partially in the middle of
                 * matching a pattern, so return the attempted match info so that
                 * the input text position can be updated. */
                *out_match_info = attempt_match;
                return result;

            default:
                return result;
        }
    }
}
/**
* Attempt to lex the next token in the input stream.
*
* @param context
* Lexer/parser context structure.
* @param[out] out_token_info
* The lexed token information is stored here if the return value is
* P_SUCCESS.
*
* @reval P_SUCCESS
* A token was successfully lexed.
* @reval P_DECODE_ERROR
* The decoder encountered invalid text encoding.
* @reval P_UNEXPECTED_INPUT
* Input text does not match any lexer pattern.
* @retval P_DROP
* A drop pattern was matched so the lexer should continue.
*/
static size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
{
<%= @grammar.prefix %>token_info_t token_info = {0};
token_info.position = context->text_position;
token_info.token = INVALID_TOKEN_ID;
*out_token_info = token_info; // TODO: remove
lexer_match_info_t match_info;
size_t unexpected_input_length;
size_t result = find_longest_match(context, &match_info, &unexpected_input_length);
switch (result)
{
case P_SUCCESS:
<%= @grammar.prefix %>token_t token_to_accept = match_info.accepting_state->token;
if (match_info.accepting_state->code_id != INVALID_USER_CODE_ID)
{
uint8_t const * match = &context->input[context->input_index];
<%= @grammar.prefix %>token_t user_code_token = lexer_user_code(context,
match_info.accepting_state->code_id, match, match_info.length, &token_info);
/* An invalid token returned from lexer_user_code() means that the
* user code did not explicitly return a token. So only override
* the token to return if the user code does explicitly return a
* token. */
if (user_code_token != INVALID_TOKEN_ID)
{
token_to_accept = user_code_token;
}
}
/* Update the input position tracking. */
context->input_index += match_info.length;
context->text_position.row += match_info.delta_position.row;
if (match_info.delta_position.row != 0u)
{
context->text_position.col = match_info.delta_position.col;
}
else
{
context->text_position.col += match_info.delta_position.col;
}
if (token_to_accept == INVALID_TOKEN_ID)
{
return P_DROP;
}
token_info.token = token_to_accept;
token_info.length = match_info.length;
*out_token_info = token_info;
return P_SUCCESS;
case P_EOF:
token_info.token = TOKEN___EOF;
*out_token_info = token_info;
return P_SUCCESS;
case P_DECODE_ERROR:
/* Update the input position tracking. */
context->input_index += match_info.length;
context->text_position.row += match_info.delta_position.row;
if (match_info.delta_position.row != 0u)
{
context->text_position.col = match_info.delta_position.col;
}
else
{
context->text_position.col += match_info.delta_position.col;
}
return result;
default:
return result;
}
}
/**
 * Lex the next token in the input stream.
 *
 * Matches that yield P_DROP are skipped, so the caller only ever sees a
 * token or an error.
 *
 * @param context
 *   Lexer/parser context structure.
 * @param[out] out_token_info
 *   The lexed token information is stored here if the return value is
 *   P_SUCCESS.
 *
 * @retval P_SUCCESS
 *   A token was successfully lexed.
 * @retval P_DECODE_ERROR
 *   The decoder encountered invalid text encoding.
 * @retval P_UNEXPECTED_INPUT
 *   Input text does not match any lexer pattern.
 */
size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
{
    size_t status;
    do
    {
        status = attempt_lex_token(context, out_token_info);
    } while (status == P_DROP);
    return status;
}
/**************************************************************************
* Parser
*************************************************************************/
/** Reduce ID type (sized from the number of reduce table entries). */
typedef <%= get_type_for(@parser.reduce_table.size) %> reduce_id_t;
/**
* A symbol ID can hold either a token ID or a rule set ID.
*
* Token IDs and rule set IDs share the same namespace, with rule set IDs
* beginning after token IDs end.
*/
typedef <%= get_type_for(@parser.rule_sets.map(&:last).map(&:id).max) %> symbol_id_t;
/** Parser state ID type (sized from the number of parser state table entries). */
typedef <%= get_type_for(@parser.state_table.size) %> parser_state_id_t;
/** Parser rule ID type (sized from the number of grammar rules). */
typedef <%= get_type_for(@grammar.rules.size) %> rule_id_t;
/** Parser shift ID type (sized from the number of shift table entries). */
typedef <%= get_type_for(@parser.shift_table.size) %> shift_id_t;
/**
* Shift table entry.
*
* check_shift() returns state_id for the first entry of a parser state whose
* symbol_id equals the incoming token or reduced rule set ID.
*/
typedef struct
{
/** Token or rule set ID. */
symbol_id_t symbol_id;
/** Parser state to shift to. */
parser_state_id_t state_id;
} shift_t;
/** Reduce table entry. */
typedef struct
{
/**
* Lookahead token.
*
* check_reduce() also selects an entry whose token is INVALID_TOKEN_ID,
* whatever the incoming token is.
*/
<%= @grammar.prefix %>token_t token;
/**
* Rule ID.
*
* This is used to execute the parser user code block associated with a
* grammar rule.
*/
rule_id_t rule;
/**
* Rule set ID.
*
* This is used as the new top symbol ID of the parse stack after this
* reduce action.
*/
symbol_id_t rule_set;
/**
* Number of states leading to this reduce action.
*
* This is the number of entries popped from the parse stack after this
* reduce action.
*/
parser_state_id_t n_states;
} reduce_t;
/**
* Parser state entry.
*
* Each parser state owns a run of consecutive shift table entries and a run
* of consecutive reduce table entries, each given as a first index and a
* count.
*/
typedef struct
{
/** First shift table entry for this parser state. */
shift_id_t shift_table_index;
/** Number of shift table entries for this parser state. */
shift_id_t n_shift_entries;
/** First reduce table entry for this parser state. */
reduce_id_t reduce_table_index;
/** Number of reduce table entries for this parser state. */
reduce_id_t n_reduce_entries;
} parser_state_t;
/**
* Structure to hold a state ID and value pair.
*
* A stack of these structures makes up the parse stack.
* state_values_stack_push() zero-fills each new entry.
*/
typedef struct
{
/** Parser state ID. */
size_t state_id;
/** Parser value from this state. */
<%= @grammar.prefix %>value_t pvalue;
} state_value_t;
/** Parser shift table. Each generated entry is {symbol_id, state_id}. */
static const shift_t parser_shift_table[] = {
<% @parser.shift_table.each do |shift| %>
{<%= shift[:symbol_id] %>u, <%= shift[:state_id] %>u},
<% end %>
};
/** Parser reduce table. Each generated entry is {token, rule, rule_set, n_states}. */
static const reduce_t parser_reduce_table[] = {
<% @parser.reduce_table.each do |reduce| %>
{<%= reduce[:token_id] %>u, <%= reduce[:rule_id] %>u, <%= reduce[:rule_set_id] %>u, <%= reduce[:n_states] %>u},
<% end %>
};
/**
* Parser state table. Each generated entry is
* {shift_table_index, n_shift_entries, reduce_table_index, n_reduce_entries}.
*/
static const parser_state_t parser_state_table[] = {
<% @parser.state_table.each do |state| %>
{<%= state[:shift_index] %>u, <%= state[:n_shifts] %>u, <%= state[:reduce_index] %>u, <%= state[:n_reduces] %>u},
<% end %>
};
/* state_values stack functionality */
/**
* state_values stack type.
*
* A growable array of state_value_t entries used as the parse stack.
*/
typedef struct
{
/** Number of entries currently on the stack. */
size_t length;
/** Number of entries the allocated array can hold. */
size_t capacity;
/** Heap-allocated array of entries; released by state_values_stack_free(). */
state_value_t * entries;
} state_values_stack_t;
/**
 * Initialize state_values stack structure.
 *
 * Allocates storage for an initial number of entries and sets the stack
 * length to zero. The storage must be released with
 * state_values_stack_free().
 *
 * This function has no way to report failure, so the process is aborted if
 * the allocation fails rather than leaving a stack with a NULL entries
 * pointer that later pushes would write through.
 *
 * @param stack
 *   state_values stack structure.
 */
static void state_values_stack_init(state_values_stack_t * stack)
{
    const size_t initial_capacity = 10u;
    stack->length = 0u;
    stack->capacity = initial_capacity;
    stack->entries = malloc(initial_capacity * sizeof *stack->entries);
    if (stack->entries == NULL)
    {
        abort();
    }
}
/**
 * Get a pointer to an entry of a state_values stack.
 *
 * A non-negative index counts from the bottom of the stack (0 is the first
 * entry pushed). A negative index counts back from the top (-1 is the most
 * recently pushed entry). No bounds checking is performed.
 *
 * @param stack
 *   state_values stack structure.
 * @param index
 *   Index to the stack.
 *
 * @return Pointer to the state value structure at the given index.
 */
static state_value_t * state_values_stack_index(state_values_stack_t * stack, int index)
{
    size_t slot;
    if (index < 0)
    {
        /* Distance back from one past the top entry. */
        size_t const from_top = (size_t)(unsigned int)(-index);
        slot = stack->length - from_top;
    }
    else
    {
        slot = (size_t)index;
    }
    return stack->entries + slot;
}
/**
 * Push a new state_value to the state_values stack.
 *
 * The new top entry is zero-filled. When the stack is full its storage is
 * grown to twice the current capacity (or to one entry if the capacity is
 * zero), which may move the entries; pointers previously obtained from
 * state_values_stack_index() must not be used after a push.
 *
 * This function has no way to report failure, so the process is aborted if
 * the storage cannot be grown instead of writing through an invalid pointer.
 *
 * @param stack
 *   state_values stack structure.
 */
static void state_values_stack_push(state_values_stack_t * stack)
{
    size_t const current_length = stack->length;
    if (current_length >= stack->capacity)
    {
        size_t const new_capacity = (stack->capacity > 0u) ? (stack->capacity * 2u) : 1u;
        /* Guard both the capacity doubling and the byte count against wrap-around. */
        if ((new_capacity <= stack->capacity) ||
            (new_capacity > (SIZE_MAX / sizeof(state_value_t))))
        {
            abort();
        }
        /* realloc() keeps the existing entries and frees the old storage;
         * assign only after success so the old pointer is never lost. */
        state_value_t * new_entries = realloc(stack->entries, new_capacity * sizeof(state_value_t));
        if (new_entries == NULL)
        {
            abort();
        }
        stack->entries = new_entries;
        stack->capacity = new_capacity;
    }
    memset(&stack->entries[current_length], 0, sizeof(state_value_t));
    stack->length = current_length + 1u;
}
/**
 * Discard the top entries of a state_values stack.
 *
 * Only the stack length is reduced; the storage is neither cleared nor
 * released, and n is not checked against the current length.
 *
 * @param stack
 *   state_values stack structure.
 * @param n
 *   Number of states to pop.
 */
static void state_values_stack_pop(state_values_stack_t * stack, size_t n)
{
    size_t const remaining = stack->length - n;
    stack->length = remaining;
}
/**
 * Free memory for a state_values stack structure.
 *
 * The structure itself is not freed. Its fields are reset after the entries
 * are released so that no dangling pointer is left behind and calling this
 * function a second time is harmless.
 *
 * @param stack
 *   state_values stack structure.
 */
static void state_values_stack_free(state_values_stack_t * stack)
{
    free(stack->entries);
    stack->entries = NULL;
    stack->length = 0u;
    stack->capacity = 0u;
}
/**
* Execute user code associated with a parser rule.
*
* The switch below is generated: one case per grammar rule that has user
* code, containing that code after expansion by the generator. For the C
* target a positional reference $N in a rule's code is expanded to
* state_values_stack_index(statevalues, -1 - (int)n_states + N)->pvalue.
*
* @param rule The ID of the rule.
* @param statevalues The parse stack, read by expanded positional references.
* @param n_states Number of parse stack entries that make up the rule being
* reduced.
*
* @return Parse value. It stays zero-initialized when the rule has no user
* code. NOTE(review): presumably the expanded user code assigns to _pvalue -
* confirm against the generator.
*/
static <%= @grammar.prefix %>value_t parser_user_code(uint32_t rule, state_values_stack_t * statevalues, uint32_t n_states)
{
<%= @grammar.prefix %>value_t _pvalue = {0};
switch (rule)
{
<% @grammar.rules.each do |rule| %>
<% if rule.code %>
case <%= rule.id %>u: {
<%= expand_code(rule.code, true, rule, nil) %>
} break;
<% end %>
<% end %>
/* Rules without user code execute nothing. */
default: break;
}
return _pvalue;
}
/**
 * Look up the parser state to shift to from a state on a given symbol.
 *
 * The shift table entries belonging to the state are scanned in order and
 * the first one whose symbol ID equals the given symbol is used.
 *
 * @param state_id
 *   Parser state ID.
 * @param symbol_id
 *   Incoming token/rule set ID.
 *
 * @return State to shift to, or INVALID_ID if none.
 */
static size_t check_shift(size_t state_id, size_t symbol_id)
{
    uint32_t entry = parser_state_table[state_id].shift_table_index;
    uint32_t const past_last = entry + parser_state_table[state_id].n_shift_entries;
    while (entry < past_last)
    {
        if (symbol_id == parser_shift_table[entry].symbol_id)
        {
            return parser_shift_table[entry].state_id;
        }
        entry++;
    }
    return INVALID_ID;
}
/**
 * Look up the reduce action to apply in a state for a given lookahead token.
 *
 * The reduce table entries belonging to the state are scanned in order. An
 * entry is selected when its lookahead token equals the incoming token or
 * when its lookahead token is INVALID_TOKEN_ID.
 *
 * @param state_id
 *   Parser state ID.
 * @param token
 *   Incoming token.
 *
 * @return Index of the selected reduce table entry, or INVALID_ID if none.
 */
static size_t check_reduce(size_t state_id, <%= @grammar.prefix %>token_t token)
{
    size_t entry = parser_state_table[state_id].reduce_table_index;
    size_t const past_last = entry + parser_state_table[state_id].n_reduce_entries;
    while (entry < past_last)
    {
        bool const matches_token = (parser_reduce_table[entry].token == token);
        bool const matches_any = (parser_reduce_table[entry].token == INVALID_TOKEN_ID);
        if (matches_token || matches_any)
        {
            return entry;
        }
        entry++;
    }
    return INVALID_ID;
}
/**
* Run the parser.
*
* Tokens are pulled from the lexer one at a time and driven through the
* shift/reduce tables using a heap-allocated parse stack, which is released
* before returning on every path.
*
* @param context
* Lexer/parser context structure.
*
* @retval P_SUCCESS
* The parser successfully matched the input text. The parse result value
* can be accessed with <%= @grammar.prefix %>result().
* @retval P_UNEXPECTED_TOKEN
* An unexpected token was encountered that does not match any grammar rule.
* The value context->token holds the unexpected token.
* @retval P_DECODE_ERROR
* The decoder encountered invalid text encoding.
* @retval P_UNEXPECTED_INPUT
* Input text does not match any lexer pattern.
*/
size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
{
<%= @grammar.prefix %>token_info_t token_info;
/* INVALID_TOKEN_ID means no lookahead token is held and one must be lexed. */
<%= @grammar.prefix %>token_t token = INVALID_TOKEN_ID;
state_values_stack_t statevalues;
/* Rule set produced by the latest reduce that has not been shifted yet. */
size_t reduced_rule_set = INVALID_ID;
/* Only read while reduced_rule_set is valid, i.e. after a reduce assigned it. */
<%= @grammar.prefix %>value_t reduced_parser_value;
state_values_stack_init(&statevalues);
/* The first, zero-filled entry puts the parser in state 0. */
state_values_stack_push(&statevalues);
size_t result;
for (;;)
{
if (token == INVALID_TOKEN_ID)
{
size_t lexer_result = <%= @grammar.prefix %>lex(context, &token_info);
if (lexer_result != P_SUCCESS)
{
/* Lexer errors are passed through to the caller unchanged. */
result = lexer_result;
break;
}
token = token_info.token;
}
size_t shift_state = INVALID_ID;
/* A pending reduced rule set is tried before the lookahead token. */
if (reduced_rule_set != INVALID_ID)
{
shift_state = check_shift(state_values_stack_index(&statevalues, -1)->state_id, reduced_rule_set);
}
if (shift_state == INVALID_ID)
{
shift_state = check_shift(state_values_stack_index(&statevalues, -1)->state_id, token);
if ((shift_state != INVALID_ID) && (token == TOKEN___EOF))
{
/* Successful parse. */
context->parse_result = state_values_stack_index(&statevalues, -1)->pvalue;
result = P_SUCCESS;
break;
}
}
if (shift_state != INVALID_ID)
{
/* We have something to shift. */
state_values_stack_push(&statevalues);
state_values_stack_index(&statevalues, -1)->state_id = shift_state;
if (reduced_rule_set == INVALID_ID)
{
/* We shifted a token, mark it consumed. */
token = INVALID_TOKEN_ID;
state_values_stack_index(&statevalues, -1)->pvalue = token_info.pvalue;
}
else
{
/* We shifted a RuleSet. */
state_values_stack_index(&statevalues, -1)->pvalue = reduced_parser_value;
<%= @grammar.prefix %>value_t new_parse_result = {0};
reduced_parser_value = new_parse_result;
reduced_rule_set = INVALID_ID;
}
continue;
}
size_t reduce_index = check_reduce(state_values_stack_index(&statevalues, -1)->state_id, token);
if (reduce_index != INVALID_ID)
{
/* We have something to reduce. */
/* The user code runs before the pop because it reads the rule's
* component values from the top of the stack. */
reduced_parser_value = parser_user_code(parser_reduce_table[reduce_index].rule, &statevalues, parser_reduce_table[reduce_index].n_states);
reduced_rule_set = parser_reduce_table[reduce_index].rule_set;
state_values_stack_pop(&statevalues, parser_reduce_table[reduce_index].n_states);
continue;
}
/* A token was successfully lexed, so the input text position was
* advanced. However, this is an unexpected token, so we want to reset
* the context text position to point to the token rather than the text
* after it, so that if the caller wants to report the error position,
* it will point to the correct position of the unexpected token. */
context->text_position = token_info.position;
context->token = token;
result = P_UNEXPECTED_TOKEN;
break;
}
state_values_stack_free(&statevalues);
return result;
}
/**
* Get the parse result value.
*
* The value is read from the member of context->parse_result that
* corresponds to the start rule's type. The parser stores the parse result
* in the context only when it returns P_SUCCESS.
*
* @param context
* Lexer/parser context structure.
*
* @return Parse result value.
*/
<%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
{
return context->parse_result.v_<%= start_rule_type[0] %>;
}
/**
* Get the current text input position.
*
* After the parser returns P_UNEXPECTED_TOKEN this is the position of the
* unexpected token, because the parser resets the context position to it.
*
* @param context
* Lexer/parser context structure.
*
* @return Current text position.
*/
<%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context)
{
return context->text_position;
}

View File

@ -12,9 +12,7 @@ module <%= @grammar.modulename %>;
* User code blocks
*************************************************************************/
<% @grammar.code_blocks.each do |code| %>
<%= code %>
<% end %>
<%= @grammar.code_blocks.fetch("", "") %>
/**************************************************************************
* Public types

125
assets/parser.h.erb Normal file
View File

@ -0,0 +1,125 @@
/**
* @file
*
* This file is generated by Propane.
*/
#pragma once
#include <stdint.h>
#include <stddef.h>
/**************************************************************************
* Public types
*************************************************************************/
/* Result codes. */
/** The operation completed successfully. */
#define <%= @grammar.prefix.upcase %>SUCCESS 0u
/** The decoder encountered invalid text encoding. */
#define <%= @grammar.prefix.upcase %>DECODE_ERROR 1u
/** Input text does not match any lexer pattern. */
#define <%= @grammar.prefix.upcase %>UNEXPECTED_INPUT 2u
/** A token was encountered that does not match any grammar rule. */
#define <%= @grammar.prefix.upcase %>UNEXPECTED_TOKEN 3u
/** A drop pattern was matched so the lexer should continue. */
#define <%= @grammar.prefix.upcase %>DROP 4u
/** The end of the text input was reached. */
#define <%= @grammar.prefix.upcase %>EOF 5u
/** Token type (sized to hold every token ID and INVALID_TOKEN_ID). */
typedef <%= get_type_for(@grammar.invalid_token_id) %> <%= @grammar.prefix %>token_t;
/** Token IDs. Generation fails if a token's ID differs from its position in the token list. */
<% @grammar.tokens.each_with_index do |token, index| %>
#define TOKEN_<%= token.code_name %> <%= index %>u
<% unless token.id == index %>
<% raise "Token ID (#{token.id}) does not match index (#{index}) for token #{token.name}!" %>
<% end %>
<% end %>
/** Token ID value used where there is no token. */
#define INVALID_TOKEN_ID <%= @grammar.invalid_token_id %>u
/** Code point type. */
typedef uint32_t <%= @grammar.prefix %>code_point_t;
/** User header code blocks. */
<%= @grammar.code_blocks.fetch("header", "") %>
/**
* Parser values type(s).
*
* The union has one member per parser value type of the grammar, named
* v_ followed by that type's name.
*/
typedef union
{
<% @grammar.ptypes.each do |name, typestring| %>
<%= typestring %> v_<%= name %>;
<% end %>
} <%= @grammar.prefix %>value_t;
/**
* A structure to keep track of parser position.
*
* This is useful for reporting errors, etc...
*
* The lexer advances the column once per decoded code point, not per byte,
* and resets it to zero after each newline code point.
*/
typedef struct
{
/** Input text row (0-based). */
uint32_t row;
/** Input text column (0-based). */
uint32_t col;
} <%= @grammar.prefix %>position_t;
/** Lexed token information. */
typedef struct
{
/** Text position where the token was found. */
<%= @grammar.prefix %>position_t position;
/** Number of input bytes used by the token. */
size_t length;
/** Token that was lexed. */
<%= @grammar.prefix %>token_t token;
/**
* Parser value associated with the token.
*
* The lexer zero-initializes this and passes the structure to the
* pattern's user code, which may assign it.
*/
<%= @grammar.prefix %>value_t pvalue;
} <%= @grammar.prefix %>token_info_t;
/**
* Lexer and parser context.
*
* The user must allocate an instance of this structure and pass it to any
* public API function.
*
* The context initialization function sets input, input_length and mode and
* zero-initializes every other field.
*/
typedef struct
{
/* Lexer context data. */
/** Input text. */
uint8_t const * input;
/** Input text length. */
size_t input_length;
/** Input text index (byte offset). */
size_t input_index;
/** Input text position (row/column). */
<%= @grammar.prefix %>position_t text_position;
/** Current lexer mode. */
size_t mode;
/* Parser context data. */
/** Parse result value (stored by the parser when it returns successfully). */
<%= @grammar.prefix %>value_t parse_result;
/** Unexpected token received (stored by the parser when it reports an unexpected token). */
<%= @grammar.prefix %>token_t token;
} <%= @grammar.prefix %>context_t;
/** Initialize a lexer/parser context structure for the given input text. */
void <%= @grammar.prefix %>context_init(<%= @grammar.prefix %>context_t * context, uint8_t const * input, size_t input_length);
/**
* Decode a UTF-8 code point from the start of the input. The outputs are
* stored only when the success code is returned.
*/
size_t <%= @grammar.prefix %>decode_code_point(uint8_t const * input, size_t input_length,
<%= @grammar.prefix %>code_point_t * out_code_point, uint8_t * out_code_point_length);
/** Lex the next token in the input stream; returns a result code. */
size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info);
/** Run the parser over the context's input; returns a result code. */
size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context);
/** Get the parse result value. */
<%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
/** Get the current text input position. */
<%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context);

View File

@ -13,9 +13,9 @@ Propane is an LR Parser Generator (LPG) which:
* generates a built-in lexer to tokenize input
* supports UTF-8 lexer inputs
* generates a table-driven parser to parse input in linear time
* targets C or D language outputs
* is MIT-licensed
* is distributable as a standalone Ruby script
* supports D language
#> Installation
@ -103,6 +103,89 @@ E4 -> lparen E1 rparen <<
>>
```
##> User Code Blocks
User code blocks begin with the line following a "<<" token and end with the
line preceding a grammar line consisting of solely the ">>" token.
All text lines in the code block are copied verbatim into the output file.
### Standalone Code Blocks
C example:
```
<<
#include <stdio.h>
>>
```
D example:
```
<<
import std.stdio;
>>
```
Standalone code blocks are emitted early in the output file as top-level code
outside the context of any function.
Standalone code blocks are a good place to include/import any other necessary
supporting code modules.
They can also define helper functions that can be reused by lexer or parser
user code blocks.
They are emitted in the order they are defined in the grammar file.
For a C target, the word "header" may immediately follow the "<<" token to
cause Propane to emit the code block in the generated header file rather than
the generated implementation file.
This allows including another header that may be necessary to define any types
needed by a `ptype` directive, for example:
```
<<header
#include "mytypes.h"
>>
```
### Lexer pattern code blocks
Example:
```
ptype ulong;
token integer /\\d+/ <<
ulong v;
foreach (c; match)
{
v *= 10;
v += (c - '0');
}
$$ = v;
>>
```
Lexer code blocks appear following a `token` or `pattern` expression.
User code in a lexer code block will be executed when the lexer matches the
given pattern.
Assignment to the `$$` symbol will associate a parser value with the lexed
token.
This parser value can then be used later in a parser rule.
### Parser rule code blocks
Example:
```
E1 -> E1 plus E2 <<
$$ = $1 + $3;
>>
```
Parser rule code blocks appear following a rule expression.
User code in a parser rule code block will be executed when the parser reduces
the given rule.
Assignment to the `$$` symbol will associate a parser value with the reduced
rule.
Parser values for the rules or tokens in the rule pattern can be accessed
positionally with tokens `$1`, `$2`, `$3`, etc...
#> License
Propane is licensed under the terms of the MIT License:

View File

@ -11,14 +11,27 @@ class Propane
@log = StringIO.new
end
@classname = @grammar.classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
@language =
if output_file =~ /\.([a-z]+)$/
$1
else
"d"
end
process_grammar!
end
def generate
erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../../assets/parser.d.erb")), trim_mode: "<>")
result = erb.result(binding.clone)
File.open(@output_file, "wb") do |fh|
fh.write(result)
extensions = [@language]
if @language == "c"
extensions += %w[h]
end
extensions.each do |extension|
erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../../assets/parser.#{extension}.erb")), trim_mode: "<>")
output_file = @output_file.sub(%r{\.[a-z]+$}, ".#{extension}")
result = erb.result(binding.clone)
File.open(output_file, "wb") do |fh|
fh.write(result)
end
end
@log.close
end
@ -191,11 +204,21 @@ class Propane
end
code = code.gsub(/\$(\d+)/) do |match|
index = $1.to_i
"statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}"
case @language
when "c"
"state_values_stack_index(statevalues, -1 - (int)n_states + #{index})->pvalue.v_#{rule.components[index - 1].ptypename}"
when "d"
"statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}"
end
end
else
code = code.gsub(/\$\$/) do |match|
"out_token_info.pvalue.v_#{pattern.ptypename}"
case @language
when "c"
"out_token_info->pvalue.v_#{pattern.ptypename}"
when "d"
"out_token_info.pvalue.v_#{pattern.ptypename}"
end
end
code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match|
mode_name = $1
@ -203,7 +226,12 @@ class Propane
unless mode_id
raise Error.new("Lexer mode '#{mode_name}' not found")
end
"context.mode = #{mode_id}u"
case @language
when "c"
"context->mode = #{mode_id}u"
when "d"
"context.mode = #{mode_id}u"
end
end
end
code
@ -229,11 +257,26 @@ class Propane
# Type.
def get_type_for(max)
if max <= 0xFF
"ubyte"
case @language
when "c"
"uint8_t"
when "d"
"ubyte"
end
elsif max <= 0xFFFF
"ushort"
case @language
when "c"
"uint16_t"
when "d"
"ushort"
end
else
"uint"
case @language
when "c"
"uint32_t"
else
"uint"
end
end
end

View File

@ -18,7 +18,7 @@ class Propane
@patterns = []
@tokens = []
@rules = []
@code_blocks = []
@code_blocks = {}
@line_number = 1
@next_line_number = @line_number
@mode = nil
@ -191,8 +191,13 @@ class Propane
end
def parse_code_block_statement!
if code = parse_code_block!
@code_blocks << code
if md = consume!(/<<([a-z]*)\n(.*?)^>>\n/m)
name, code = md[1..2]
if @code_blocks[name]
@code_blocks[name] += code
else
@code_blocks[name] = code
end
@mode = nil
true
end

183
spec/json_parser.c.propane Normal file
View File

@ -0,0 +1,183 @@
<<header
#include "json_types.h"
#include "testutils.h"
>>
<<
#include "math.h"
#include <stdbool.h>
static str_t string_value;
>>
ptype JSONValue *;
drop /\s+/;
token lbrace /\{/;
token rbrace /\}/;
token lbracket /\[/;
token rbracket /\]/;
token comma /,/;
token colon /:/;
token number /-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?/ <<
  /* Convert the matched JSON number text to a double: optional sign,
   * integer digits, optional fraction, optional decimal exponent.
   * Every read of match[] is bounded by match_length so that the code
   * never looks past the end of the matched text. */
  double n = 0.0;
  bool negative = false;
  size_t i = 0u;
  if (i < match_length && match[i] == '-')
  {
    negative = true;
    i++;
  }
  /* Integer part. */
  while (i < match_length && '0' <= match[i] && match[i] <= '9')
  {
    n *= 10.0;
    n += (match[i] - '0');
    i++;
  }
  /* Fractional part. */
  if (i < match_length && match[i] == '.')
  {
    double mult = 0.1;
    i++;
    while (i < match_length && '0' <= match[i] && match[i] <= '9')
    {
      n += mult * (match[i] - '0');
      mult /= 10.0;
      i++;
    }
  }
  /* Exponent part: the value is mantissa * 10^exponent. */
  if (i < match_length && (match[i] == 'e' || match[i] == 'E'))
  {
    bool exp_negative = false;
    /* Named "exponent" so that exp() from math.h is not shadowed. */
    long exponent = 0;
    i++;
    if (i < match_length && match[i] == '-')
    {
      exp_negative = true;
      i++;
    }
    else if (i < match_length && match[i] == '+')
    {
      i++;
    }
    while (i < match_length && '0' <= match[i] && match[i] <= '9')
    {
      exponent *= 10;
      exponent += (match[i] - '0');
      i++;
    }
    if (exp_negative)
    {
      exponent = -exponent;
    }
    n *= pow(10.0, (double)exponent);
  }
  if (negative)
  {
    n = -n;
  }
  $$ = JSONValue_new(JSON_NUMBER);
  $$->number = n;
>>
token true <<
$$ = JSONValue_new(JSON_TRUE);
>>
token false <<
$$ = JSONValue_new(JSON_FALSE);
>>
token null <<
$$ = JSONValue_new(JSON_NULL);
>>
/"/ <<
$mode(string);
str_init(&string_value, "");
>>
string: token string /"/ <<
$$ = JSONValue_new(JSON_STRING);
$$->string = string_value;
$mode(default);
>>
string: /\\"/ <<
str_append(&string_value, "\"");
>>
string: /\\\\/ <<
str_append(&string_value, "\\");
>>
string: /\\\// <<
str_append(&string_value, "/");
>>
string: /\\b/ <<
str_append(&string_value, "\b");
>>
string: /\\f/ <<
str_append(&string_value, "\f");
>>
string: /\\n/ <<
str_append(&string_value, "\n");
>>
string: /\\r/ <<
str_append(&string_value, "\r");
>>
string: /\\t/ <<
str_append(&string_value, "\t");
>>
string: /\\u[0-9a-fA-F]{4}/ <<
/* Not actually going to encode the code point for this example... */
char s[] = {'{', match[2], match[3], match[4], match[5], '}', 0};
str_append(&string_value, s);
>>
string: /[^\\]/ <<
  /* The matched character can be a multi-byte UTF-8 sequence, so append
   * every byte of the match instead of only the first one. The buffer is
   * zero-filled, which keeps the copied bytes NUL-terminated. */
  char s[8] = {0};
  size_t n_bytes = match_length < sizeof(s) - 1u ? match_length : sizeof(s) - 1u;
  memcpy(s, match, n_bytes);
  str_append(&string_value, s);
>>
Start -> Value <<
$$ = $1;
>>
Value -> string <<
$$ = $1;
>>
Value -> number <<
$$ = $1;
>>
Value -> Object <<
$$ = $1;
>>
Value -> Array <<
$$ = $1;
>>
Value -> true <<
$$ = $1;
>>
Value -> false <<
$$ = $1;
>>
Value -> null <<
$$ = $1;
>>
Object -> lbrace rbrace <<
$$ = JSONObject_new();
>>
Object -> lbrace KeyValues rbrace <<
$$ = $2;
>>
KeyValues -> KeyValue <<
$$ = $1;
>>
KeyValues -> KeyValues comma KeyValue <<
JSONObject_append($1, $3->object.entries[0].name, $3->object.entries[0].value);
$$ = $1;
>>
KeyValue -> string colon Value <<
$$ = JSONObject_new();
JSONObject_append($$, str_cstr(&$1->string), $3);
>>
Array -> lbracket rbracket <<
$$ = JSONArray_new();
>>
Array -> lbracket Values rbracket <<
$$ = $2;
>>
Values -> Value <<
$$ = $1;
>>
Values -> Values comma Value <<
JSONArray_append($1, $3);
$$ = $1;
>>

64
spec/json_types.c Normal file
View File

@ -0,0 +1,64 @@
#include "json_types.h"
#include <string.h>
#include <stdlib.h>
#include "testutils.h"
/**
 * Allocate a new zero-initialized JSON value node.
 *
 * @param id
 *   Value kind stored in the node's id field (callers pass one of the
 *   JSON_* constants).
 *
 * @return
 *   Pointer to the newly allocated node. The process is aborted if the
 *   allocation fails, so the returned pointer is never NULL.
 */
JSONValue * JSONValue_new(size_t id)
{
    JSONValue * jv = calloc(1, sizeof(*jv));
    if (jv == NULL)
    {
        /* Callers dereference the result unconditionally; fail loudly
         * instead of writing through a NULL pointer. */
        abort();
    }
    jv->id = id;
    return jv;
}
/**
 * Create a new JSON object value that holds no entries.
 *
 * @return
 *   Node of kind JSON_OBJECT with an entry count of zero.
 */
JSONValue * JSONObject_new(void)
{
    JSONValue * const result = JSONValue_new(JSON_OBJECT);

    result->object.size = 0u;

    return result;
}
/**
 * Set a named member of a JSON object value.
 *
 * If an entry with the same name already exists its value is replaced;
 * otherwise a new entry is added at the end of the entry array.
 *
 * @param object
 *   JSON object value to modify.
 * @param name
 *   Member name. The pointer itself is stored (the string is not copied).
 * @param value
 *   Member value to store.
 */
void JSONObject_append(JSONValue * object, char const * name, JSONValue * value)
{
    size_t const size = object->object.size;

    /* Replace the value in place when the name is already present. */
    for (size_t i = 0u; i < size; i++)
    {
        if (strcmp(name, object->object.entries[i].name) == 0)
        {
            object->object.entries[i].value = value;
            return;
        }
    }

    /* Grow the entry array by one slot. The stored pointer is only trusted
     * when the object already has entries; otherwise start from NULL so
     * that realloc() behaves like malloc(). */
    void * old_entries = (size > 0u) ? (void *)object->object.entries : NULL;
    void * new_entries = realloc(old_entries, sizeof(object->object.entries[0]) * (size + 1u));
    if (new_entries == NULL)
    {
        /* Out of memory: stop rather than write through a NULL pointer. */
        abort();
    }

    object->object.entries = new_entries;
    object->object.entries[size].name = name;
    object->object.entries[size].value = value;
    object->object.size = size + 1u;
}
/**
 * Create a new JSON array value that holds no elements.
 *
 * @return
 *   Node of kind JSON_ARRAY with an element count of zero.
 */
JSONValue * JSONArray_new(void)
{
    JSONValue * const result = JSONValue_new(JSON_ARRAY);

    result->array.size = 0u;

    return result;
}
/**
 * Append an element to the end of a JSON array value.
 *
 * @param array
 *   JSON array value to modify.
 * @param value
 *   Element to append. The pointer itself is stored.
 */
void JSONArray_append(JSONValue * array, JSONValue * value)
{
    size_t const size = array->array.size;

    /* The stored pointer is only trusted when the array already has
     * elements; otherwise start from NULL so that realloc() behaves like
     * malloc(). */
    JSONValue ** old_entries = (size > 0u) ? array->array.entries : NULL;
    JSONValue ** new_entries = realloc(old_entries, sizeof(*new_entries) * (size + 1u));
    if (new_entries == NULL)
    {
        /* Out of memory: stop rather than write through a NULL pointer. */
        abort();
    }

    new_entries[size] = value;
    array->array.entries = new_entries;
    array->array.size = size + 1u;
}

46
spec/json_types.h Normal file
View File

@ -0,0 +1,46 @@
#pragma once
#include <stddef.h>
#include "testutils.h"
#define JSON_OBJECT 0u
#define JSON_ARRAY 1u
#define JSON_NUMBER 2u
#define JSON_STRING 3u
#define JSON_TRUE 4u
#define JSON_FALSE 5u
#define JSON_NULL 6u
/**
 * A JSON value node.
 *
 * The id field selects which member of the anonymous union is meaningful.
 */
typedef struct JSONValue_s
{
/* Value kind; JSONValue_new() stores the id it is given here (callers pass
 * one of the JSON_* constants defined above). */
size_t id;
union
{
/* Object payload: an array of name/value entries. */
struct
{
/* Number of entries in the entries array. */
size_t size;
struct
{
/* Member name; JSONObject_append() stores the caller's pointer as-is. */
char const * name;
/* Member value. */
struct JSONValue_s * value;
} * entries;
} object;
/* Array payload: an array of element pointers. */
struct
{
/* Number of elements in the entries array. */
size_t size;
struct JSONValue_s ** entries;
} array;
/* Numeric payload. */
double number;
/* String payload; str_t is not declared in this header and presumably
 * comes from testutils.h -- confirm. */
str_t string;
};
} JSONValue;
JSONValue * JSONValue_new(size_t id);
JSONValue * JSONObject_new(void);
void JSONObject_append(JSONValue * object, char const * name, JSONValue * value);
JSONValue * JSONArray_new(void);
void JSONArray_append(JSONValue * array, JSONValue * value);

View File

@ -11,7 +11,7 @@ describe Propane do
def build_parser(options = {})
options[:name] ||= ""
command = %W[./propane.sh spec/run/testparser#{options[:name]}.propane spec/run/testparser#{options[:name]}.d --log spec/run/testparser#{options[:name]}.log]
command = %W[./propane.sh spec/run/testparser#{options[:name]}.propane spec/run/testparser#{options[:name]}.#{options[:language]} --log spec/run/testparser#{options[:name]}.log]
if (options[:capture])
stdout, stderr, status = Open3.capture3(*command)
Results.new(stdout, stderr, status)
@ -25,9 +25,14 @@ describe Propane do
test_files = Array(test_files)
options[:parsers] ||= [""]
parsers = options[:parsers].map do |name|
"spec/run/testparser#{name}.d"
"spec/run/testparser#{name}.#{options[:language]}"
end
case options[:language]
when "c"
result = system(*%w[gcc -Wall -o spec/run/testparser -Ispec -Ispec/run], *parsers, *test_files, "spec/testutils.c", "-lm")
when "d"
result = system(*%w[ldc2 --unittest -of spec/run/testparser -Ispec], *parsers, *test_files, "spec/testutils.d")
end
result = system(*%w[ldc2 --unittest -of spec/run/testparser -Ispec], *parsers, *test_files, "spec/testutils.d")
expect(result).to be_truthy
end
@ -69,8 +74,12 @@ describe Propane do
FileUtils.mkdir_p("spec/run")
end
it "generates a lexer" do
write_grammar <<EOF
%w[d c].each do |language|
context "#{language.upcase} language" do
it "generates a lexer" do
write_grammar <<EOF
token int /\\d+/;
token plus /\\+/;
token times /\\*/;
@ -81,15 +90,33 @@ Foo -> int <<
Foo -> plus <<
>>
EOF
build_parser
compile("spec/test_lexer.d")
results = run
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
build_parser(language: language)
compile("spec/test_lexer.#{language}", language: language)
results = run
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
it "detects a lexer error when an unknown character is seen" do
write_grammar <<EOF
it "detects a lexer error when an unknown character is seen" do
case language
when "c"
write_grammar <<EOF
ptype int;
token int /\\d+/ <<
int v = 0;
for (size_t i = 0u; i < match_length; i++)
{
v *= 10;
v += (match[i] - '0');
}
$$ = v;
>>
Start -> int <<
$$ = $1;
>>
EOF
when "d"
write_grammar <<EOF
ptype int;
token int /\\d+/ <<
int v;
@ -104,15 +131,16 @@ Start -> int <<
$$ = $1;
>>
EOF
build_parser
compile("spec/test_lexer_unknown_character.d")
results = run
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
end
build_parser(language: language)
compile("spec/test_lexer_unknown_character.#{language}", language: language)
results = run
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
it "generates a parser" do
write_grammar <<EOF
it "generates a parser" do
write_grammar <<EOF
token plus /\\+/;
token times /\\*/;
token zero /0/;
@ -124,11 +152,65 @@ E -> B;
B -> zero;
B -> one;
EOF
build_parser
end
build_parser(language: language)
end
it "generates a parser that does basic math - user guide example" do
write_grammar <<EOF
it "generates a parser that does basic math - user guide example" do
case language
when "c"
write_grammar <<EOF
<<
#include <math.h>
>>
ptype size_t;
token plus /\\+/;
token times /\\*/;
token power /\\*\\*/;
token integer /\\d+/ <<
size_t v = 0u;
for (size_t i = 0u; i < match_length; i++)
{
v *= 10;
v += (match[i] - '0');
}
$$ = v;
>>
token lparen /\\(/;
token rparen /\\)/;
drop /\\s+/;
Start -> E1 <<
$$ = $1;
>>
E1 -> E2 <<
$$ = $1;
>>
E1 -> E1 plus E2 <<
$$ = $1 + $3;
>>
E2 -> E3 <<
$$ = $1;
>>
E2 -> E2 times E3 <<
$$ = $1 * $3;
>>
E3 -> E4 <<
$$ = $1;
>>
E3 -> E3 power E4 <<
$$ = (size_t)pow($1, $3);
>>
E4 -> integer <<
$$ = $1;
>>
E4 -> lparen E1 rparen <<
$$ = $2;
>>
EOF
when "d"
write_grammar <<EOF
<<
import std.math;
>>
@ -179,25 +261,26 @@ E4 -> lparen E1 rparen <<
$$ = $2;
>>
EOF
build_parser
compile("spec/test_basic_math_grammar.d")
results = run
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
end
build_parser(language: language)
compile("spec/test_basic_math_grammar.#{language}", language: language)
results = run
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
it "generates an SLR parser" do
write_grammar <<EOF
it "generates an SLR parser" do
write_grammar <<EOF
token one /1/;
Start -> E;
E -> one E;
E -> one;
EOF
build_parser
end
build_parser(language: language)
end
it "distinguishes between multiple identical rules with lookahead symbol" do
write_grammar <<EOF
it "distinguishes between multiple identical rules with lookahead symbol" do
write_grammar <<EOF
token a;
token b;
Start -> R1 a;
@ -205,14 +288,14 @@ Start -> R2 b;
R1 -> a b;
R2 -> a b;
EOF
build_parser
compile("spec/test_parser_identical_rules_lookahead.d")
results = run
expect(results.status).to eq 0
end
build_parser(language: language)
compile("spec/test_parser_identical_rules_lookahead.#{language}", language: language)
results = run
expect(results.status).to eq 0
end
it "handles reducing a rule that could be arrived at from multiple states" do
write_grammar <<EOF
it "handles reducing a rule that could be arrived at from multiple states" do
write_grammar <<EOF
token a;
token b;
drop /\\s+/;
@ -220,14 +303,29 @@ Start -> a R1;
Start -> b R1;
R1 -> b;
EOF
build_parser
compile("spec/test_parser_rule_from_multiple_states.d")
results = run
expect(results.status).to eq 0
end
build_parser(language: language)
compile("spec/test_parser_rule_from_multiple_states.#{language}", language: language)
results = run
expect(results.status).to eq 0
end
it "executes user code when matching lexer token" do
write_grammar <<EOF
it "executes user code when matching lexer token" do
case language
when "c"
write_grammar <<EOF
<<
#include <stdio.h>
>>
token abc <<
printf("abc!\\n");
>>
token def;
Start -> Abcs def;
Abcs -> ;
Abcs -> abc Abcs;
EOF
when "d"
write_grammar <<EOF
<<
import std.stdio;
>>
@ -239,21 +337,35 @@ Start -> Abcs def;
Abcs -> ;
Abcs -> abc Abcs;
EOF
build_parser
compile("spec/test_user_code.d")
results = run
expect(results.status).to eq 0
verify_lines(results.stdout, [
"abc!",
"pass1",
"abc!",
"abc!",
"pass2",
])
end
end
build_parser(language: language)
compile("spec/test_user_code.#{language}", language: language)
results = run
expect(results.status).to eq 0
verify_lines(results.stdout, [
"abc!",
"pass1",
"abc!",
"abc!",
"pass2",
])
end
it "supports a pattern statement" do
write_grammar <<EOF
it "supports a pattern statement" do
case language
when "c"
write_grammar <<EOF
<<
#include <stdio.h>
>>
token abc;
/def/ <<
printf("def!\\n");
>>
Start -> abc;
EOF
when "d"
write_grammar <<EOF
<<
import std.stdio;
>>
@ -263,21 +375,39 @@ token abc;
>>
Start -> abc;
EOF
build_parser
compile("spec/test_pattern.d")
results = run
expect(results.status).to eq 0
verify_lines(results.stdout, [
"def!",
"pass1",
"def!",
"def!",
"pass2",
])
end
end
build_parser(language: language)
compile("spec/test_pattern.#{language}", language: language)
results = run
expect(results.status).to eq 0
verify_lines(results.stdout, [
"def!",
"pass1",
"def!",
"def!",
"pass2",
])
end
it "supports returning tokens from pattern code blocks" do
write_grammar <<EOF
it "supports returning tokens from pattern code blocks" do
case language
when "c"
write_grammar <<EOF
<<
#include <stdio.h>
>>
token abc;
/def/ <<
printf("def!\\n");
>>
/ghi/ <<
printf("ghi!\\n");
return $token(abc);
>>
Start -> abc;
EOF
when "d"
write_grammar <<EOF
<<
import std.stdio;
>>
@ -291,19 +421,44 @@ token abc;
>>
Start -> abc;
EOF
build_parser
compile("spec/test_return_token_from_pattern.d")
results = run
expect(results.status).to eq 0
verify_lines(results.stdout, [
"def!",
"ghi!",
"def!",
])
end
end
build_parser(language: language)
compile("spec/test_return_token_from_pattern.#{language}", language: language)
results = run
expect(results.status).to eq 0
verify_lines(results.stdout, [
"def!",
"ghi!",
"def!",
])
end
it "supports lexer modes" do
write_grammar <<EOF
it "supports lexer modes" do
case language
when "c"
write_grammar <<EOF
<<
#include <stdio.h>
>>
token abc;
token def;
tokenid string;
drop /\\s+/;
/"/ <<
printf("begin string mode\\n");
$mode(string);
>>
string: /[^"]+/ <<
printf("captured string\\n");
>>
string: /"/ <<
$mode(default);
return $token(string);
>>
Start -> abc string def;
EOF
when "d"
write_grammar <<EOF
<<
import std.stdio;
>>
@ -324,22 +479,42 @@ string: /"/ <<
>>
Start -> abc string def;
EOF
build_parser
compile("spec/test_lexer_modes.d")
results = run
expect(results.status).to eq 0
verify_lines(results.stdout, [
"begin string mode",
"captured string",
"pass1",
"begin string mode",
"captured string",
"pass2",
])
end
end
build_parser(language: language)
compile("spec/test_lexer_modes.#{language}", language: language)
results = run
expect(results.status).to eq 0
verify_lines(results.stdout, [
"begin string mode",
"captured string",
"pass1",
"begin string mode",
"captured string",
"pass2",
])
end
it "executes user code associated with a parser rule" do
write_grammar <<EOF
it "executes user code associated with a parser rule" do
case language
when "c"
write_grammar <<EOF
<<
#include <stdio.h>
>>
token a;
token b;
Start -> A B <<
printf("Start!\\n");
>>
A -> a <<
printf("A!\\n");
>>
B -> b <<
printf("B!\\n");
>>
EOF
when "d"
write_grammar <<EOF
<<
import std.stdio;
>>
@ -355,20 +530,21 @@ B -> b <<
writeln("B!");
>>
EOF
build_parser
compile("spec/test_parser_rule_user_code.d")
results = run
expect(results.status).to eq 0
verify_lines(results.stdout, [
"A!",
"B!",
"Start!",
])
end
end
build_parser(language: language)
compile("spec/test_parser_rule_user_code.#{language}", language: language)
results = run
expect(results.status).to eq 0
verify_lines(results.stdout, [
"A!",
"B!",
"Start!",
])
end
it "parses lists" do
write_grammar <<EOF
ptype uint;
it "parses lists" do
write_grammar <<EOF
ptype #{language == "c" ? "uint32_t" : "uint"};
token a;
Start -> As <<
$$ = $1;
@ -380,15 +556,15 @@ As -> As a <<
$$ = $1 + 1u;
>>
EOF
build_parser
compile("spec/test_parsing_lists.d")
results = run
expect(results.status).to eq 0
expect(results.stderr).to eq ""
end
build_parser(language: language)
compile("spec/test_parsing_lists.#{language}", language: language)
results = run
expect(results.status).to eq 0
expect(results.stderr).to eq ""
end
it "fails to generate a parser for a LR(1) grammar that is not LALR" do
write_grammar <<EOF
it "fails to generate a parser for a LR(1) grammar that is not LALR" do
write_grammar <<EOF
token a;
token b;
token c;
@ -401,13 +577,29 @@ Start -> b E d;
E -> e;
F -> e;
EOF
results = build_parser(capture: true)
expect(results.status).to_not eq 0
expect(results.stderr).to match %r{reduce/reduce conflict.*\(E\).*\(F\)}
end
results = build_parser(capture: true, language: language)
expect(results.status).to_not eq 0
expect(results.stderr).to match %r{reduce/reduce conflict.*\(E\).*\(F\)}
end
it "provides matched text to user code blocks" do
write_grammar <<EOF
it "provides matched text to user code blocks" do
case language
when "c"
write_grammar <<EOF
<<
#include <stdio.h>
#include <stdlib.h>
>>
token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
  /* Copy the matched bytes and terminate the copy explicitly: the match
   * is not NUL-terminated at match_length and malloc() does not zero the
   * buffer, so printing it with %s needs a terminator. */
  char * t = malloc(match_length + 1);
  memcpy(t, match, match_length);
  t[match_length] = 0;
  printf("Matched token is %s\\n", t);
  free(t);
>>
Start -> id;
EOF
when "d"
write_grammar <<EOF
<<
import std.stdio;
>>
@ -416,18 +608,31 @@ token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
>>
Start -> id;
EOF
build_parser
compile("spec/test_lexer_match_text.d")
results = run
expect(results.status).to eq 0
verify_lines(results.stdout, [
"Matched token is identifier_123",
"pass1",
])
end
end
build_parser(language: language)
compile("spec/test_lexer_match_text.#{language}", language: language)
results = run
expect(results.status).to eq 0
verify_lines(results.stdout, [
"Matched token is identifier_123",
"pass1",
])
end
it "allows storing a result value for the lexer" do
write_grammar <<EOF
it "allows storing a result value for the lexer" do
case language
when "c"
write_grammar <<EOF
ptype uint64_t;
token word /[a-z]+/ <<
$$ = match_length;
>>
Start -> word <<
$$ = $1;
>>
EOF
when "d"
write_grammar <<EOF
ptype ulong;
token word /[a-z]+/ <<
$$ = match.length;
@ -436,53 +641,56 @@ Start -> word <<
$$ = $1;
>>
EOF
build_parser
compile("spec/test_lexer_result_value.d")
results = run
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
end
build_parser(language: language)
compile("spec/test_lexer_result_value.#{language}", language: language)
results = run
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
it "tracks position of parser errors" do
write_grammar <<EOF
it "tracks position of parser errors" do
write_grammar <<EOF
token a;
token num /\\d+/;
drop /\\s+/;
Start -> a num Start;
Start -> a num;
EOF
build_parser
compile("spec/test_error_positions.d")
results = run
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
build_parser(language: language)
compile("spec/test_error_positions.#{language}", language: language)
results = run
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
it "allows creating a JSON parser" do
write_grammar(File.read("spec/json_parser.propane"))
build_parser
compile(["spec/test_parsing_json.d", "spec/json_types.d"])
end
it "allows creating a JSON parser" do
write_grammar(File.read("spec/json_parser.#{language}.propane"))
build_parser(language: language)
compile(["spec/test_parsing_json.#{language}", "spec/json_types.#{language}"], language: language)
end
it "allows generating multiple parsers in the same program" do
write_grammar(<<EOF, name: "myp1")
it "allows generating multiple parsers in the same program" do
write_grammar(<<EOF, name: "myp1")
prefix myp1_;
token a;
token num /\\d+/;
drop /\\s+/;
Start -> a num;
EOF
build_parser(name: "myp1")
write_grammar(<<EOF, name: "myp2")
build_parser(name: "myp1", language: language)
write_grammar(<<EOF, name: "myp2")
prefix myp2_;
token b;
token c;
Start -> b c b;
EOF
build_parser(name: "myp2")
compile("spec/test_multiple_parsers.d", parsers: %w[myp1 myp2])
results = run
expect(results.stderr).to eq ""
expect(results.status).to eq 0
build_parser(name: "myp2", language: language)
compile("spec/test_multiple_parsers.#{language}", parsers: %w[myp1 myp2], language: language)
results = run
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
end
end
end

View File

@ -0,0 +1,29 @@
#include "testparser.h"
#include "testutils.h"
#include <string.h>
/**
 * Parse several arithmetic expressions with the generated parser and check
 * the value computed for each one.
 */
int main()
{
    p_context_t ctx;
    char const * expr;

    /* Multiplication is applied before addition. */
    expr = "1 + 2 * 3 + 4";
    p_context_init(&ctx, (uint8_t const *)expr, strlen(expr));
    assert_eq(P_SUCCESS, p_parse(&ctx));
    assert_eq(11, p_result(&ctx));

    /* The power operator is applied before multiplication. */
    expr = "1 * 2 ** 4 * 3";
    p_context_init(&ctx, (uint8_t const *)expr, strlen(expr));
    assert_eq(P_SUCCESS, p_parse(&ctx));
    assert_eq(48, p_result(&ctx));

    /* Parentheses group a sub-expression. */
    expr = "(1 + 2) * 3 + 4";
    p_context_init(&ctx, (uint8_t const *)expr, strlen(expr));
    assert_eq(P_SUCCESS, p_parse(&ctx));
    assert_eq(13, p_result(&ctx));

    /* A parenthesized group used as the base of a power. */
    expr = "(2 * 2) ** 3 + 4 + 5";
    p_context_init(&ctx, (uint8_t const *)expr, strlen(expr));
    assert_eq(P_SUCCESS, p_parse(&ctx));
    assert_eq(73, p_result(&ctx));

    return 0;
}

View File

@ -0,0 +1,39 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
/**
 * Check the result code, reported position and offending token for inputs
 * that parse successfully, hit an unexpected token, hit unexpected input,
 * or contain an undecodable byte.
 */
int main()
{
/* Well-formed input parses successfully. */
char const * input = "a 42";
p_context_t context;
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
/* A token that is not allowed at this point is reported with its position.
 * NOTE(review): with the literal exactly as written here the second `a` on
 * row 2 would sit at column 2, not 3 -- the literal may have lost
 * whitespace; confirm against the repository copy. */
input = "a\n123\na a";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_UNEXPECTED_TOKEN);
assert(p_position(&context).row == 2);
assert(p_position(&context).col == 3);
assert(context.token == TOKEN_a);
/* An unexpected token at the very start of the input. */
input = "12";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_UNEXPECTED_TOKEN);
assert(p_position(&context).row == 0);
assert(p_position(&context).col == 0);
assert(context.token == TOKEN_num);
/* Text that no lexer pattern accepts is reported as unexpected input. */
input = "a 12\n\nab";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_UNEXPECTED_INPUT);
assert(p_position(&context).row == 2);
assert(p_position(&context).col == 1);
/* The byte 0xAA cannot start a UTF-8 sequence, so decoding fails.
 * NOTE(review): with the literal exactly as written here the bad byte would
 * sit at column 2 of row 5, not 4 -- same whitespace caveat as above;
 * confirm. */
input = "a 12\n\na\n\n77\na \xAA";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_DECODE_ERROR);
assert(p_position(&context).row == 5);
assert(p_position(&context).col == 4);
return 0;
}

View File

@ -33,6 +33,5 @@ unittest
input = "a 12\n\na\n\n77\na \xAA";
p_context_init(&context, input);
assert(p_parse(&context) == P_DECODE_ERROR);
writeln(p_position(&context));
assert(p_position(&context) == p_position_t(5, 4));
}

92
spec/test_lexer.c Normal file
View File

@ -0,0 +1,92 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
/**
 * Exercise the generated code point decoder directly, then lex a short
 * input token by token and check each token's position, length and ID.
 */
int main()
{
size_t result;
p_code_point_t code_point;
uint8_t code_point_length;
/* A single ASCII byte decodes to itself. */
result = p_decode_code_point((uint8_t const *)"5", 1u, &code_point, &code_point_length);
assert(result == P_SUCCESS);
assert(code_point == '5');
assert(code_point_length == 1u);
/* Zero-length input reports end of file. */
result = p_decode_code_point((uint8_t const *)"", 0u, &code_point, &code_point_length);
assert(result == P_EOF);
/* Two-byte sequence. */
result = p_decode_code_point((uint8_t const *)"\xC2\xA9", 2u, &code_point, &code_point_length);
assert(result == P_SUCCESS);
assert(code_point == 0xA9u);
assert(code_point_length == 2u);
/* Four-byte sequence. */
result = p_decode_code_point((uint8_t const *)"\xf0\x9f\xa7\xa1", 4u, &code_point, &code_point_length);
assert(result == P_SUCCESS);
assert(code_point == 0x1F9E1u);
assert(code_point_length == 4u);
/* Four-byte lead with a non-continuation byte (0x27) and only three bytes. */
result = p_decode_code_point((uint8_t const *)"\xf0\x9f\x27", 3u, &code_point, &code_point_length);
assert(result == P_DECODE_ERROR);
/* Four-byte lead whose last byte (0xFF) is not a continuation byte. */
result = p_decode_code_point((uint8_t const *)"\xf0\x9f\xa7\xFF", 4u, &code_point, &code_point_length);
assert(result == P_DECODE_ERROR);
/* 0xFE is not a valid UTF-8 lead byte. */
result = p_decode_code_point((uint8_t const *)"\xfe", 1u, &code_point, &code_point_length);
assert(result == P_DECODE_ERROR);
/* Lex a two-line input; rows and columns asserted below are zero-based. */
p_token_info_t token_info;
char const * input = "5 + 4 * \n677 + 567";
p_context_t context;
p_context_init(&context, (uint8_t const *)input, strlen(input));
/* "5" */
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 0u);
assert(token_info.position.col == 0u);
assert(token_info.length == 1u);
assert(token_info.token == TOKEN_int);
/* "+" */
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 0u);
assert(token_info.position.col == 2u);
assert(token_info.length == 1u);
assert(token_info.token == TOKEN_plus);
/* "4" */
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 0u);
assert(token_info.position.col == 4u);
assert(token_info.length == 1u);
assert(token_info.token == TOKEN_int);
/* "*" */
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 0u);
assert(token_info.position.col == 6u);
assert(token_info.length == 1u);
assert(token_info.token == TOKEN_times);
/* "677" on the second line. */
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 1u);
assert(token_info.position.col == 0u);
assert(token_info.length == 3u);
assert(token_info.token == TOKEN_int);
/* "+" */
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 1u);
assert(token_info.position.col == 4u);
assert(token_info.length == 1u);
assert(token_info.token == TOKEN_plus);
/* "567" */
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 1u);
assert(token_info.position.col == 6u);
assert(token_info.length == 3u);
assert(token_info.token == TOKEN_int);
/* End of input is returned as a zero-length EOF token. */
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 1u);
assert(token_info.position.col == 9u);
assert(token_info.length == 0u);
assert(token_info.token == TOKEN___EOF);
/* Empty input yields the EOF token immediately. */
p_context_init(&context, (uint8_t const *)"", 0u);
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 0u);
assert(token_info.position.col == 0u);
assert(token_info.length == 0u);
assert(token_info.token == TOKEN___EOF);
return 0;
}

View File

@ -0,0 +1,15 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include <stdio.h>
/**
 * Parse a single identifier; the grammar's lexer code block prints the
 * matched text before "pass1" is printed here.
 */
int main()
{
    static char const text[] = "identifier_123";
    p_context_t ctx;

    p_context_init(&ctx, (uint8_t const *)text, sizeof(text) - 1u);
    assert(p_parse(&ctx) == P_SUCCESS);
    printf("pass1\n");

    return 0;
}

20
spec/test_lexer_modes.c Normal file
View File

@ -0,0 +1,20 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include <stdio.h>
/**
 * Parse two inputs that each contain a quoted string, printing a marker
 * after each successful parse.
 */
int main()
{
    p_context_t ctx;
    char const * text;

    text = "abc \"a string\" def";
    p_context_init(&ctx, (uint8_t const *)text, strlen(text));
    assert(p_parse(&ctx) == P_SUCCESS);
    printf("pass1\n");

    /* The quoted text contains words that are tokens outside a string. */
    text = "abc \"abc def\" def";
    p_context_init(&ctx, (uint8_t const *)text, strlen(text));
    assert(p_parse(&ctx) == P_SUCCESS);
    printf("pass2\n");

    return 0;
}

View File

@ -0,0 +1,19 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
/**
 * Parse single words and check that the parse result equals the length of
 * each word.
 */
int main()
{
    static struct
    {
        char const * text;
        size_t expected;
    } const cases[] = {
        {"x", 1u},
        {"fabulous", 8u},
    };

    for (size_t i = 0u; i < sizeof(cases) / sizeof(cases[0]); i++)
    {
        /* p_context_init() overwrites the whole context, so a fresh one per
         * case is equivalent to re-initializing a shared one. */
        p_context_t ctx;
        p_context_init(&ctx, (uint8_t const *)cases[i].text, strlen(cases[i].text));
        assert(p_parse(&ctx) == P_SUCCESS);
        assert(p_result(&ctx) == cases[i].expected);
    }

    return 0;
}

View File

@ -0,0 +1,18 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
/**
 * Check that a character no lexer pattern accepts is reported as unexpected
 * input, and that a run of digits parses to its numeric value.
 */
int main()
{
    p_context_t ctx;
    char const * text;

    /* "x" is not matched by any pattern of the grammar. */
    text = "x";
    p_context_init(&ctx, (uint8_t const *)text, strlen(text));
    assert(p_parse(&ctx) == P_UNEXPECTED_INPUT);

    text = "123";
    p_context_init(&ctx, (uint8_t const *)text, strlen(text));
    assert(p_parse(&ctx) == P_SUCCESS);
    assert(p_result(&ctx) == 123u);

    return 0;
}

View File

@ -0,0 +1,19 @@
#include "testparsermyp1.h"
#include "testparsermyp2.h"
#include <assert.h>
#include <string.h>
/**
 * Run two independently generated parsers (prefixes myp1_ and myp2_) in the
 * same program, each on its own input.
 */
int main()
{
    /* First parser. */
    static char const text1[] = "a\n1";
    myp1_context_t ctx1;
    myp1_context_init(&ctx1, (uint8_t const *)text1, sizeof(text1) - 1u);
    assert(myp1_parse(&ctx1) == MYP1_SUCCESS);

    /* Second parser. */
    static char const text2[] = "bcb";
    myp2_context_t ctx2;
    myp2_context_init(&ctx2, (uint8_t const *)text2, sizeof(text2) - 1u);
    assert(myp2_parse(&ctx2) == MYP2_SUCCESS);

    return 0;
}

View File

@ -0,0 +1,17 @@
#include "testparser.h"
#include <string.h>
#include <assert.h>
/**
 * Parse two inputs that share the prefix "ab" and differ only in the final
 * token; both must be accepted.
 */
int main()
{
    static char const * const inputs[] = {"aba", "abb"};

    for (size_t i = 0u; i < sizeof(inputs) / sizeof(inputs[0]); i++)
    {
        /* p_context_init() overwrites the whole context, so a fresh one per
         * input is equivalent to re-initializing a shared one. */
        p_context_t ctx;
        p_context_init(&ctx, (uint8_t const *)inputs[i], strlen(inputs[i]));
        assert(p_parse(&ctx) == P_SUCCESS);
    }

    return 0;
}

View File

@ -0,0 +1,24 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
/**
 * Check an incomplete input (error reported at end of file) followed by two
 * inputs that must be accepted.
 */
int main()
{
    p_context_t ctx;
    char const * text;

    /* Input ends too early: the offending token is the EOF token. */
    text = "a";
    p_context_init(&ctx, (uint8_t const *)text, strlen(text));
    assert(p_parse(&ctx) == P_UNEXPECTED_TOKEN);
    assert(p_position(&ctx).row == 0);
    assert(p_position(&ctx).col == 1);
    assert(ctx.token == TOKEN___EOF);

    text = "a b";
    p_context_init(&ctx, (uint8_t const *)text, strlen(text));
    assert(p_parse(&ctx) == P_SUCCESS);

    text = "bb";
    p_context_init(&ctx, (uint8_t const *)text, strlen(text));
    assert(p_parse(&ctx) == P_SUCCESS);

    return 0;
}

View File

@ -0,0 +1,13 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
/**
 * Parse "ab"; any output comes from the code blocks attached to the
 * grammar's rules.
 */
int main()
{
    static char const text[] = "ab";
    p_context_t ctx;

    p_context_init(&ctx, (uint8_t const *)text, sizeof(text) - 1u);
    assert(p_parse(&ctx) == P_SUCCESS);

    return 0;
}

56
spec/test_parsing_json.c Normal file
View File

@ -0,0 +1,56 @@
#include "testparser.h"
#include "json_types.h"
#include <string.h>
#include <assert.h>
/**
 * Parse several JSON documents and inspect the JSONValue tree returned by
 * p_result() for each one.
 */
int main()
{
/* NOTE(review): the grammar in spec/json_parser.c.propane has no empty
 * production for Start, so empty input looks like it should produce
 * P_UNEXPECTED_TOKEN rather than P_SUCCESS -- confirm. The spec only
 * builds and compiles this file, it does not run it. */
char const * input = "";
p_context_t context;
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
/* Empty object. */
input = "{}";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
assert(p_result(&context)->id == JSON_OBJECT);
/* Empty array. */
input = "[]";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
assert(p_result(&context)->id == JSON_ARRAY);
/* Negative number with a fractional part (exact floating-point compare). */
input = "-45.6";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
assert(p_result(&context)->id == JSON_NUMBER);
assert(p_result(&context)->number == -45.6);
/* Number with an exponent: expects mantissa * 10^exponent, i.e. 2 * 10^-2. */
input = "2E-2";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
assert(p_result(&context)->id == JSON_NUMBER);
assert(p_result(&context)->number == 0.02);
/* Object with a single member. */
input = "{\"hi\":true}";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
JSONValue * o = p_result(&context);
assert(o->id == JSON_OBJECT);
assert_eq(1, o->object.size);
assert(strcmp(o->object.entries[0].name, "hi") == 0);
assert(o->object.entries[0].value->id == JSON_TRUE);
/* Object with two members; entries are expected in input order. */
input = "{\"ff\": false, \"nn\": null}";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
o = p_result(&context);
assert(o->id == JSON_OBJECT);
assert_eq(2, o->object.size);
assert(strcmp(o->object.entries[0].name, "ff") == 0);
assert(o->object.entries[0].value->id == JSON_FALSE);
assert(strcmp(o->object.entries[1].name, "nn") == 0);
assert(o->object.entries[1].value->id == JSON_NULL);
return 0;
}

24
spec/test_parsing_lists.c Normal file
View File

@ -0,0 +1,24 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
/**
 * Parse several inputs made of repeated "a" characters and check the value
 * reported by p_result() for each one.
 *
 * NOTE(review): the expected values equal the number of "a" characters in
 * each input, so the result is presumably an item count - confirm against
 * the grammar used for this test.
 */
int main(void)
{
    /* Input text paired with the parse result expected for it. */
    static struct
    {
        char const * text;
        unsigned int expected;
    } const cases[] = {
        {"a", 1u},
        {"", 0u},
        {"aaaaaaaaaaaaaaaa", 16u},
    };
    size_t const n_cases = sizeof(cases) / sizeof(cases[0]);
    p_context_t context;

    for (size_t i = 0u; i < n_cases; i++)
    {
        char const * input = cases[i].text;

        p_context_init(&context, (uint8_t const *)input, strlen(input));
        assert(p_parse(&context) == P_SUCCESS);
        assert(p_result(&context) == cases[i].expected);
    }

    return 0;
}

20
spec/test_pattern.c Normal file
View File

@ -0,0 +1,20 @@
#include "testparser.h"
#include <stdio.h>
#include <assert.h>
#include <string.h>
/**
 * Parse two inputs in turn, printing a progress marker after each
 * successful parse.
 */
int main(void)
{
    p_context_t ctx;
    char const * text;

    /* First input. */
    text = "abcdef";
    p_context_init(&ctx, (uint8_t const *)text, strlen(text));
    assert(p_parse(&ctx) == P_SUCCESS);
    printf("pass1\n");

    /* Second input, parsed with a freshly initialized context. */
    text = "defabcdef";
    p_context_init(&ctx, (uint8_t const *)text, strlen(text));
    assert(p_parse(&ctx) == P_SUCCESS);
    printf("pass2\n");

    return 0;
}

View File

@ -0,0 +1,13 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
/**
 * Parse the single input "defghidef" and require that parsing succeeds.
 */
int main(void)
{
    char const * const text = "defghidef";
    p_context_t ctx;

    p_context_init(&ctx, (uint8_t const *)text, strlen(text));
    assert(p_parse(&ctx) == P_SUCCESS);

    return 0;
}

20
spec/test_user_code.c Normal file
View File

@ -0,0 +1,20 @@
#include "testparser.h"
#include <assert.h>
#include <stdio.h>
#include <string.h>
/**
 * Parse two inputs in turn, printing a progress marker after each
 * successful parse.
 */
int main(void)
{
    p_context_t ctx;
    char const * text;

    /* First input. */
    text = "abcdef";
    p_context_init(&ctx, (uint8_t const *)text, strlen(text));
    assert(p_parse(&ctx) == P_SUCCESS);
    printf("pass1\n");

    /* Second input, parsed with a freshly initialized context. */
    text = "abcabcdef";
    p_context_init(&ctx, (uint8_t const *)text, strlen(text));
    assert(p_parse(&ctx) == P_SUCCESS);
    printf("pass2\n");

    return 0;
}

38
spec/testutils.c Normal file
View File

@ -0,0 +1,38 @@
#include <stdio.h>
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include "testutils.h"
/**
 * Check that two size_t values are equal; on mismatch, report the location
 * and both values on stderr and trigger an assertion failure.
 *
 * @param expected
 *   Expected value.
 * @param actual
 *   Value actually observed.
 * @param file
 *   Source file name to include in the report.
 * @param line
 *   Source line number to include in the report.
 */
void assert_eq_size_t_i(size_t expected, size_t actual, char const * file, size_t line)
{
    if (expected != actual)
    {
        /* %zu is the conversion for size_t; %lu is only correct on
         * platforms where size_t happens to be unsigned long. */
        fprintf(stderr, "%s:%zu: expected %zu, got %zu\n", file, line, expected, actual);
        assert(false);
    }
}
/**
 * Initialize a string object with a heap-allocated copy of a C string.
 *
 * The process is aborted if the copy cannot be allocated.
 *
 * @param[out] str
 *   String object to initialize. Its previous contents are overwritten
 *   without being freed.
 * @param cs
 *   NUL-terminated string to copy.
 */
void str_init(str_t * str, char const * cs)
{
    /* Size of the copy, including the NUL terminator. */
    size_t size = strlen(cs) + 1u;
    char * copy = malloc(size);
    if (copy == NULL)
    {
        /* Fail explicitly instead of writing through a null pointer. */
        abort();
    }
    memcpy(copy, cs, size);
    str->cs = copy;
}
/**
 * Append a C string to a string object.
 *
 * A new buffer holding the concatenation is allocated and the old buffer
 * is released. The process is aborted if the new buffer cannot be
 * allocated.
 *
 * @param str
 *   String object to extend; must already hold a NUL-terminated string.
 * @param cs
 *   NUL-terminated string to append.
 */
void str_append(str_t * str, char const * cs)
{
    size_t length = strlen(str->cs);
    size_t length2 = strlen(cs);
    char * new_cs = malloc(length + length2 + 1u);
    if (new_cs == NULL)
    {
        /* Fail explicitly instead of writing through a null pointer. */
        abort();
    }
    memcpy(new_cs, str->cs, length);
    /* Copy the appended text together with its NUL terminator. */
    memcpy(&new_cs[length], cs, length2 + 1u);
    /* The old buffer is released only after both copies are complete. */
    free(str->cs);
    str->cs = new_cs;
}
/**
 * Release the buffer owned by a string object.
 *
 * @param str
 *   String object whose buffer is freed. Its pointer is reset to NULL so
 *   that calling str_free() again on the same object is harmless.
 */
void str_free(str_t * str)
{
    free(str->cs);
    /* Do not leave a dangling pointer behind. */
    str->cs = NULL;
}

19
spec/testutils.h Normal file
View File

@ -0,0 +1,19 @@
#pragma once

/* size_t is used by the declarations below. */
#include <stddef.h>

/**
 * Check that two size_t values are equal; on mismatch, report the location
 * and both values on stderr and trigger an assertion failure.
 *
 * @param expected
 *   Expected value.
 * @param actual
 *   Value actually observed.
 * @param file
 *   Source file name to include in the report.
 * @param line
 *   Source line number to include in the report.
 */
void assert_eq_size_t_i(size_t expected, size_t actual, char const * file, size_t line);

/**
 * Check that two size_t values are equal, reporting the location of the
 * macro invocation on mismatch.
 */
#define assert_eq(expected, actual) \
    assert_eq_size_t_i((expected), (actual), __FILE__, __LINE__)

/**
 * Heap-allocated string.
 */
typedef struct
{
    /** NUL-terminated character buffer owned by the string object. */
    char * cs;
} str_t;

/**
 * Initialize a string object with a heap-allocated copy of a C string.
 *
 * @param[out] str
 *   String object to initialize.
 * @param cs
 *   NUL-terminated string to copy.
 */
void str_init(str_t * str, char const * cs);

/**
 * Append a C string to a string object, reallocating its buffer.
 *
 * @param str
 *   String object to extend.
 * @param cs
 *   NUL-terminated string to append.
 */
void str_append(str_t * str, char const * cs);

/**
 * Release the buffer owned by a string object.
 *
 * @param str
 *   String object whose buffer is freed.
 */
void str_free(str_t * str);

/**
 * Get the C string held by a string object.
 *
 * @param str
 *   String object.
 *
 * @return
 *   Pointer to the object's character buffer. The buffer remains owned by
 *   the string object.
 */
static inline char * str_cstr(str_t * str)
{
    return str->cs;
}