Allow configuring API prefix - close #9

This commit is contained in:
Josh Holtrop 2023-07-13 18:06:24 -04:00
parent 7d7929a358
commit 5ce562cbc3
5 changed files with 136 additions and 59 deletions

View File

@ -25,19 +25,19 @@ import std.stdio;
/* Result codes. */
public enum : size_t
{
P_SUCCESS,
P_DECODE_ERROR,
P_UNEXPECTED_INPUT,
P_UNEXPECTED_TOKEN,
P_DROP,
P_EOF,
<%= @grammar.prefix.upcase %>SUCCESS,
<%= @grammar.prefix.upcase %>DECODE_ERROR,
<%= @grammar.prefix.upcase %>UNEXPECTED_INPUT,
<%= @grammar.prefix.upcase %>UNEXPECTED_TOKEN,
<%= @grammar.prefix.upcase %>DROP,
<%= @grammar.prefix.upcase %>EOF,
}
/** Token type. */
public alias p_token_t = <%= get_type_for(@grammar.invalid_token_id) %>;
public alias <%= @grammar.prefix %>token_t = <%= get_type_for(@grammar.invalid_token_id) %>;
/** Token IDs. */
public enum : p_token_t
public enum : <%= @grammar.prefix %>token_t
{
<% @grammar.tokens.each_with_index do |token, index| %>
TOKEN_<%= token.code_name %> = <%= index %>,
@ -49,10 +49,10 @@ public enum : p_token_t
}
/** Code point type. */
public alias p_code_point_t = uint;
public alias <%= @grammar.prefix %>code_point_t = uint;
/** Parser values type(s). */
public union p_value_t
public union <%= @grammar.prefix %>value_t
{
<% @grammar.ptypes.each do |name, typestring| %>
<%= typestring %> v_<%= name %>;
@ -64,7 +64,7 @@ public union p_value_t
*
* This is useful for reporting errors, etc...
*/
public struct p_position_t
public struct <%= @grammar.prefix %>position_t
{
/** Input text row (0-based). */
uint row;
@ -74,19 +74,19 @@ public struct p_position_t
}
/** Lexed token information. */
public struct p_token_info_t
public struct <%= @grammar.prefix %>token_info_t
{
/** Text position where the token was found. */
p_position_t position;
<%= @grammar.prefix %>position_t position;
/** Number of input bytes used by the token. */
size_t length;
/** Token that was lexed. */
p_token_t token;
<%= @grammar.prefix %>token_t token;
/** Parser value associated with the token. */
p_value_t pvalue;
<%= @grammar.prefix %>value_t pvalue;
}
/**
@ -95,7 +95,7 @@ public struct p_token_info_t
* The user must allocate an instance of this structure and pass it to any
* public API function.
*/
public struct p_context_t
public struct <%= @grammar.prefix %>context_t
{
/* Lexer context data. */
@ -106,7 +106,7 @@ public struct p_context_t
size_t input_index;
/** Input text position (row/column). */
p_position_t text_position;
<%= @grammar.prefix %>position_t text_position;
/** Current lexer mode. */
size_t mode;
@ -114,7 +114,7 @@ public struct p_context_t
/* Parser context data. */
/** Parse result value. */
p_value_t parse_result;
<%= @grammar.prefix %>value_t parse_result;
}
/**************************************************************************
@ -122,7 +122,7 @@ public struct p_context_t
*************************************************************************/
/** Token names. */
public immutable string[] p_token_names = [
public immutable string[] <%= @grammar.prefix %>token_names = [
<% @grammar.tokens.each_with_index do |token, index| %>
"<%= token.name %>",
<% end %>
@ -132,6 +132,19 @@ public immutable string[] p_token_names = [
* Private types
*************************************************************************/
<% if @grammar.prefix.upcase != "P_" %>
/* Result codes. */
private enum : size_t
{
P_SUCCESS,
P_DECODE_ERROR,
P_UNEXPECTED_INPUT,
P_UNEXPECTED_TOKEN,
P_DROP,
P_EOF,
}
<% end %>
/* An invalid ID value. */
private enum size_t INVALID_ID = cast(size_t)-1;
@ -147,10 +160,10 @@ private enum size_t INVALID_ID = cast(size_t)-1;
* @param input
* Text input.
*/
public void p_context_init(p_context_t * context, string input)
public void <%= @grammar.prefix %>context_init(<%= @grammar.prefix %>context_t * context, string input)
{
/* New default-initialized context structure. */
p_context_t newcontext;
<%= @grammar.prefix %>context_t newcontext;
/* Lexer initialization. */
newcontext.input = input;
@ -179,15 +192,15 @@ public void p_context_init(p_context_t * context, string input)
* @retval P_DECODE_ERROR when an encoding error is observed
* @retval P_EOF when the end of the text input is reached
*/
public size_t p_decode_code_point(string input,
p_code_point_t * out_code_point, ubyte * out_code_point_length)
public size_t <%= @grammar.prefix %>decode_code_point(string input,
<%= @grammar.prefix %>code_point_t * out_code_point, ubyte * out_code_point_length)
{
if (input.length == 0u)
{
return P_EOF;
}
char c = input[0];
p_code_point_t code_point;
<%= @grammar.prefix %>code_point_t code_point;
ubyte code_point_length;
if ((c & 0x80u) == 0u)
{
@ -272,10 +285,10 @@ private enum lexer_user_code_id_t INVALID_USER_CODE_ID = <%= user_code_id_count
private struct lexer_transition_t
{
/** First code point in the range for this transition. */
p_code_point_t first;
<%= @grammar.prefix %>code_point_t first;
/** Last code point in the range for this transition. */
p_code_point_t last;
<%= @grammar.prefix %>code_point_t last;
/** Destination lexer state ID for this transition. */
lexer_state_id_t destination_state;
@ -291,7 +304,7 @@ private struct lexer_state_t
<%= get_type_for(@lexer.state_table.map {|ste| ste[:n_transitions]}.max) %> n_transitions;
/** Lexer token formed at this state. */
p_token_t token;
<%= @grammar.prefix %>token_t token;
/** Lexer user code ID to execute at this state. */
lexer_user_code_id_t code_id;
@ -319,7 +332,7 @@ private struct lexer_match_info_t
size_t length;
/** Input text position delta. */
p_position_t delta_position;
<%= @grammar.prefix %>position_t delta_position;
/** Accepting lexer state from the match. */
const(lexer_state_t) * accepting_state;
@ -361,9 +374,9 @@ private immutable lexer_mode_t[] lexer_mode_table = [
* @return Token to accept, or invalid token if the user code does
* not explicitly return a token.
*/
private p_token_t lexer_user_code(p_context_t * context,
private <%= @grammar.prefix %>token_t lexer_user_code(<%= @grammar.prefix %>context_t * context,
lexer_user_code_id_t code_id, string match,
p_token_info_t * out_token_info)
<%= @grammar.prefix %>token_info_t * out_token_info)
{
switch (code_id)
{
@ -423,7 +436,7 @@ private lexer_state_id_t check_lexer_transition(uint current_state, uint code_po
* @retval P_EOF
* The end of the text input was reached.
*/
private size_t find_longest_match(p_context_t * context,
private size_t find_longest_match(<%= @grammar.prefix %>context_t * context,
lexer_match_info_t * out_match_info, size_t * out_unexpected_input_length)
{
lexer_match_info_t longest_match;
@ -433,9 +446,9 @@ private size_t find_longest_match(p_context_t * context,
for (;;)
{
string input = context.input[(context.input_index + attempt_match.length)..(context.input.length)];
p_code_point_t code_point;
<%= @grammar.prefix %>code_point_t code_point;
ubyte code_point_length;
size_t result = p_decode_code_point(input, &code_point, &code_point_length);
size_t result = <%= @grammar.prefix %>decode_code_point(input, &code_point, &code_point_length);
switch (result)
{
case P_SUCCESS:
@ -523,9 +536,9 @@ private size_t find_longest_match(p_context_t * context,
* @retval P_DROP
* A drop pattern was matched so the lexer should continue.
*/
private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_token_info)
private size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
{
p_token_info_t token_info;
<%= @grammar.prefix %>token_info_t token_info;
token_info.position = context.text_position;
token_info.token = INVALID_TOKEN_ID;
*out_token_info = token_info; // TODO: remove
@ -535,11 +548,11 @@ private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_tok
switch (result)
{
case P_SUCCESS:
p_token_t token_to_accept = match_info.accepting_state.token;
<%= @grammar.prefix %>token_t token_to_accept = match_info.accepting_state.token;
if (match_info.accepting_state.code_id != INVALID_USER_CODE_ID)
{
string match = context.input[context.input_index..(context.input_index + match_info.length)];
p_token_t user_code_token = lexer_user_code(context,
<%= @grammar.prefix %>token_t user_code_token = lexer_user_code(context,
match_info.accepting_state.code_id, match, &token_info);
/* An invalid token returned from lexer_user_code() means that the
* user code did not explicitly return a token. So only override
@ -612,7 +625,7 @@ private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_tok
* @retval P_UNEXPECTED_INPUT
* Input text does not match any lexer pattern.
*/
public size_t p_lex(p_context_t * context, p_token_info_t * out_token_info)
public size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
{
for (;;)
{
@ -662,7 +675,7 @@ private struct shift_t
private struct reduce_t
{
/** Lookahead token. */
p_token_t token;
<%= @grammar.prefix %>token_t token;
/**
* Rule ID.
@ -716,7 +729,7 @@ private struct state_value_t
size_t state_id;
/** Parser value from this state. */
p_value_t pvalue;
<%= @grammar.prefix %>value_t pvalue;
this(size_t state_id)
{
@ -752,9 +765,9 @@ private immutable parser_state_t[] parser_state_table = [
*
* @return Parse value.
*/
private p_value_t parser_user_code(uint rule, state_value_t[] statevalues, uint n_states)
private <%= @grammar.prefix %>value_t parser_user_code(uint rule, state_value_t[] statevalues, uint n_states)
{
p_value_t _pvalue;
<%= @grammar.prefix %>value_t _pvalue;
switch (rule)
{
@ -805,7 +818,7 @@ private size_t check_shift(size_t state_id, size_t symbol_id)
*
* @return State to reduce to, or INVALID_ID if none.
*/
private size_t check_reduce(size_t state_id, p_token_t token)
private size_t check_reduce(size_t state_id, <%= @grammar.prefix %>token_t token)
{
size_t start = parser_state_table[state_id].reduce_table_index;
size_t end = start + parser_state_table[state_id].n_reduce_entries;
@ -828,7 +841,7 @@ private size_t check_reduce(size_t state_id, p_token_t token)
*
* @retval P_SUCCESS
* The parser successfully matched the input text. The parse result value
* can be accessed with p_result().
* can be accessed with <%= @grammar.prefix %>result().
* @retval P_UNEXPECTED_TOKEN
* An unexpected token was encountered that does not match any grammar rule.
* @retval P_DECODE_ERROR
@ -836,18 +849,18 @@ private size_t check_reduce(size_t state_id, p_token_t token)
* @retval P_UNEXPECTED_INPUT
* Input text does not match any lexer pattern.
*/
public size_t p_parse(p_context_t * context)
public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
{
p_token_info_t token_info;
p_token_t token = INVALID_TOKEN_ID;
<%= @grammar.prefix %>token_info_t token_info;
<%= @grammar.prefix %>token_t token = INVALID_TOKEN_ID;
state_value_t[] statevalues = new state_value_t[](1);
size_t reduced_rule_set = INVALID_ID;
p_value_t reduced_parser_value;
<%= @grammar.prefix %>value_t reduced_parser_value;
for (;;)
{
if (token == INVALID_TOKEN_ID)
{
size_t lexer_result = p_lex(context, &token_info);
size_t lexer_result = <%= @grammar.prefix %>lex(context, &token_info);
if (lexer_result != P_SUCCESS)
{
return lexer_result;
@ -883,7 +896,7 @@ public size_t p_parse(p_context_t * context)
{
/* We shifted a RuleSet. */
statevalues[$-1].pvalue = reduced_parser_value;
p_value_t new_parse_result;
<%= @grammar.prefix %>value_t new_parse_result;
reduced_parser_value = new_parse_result;
reduced_rule_set = INVALID_ID;
}
@ -904,7 +917,7 @@ public size_t p_parse(p_context_t * context)
write("Unexpected token ");
if (token != INVALID_TOKEN_ID)
{
writeln(p_token_names[token]);
writeln(<%= @grammar.prefix %>token_names[token]);
}
else
{
@ -928,7 +941,7 @@ public size_t p_parse(p_context_t * context)
*
* @return Parse result value.
*/
public <%= start_rule_type[1] %> p_result(p_context_t * context)
public <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
{
return context.parse_result.v_<%= start_rule_type[0] %>;
}
@ -941,7 +954,7 @@ public <%= start_rule_type[1] %> p_result(p_context_t * context)
*
* @return Current text position.
*/
public p_position_t p_position(p_context_t * context)
public <%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context)
{
return context.text_position;
}

View File

@ -12,6 +12,7 @@ class Propane
attr_reader :tokens
attr_reader :code_blocks
attr_reader :ptypes
attr_reader :prefix
def initialize(input)
@patterns = []
@ -23,6 +24,7 @@ class Propane
@mode = nil
@input = input.gsub("\r\n", "\n")
@ptypes = {"default" => "void *"}
@prefix = "p_"
parse_grammar!
end
@ -55,6 +57,7 @@ class Propane
elsif parse_drop_statement!
elsif parse_rule_statement!
elsif parse_code_block_statement!
elsif parse_prefix_statement!
else
if @input.size > 25
@input = @input.slice(0..20) + "..."
@ -195,6 +198,13 @@ class Propane
end
end
# Parse a `prefix <identifier>;` grammar statement, which overrides the
# default "p_" prefix applied to generated API names.
#
# @return [true, nil] true when a prefix statement was consumed and
#   @prefix was updated, nil when the input does not start with one.
def parse_prefix_statement!
  md = consume!(/prefix\s+(#{IDENTIFIER_REGEX})\s*;/)
  return unless md
  @prefix = md[1]
  true
end
def parse_pattern!
if md = consume!(%r{/})
pattern = ""

View File

@ -34,6 +34,7 @@ EOF
expect(grammar.modulename).to eq "a.b"
expect(grammar.ptype).to eq "XYZ *"
expect(grammar.ptypes).to eq("default" => "XYZ *")
expect(grammar.prefix).to eq "p_"
o = grammar.tokens.find {|token| token.name == "while"}
expect(o).to_not be_nil
@ -111,8 +112,11 @@ token code2 <<
>>
tokenid token_with_no_pattern;
prefix myparser_;
EOF
grammar = Grammar.new(input)
expect(grammar.prefix).to eq "myparser_"
o = grammar.tokens.find {|token| token.name == "code1"}
expect(o).to_not be_nil

View File

@ -4,12 +4,14 @@ require "open3"
Results = Struct.new(:stdout, :stderr, :status)
describe Propane do
def write_grammar(grammar)
File.write("spec/run/testparser.propane", grammar)
def write_grammar(grammar, options = {})
options[:name] ||= ""
File.write("spec/run/testparser#{options[:name]}.propane", grammar)
end
def build_parser(options = {})
command = %w[./propane.sh spec/run/testparser.propane spec/run/testparser.d --log spec/run/testparser.log]
options[:name] ||= ""
command = %W[./propane.sh spec/run/testparser#{options[:name]}.propane spec/run/testparser#{options[:name]}.d --log spec/run/testparser#{options[:name]}.log]
if (options[:capture])
stdout, stderr, status = Open3.capture3(*command)
Results.new(stdout, stderr, status)
@ -19,8 +21,13 @@ describe Propane do
end
end
def compile(*test_files)
result = system(*%w[ldc2 --unittest -of spec/run/testparser spec/run/testparser.d -Ispec], *test_files)
def compile(test_files, options = {})
test_files = Array(test_files)
options[:parsers] ||= [""]
parsers = options[:parsers].map do |name|
"spec/run/testparser#{name}.d"
end
result = system(*%w[ldc2 --unittest -of spec/run/testparser -Ispec], *parsers, *test_files)
expect(result).to be_truthy
end
@ -377,6 +384,28 @@ EOF
it "allows creating a JSON parser" do
write_grammar(File.read("spec/json_parser.propane"))
build_parser
compile("spec/test_parsing_json.d", "spec/json_types.d")
compile(["spec/test_parsing_json.d", "spec/json_types.d"])
end
it "allows generating multiple parsers in the same program" do
write_grammar(<<EOF, name: "myp1")
prefix myp1_;
token a;
token num /\\d+/;
drop /\\s+/;
Start -> a num;
EOF
build_parser(name: "myp1")
write_grammar(<<EOF, name: "myp2")
prefix myp2_;
token b;
token c;
Start -> b c b;
EOF
build_parser(name: "myp2")
compile("spec/test_multiple_parsers.d", parsers: %w[myp1 myp2])
results = run
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
end

View File

@ -0,0 +1,21 @@
import testparsermyp1;
import testparsermyp2;
import std.stdio;
/* Entry point: all test logic lives in the unittest block below, which the
 * D runtime executes before main when compiled with --unittest (see the
 * spec's ldc2 invocation); main only reports success. */
int main()
{
return 0;
}
/* Verify that two independently generated parsers (API prefixes myp1_ and
 * myp2_) can be linked into one program and used side by side without
 * symbol collisions. */
unittest
{
/* "a", dropped whitespace, then a number — matches myp1's `Start -> a num`. */
string input1 = "a\n1";
myp1_context_t context1;
myp1_context_init(&context1, input1);
assert(myp1_parse(&context1) == MYP1_SUCCESS);
/* "bcb" — matches myp2's `Start -> b c b`. */
string input2 = "bcb";
myp2_context_t context2;
myp2_context_init(&context2, input2);
assert(myp2_parse(&context2) == MYP2_SUCCESS);
}