Allow configuring API prefix - close #9

Josh Holtrop 2023-07-13 18:06:24 -04:00
parent 7d7929a358
commit 5ce562cbc3
5 changed files with 136 additions and 59 deletions

View File

@@ -25,19 +25,19 @@ import std.stdio;
 /* Result codes. */
 public enum : size_t
 {
-    P_SUCCESS,
-    P_DECODE_ERROR,
-    P_UNEXPECTED_INPUT,
-    P_UNEXPECTED_TOKEN,
-    P_DROP,
-    P_EOF,
+    <%= @grammar.prefix.upcase %>SUCCESS,
+    <%= @grammar.prefix.upcase %>DECODE_ERROR,
+    <%= @grammar.prefix.upcase %>UNEXPECTED_INPUT,
+    <%= @grammar.prefix.upcase %>UNEXPECTED_TOKEN,
+    <%= @grammar.prefix.upcase %>DROP,
+    <%= @grammar.prefix.upcase %>EOF,
 }

 /** Token type. */
-public alias p_token_t = <%= get_type_for(@grammar.invalid_token_id) %>;
+public alias <%= @grammar.prefix %>token_t = <%= get_type_for(@grammar.invalid_token_id) %>;

 /** Token IDs. */
-public enum : p_token_t
+public enum : <%= @grammar.prefix %>token_t
 {
 <% @grammar.tokens.each_with_index do |token, index| %>
     TOKEN_<%= token.code_name %> = <%= index %>,

@@ -49,10 +49,10 @@ public enum : p_token_t
 }

 /** Code point type. */
-public alias p_code_point_t = uint;
+public alias <%= @grammar.prefix %>code_point_t = uint;

 /** Parser values type(s). */
-public union p_value_t
+public union <%= @grammar.prefix %>value_t
 {
 <% @grammar.ptypes.each do |name, typestring| %>
     <%= typestring %> v_<%= name %>;

@@ -64,7 +64,7 @@ public union p_value_t
  *
  * This is useful for reporting errors, etc...
  */
-public struct p_position_t
+public struct <%= @grammar.prefix %>position_t
 {
     /** Input text row (0-based). */
     uint row;

@@ -74,19 +74,19 @@ public struct p_position_t
 }

 /** Lexed token information. */
-public struct p_token_info_t
+public struct <%= @grammar.prefix %>token_info_t
 {
     /** Text position where the token was found. */
-    p_position_t position;
+    <%= @grammar.prefix %>position_t position;

     /** Number of input bytes used by the token. */
     size_t length;

     /** Token that was lexed. */
-    p_token_t token;
+    <%= @grammar.prefix %>token_t token;

     /** Parser value associated with the token. */
-    p_value_t pvalue;
+    <%= @grammar.prefix %>value_t pvalue;
 }

@@ -95,7 +95,7 @@ public struct p_token_info_t
  * The user must allocate an instance of this structure and pass it to any
  * public API function.
  */
-public struct p_context_t
+public struct <%= @grammar.prefix %>context_t
 {
     /* Lexer context data. */

@@ -106,7 +106,7 @@ public struct p_context_t
     size_t input_index;

     /** Input text position (row/column). */
-    p_position_t text_position;
+    <%= @grammar.prefix %>position_t text_position;

     /** Current lexer mode. */
     size_t mode;

@@ -114,7 +114,7 @@ public struct p_context_t
     /* Parser context data. */

     /** Parse result value. */
-    p_value_t parse_result;
+    <%= @grammar.prefix %>value_t parse_result;
 }

 /**************************************************************************
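As the doc comment above says, callers allocate the context themselves and pass it to every public API function. A minimal hedged sketch against the default p_ prefix (the result type depends on the grammar's start rule ptype):

    p_context_t context;                   // user-allocated; no hidden global state
    p_context_init(&context, "input text");
    if (p_parse(&context) == P_SUCCESS)
    {
        auto result = p_result(&context);  // type is set by the start rule's ptype
    }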
@@ -122,7 +122,7 @@ public struct p_context_t
 *************************************************************************/

 /** Token names. */
-public immutable string[] p_token_names = [
+public immutable string[] <%= @grammar.prefix %>token_names = [
 <% @grammar.tokens.each_with_index do |token, index| %>
     "<%= token.name %>",
 <% end %>

@@ -132,6 +132,19 @@ public immutable string[] p_token_names = [
  * Private types
  *************************************************************************/

+<% if @grammar.prefix.upcase != "P_" %>
+/* Result codes. */
+private enum : size_t
+{
+    P_SUCCESS,
+    P_DECODE_ERROR,
+    P_UNEXPECTED_INPUT,
+    P_UNEXPECTED_TOKEN,
+    P_DROP,
+    P_EOF,
+}
+<% end %>
+
 /* An invalid ID value. */
 private enum size_t INVALID_ID = cast(size_t)-1;
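The guard above exists because the generated implementation keeps returning the internal P_* result codes; with a non-default prefix those names no longer appear in the public enum, so they are re-declared privately (and skipped for the default p_ prefix, where the public enum already defines them). As a sketch, a grammar with the hypothetical directive prefix myparser_; would expand to roughly:

    /* Public result codes, carrying the configured prefix. */
    public enum : size_t
    {
        MYPARSER_SUCCESS,
        MYPARSER_DECODE_ERROR,
        MYPARSER_UNEXPECTED_INPUT,
        MYPARSER_UNEXPECTED_TOKEN,
        MYPARSER_DROP,
        MYPARSER_EOF,
    }

    /* Private copy of the internal codes used by the generated implementation. */
    private enum : size_t
    {
        P_SUCCESS,
        P_DECODE_ERROR,
        P_UNEXPECTED_INPUT,
        P_UNEXPECTED_TOKEN,
        P_DROP,
        P_EOF,
    }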
@@ -147,10 +160,10 @@ private enum size_t INVALID_ID = cast(size_t)-1;
  * @param input
  * Text input.
  */
-public void p_context_init(p_context_t * context, string input)
+public void <%= @grammar.prefix %>context_init(<%= @grammar.prefix %>context_t * context, string input)
 {
     /* New default-initialized context structure. */
-    p_context_t newcontext;
+    <%= @grammar.prefix %>context_t newcontext;

     /* Lexer initialization. */
     newcontext.input = input;

@@ -179,15 +192,15 @@ public void p_context_init(p_context_t * context, string input)
  * @retval P_DECODE_ERROR when an encoding error is observed
  * @retval P_EOF when the end of the text input is reached
  */
-public size_t p_decode_code_point(string input,
-    p_code_point_t * out_code_point, ubyte * out_code_point_length)
+public size_t <%= @grammar.prefix %>decode_code_point(string input,
+    <%= @grammar.prefix %>code_point_t * out_code_point, ubyte * out_code_point_length)
 {
     if (input.length == 0u)
     {
         return P_EOF;
     }
     char c = input[0];
-    p_code_point_t code_point;
+    <%= @grammar.prefix %>code_point_t code_point;
     ubyte code_point_length;
     if ((c & 0x80u) == 0u)
     {
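Since the decoder is part of the public API, it can also be called on its own. A hedged sketch with the default prefix, assuming a well-formed two-byte UTF-8 input:

    p_code_point_t code_point;
    ubyte code_point_length;
    size_t result = p_decode_code_point("\u03BB", &code_point, &code_point_length);
    assert(result == P_SUCCESS);
    assert(code_point == 0x03BB);    // U+03BB, Greek small letter lambda
    assert(code_point_length == 2u); // lambda occupies two bytes in UTF-8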
@@ -272,10 +285,10 @@ private enum lexer_user_code_id_t INVALID_USER_CODE_ID = <%= user_code_id_count
 private struct lexer_transition_t
 {
     /** First code point in the range for this transition. */
-    p_code_point_t first;
+    <%= @grammar.prefix %>code_point_t first;

     /** Last code point in the range for this transition. */
-    p_code_point_t last;
+    <%= @grammar.prefix %>code_point_t last;

     /** Destination lexer state ID for this transition. */
     lexer_state_id_t destination_state;

@@ -291,7 +304,7 @@ private struct lexer_state_t
     <%= get_type_for(@lexer.state_table.map {|ste| ste[:n_transitions]}.max) %> n_transitions;

     /** Lexer token formed at this state. */
-    p_token_t token;
+    <%= @grammar.prefix %>token_t token;

     /** Lexer user code ID to execute at this state. */
     lexer_user_code_id_t code_id;

@@ -319,7 +332,7 @@ private struct lexer_match_info_t
     size_t length;

     /** Input text position delta. */
-    p_position_t delta_position;
+    <%= @grammar.prefix %>position_t delta_position;

     /** Accepting lexer state from the match. */
     const(lexer_state_t) * accepting_state;

@@ -361,9 +374,9 @@ private immutable lexer_mode_t[] lexer_mode_table = [
  * @return Token to accept, or invalid token if the user code does
  * not explicitly return a token.
  */
-private p_token_t lexer_user_code(p_context_t * context,
+private <%= @grammar.prefix %>token_t lexer_user_code(<%= @grammar.prefix %>context_t * context,
     lexer_user_code_id_t code_id, string match,
-    p_token_info_t * out_token_info)
+    <%= @grammar.prefix %>token_info_t * out_token_info)
 {
     switch (code_id)
     {

@@ -423,7 +436,7 @@ private lexer_state_id_t check_lexer_transition(uint current_state, uint code_po
  * @retval P_EOF
  * The end of the text input was reached.
  */
-private size_t find_longest_match(p_context_t * context,
+private size_t find_longest_match(<%= @grammar.prefix %>context_t * context,
     lexer_match_info_t * out_match_info, size_t * out_unexpected_input_length)
 {
     lexer_match_info_t longest_match;

@@ -433,9 +446,9 @@ private size_t find_longest_match(p_context_t * context,
     for (;;)
     {
         string input = context.input[(context.input_index + attempt_match.length)..(context.input.length)];
-        p_code_point_t code_point;
+        <%= @grammar.prefix %>code_point_t code_point;
         ubyte code_point_length;
-        size_t result = p_decode_code_point(input, &code_point, &code_point_length);
+        size_t result = <%= @grammar.prefix %>decode_code_point(input, &code_point, &code_point_length);
         switch (result)
         {
             case P_SUCCESS:

@@ -523,9 +536,9 @@ private size_t find_longest_match(p_context_t * context,
  * @retval P_DROP
  * A drop pattern was matched so the lexer should continue.
  */
-private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_token_info)
+private size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
 {
-    p_token_info_t token_info;
+    <%= @grammar.prefix %>token_info_t token_info;
     token_info.position = context.text_position;
     token_info.token = INVALID_TOKEN_ID;
     *out_token_info = token_info; // TODO: remove

@@ -535,11 +548,11 @@ private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_tok
     switch (result)
     {
         case P_SUCCESS:
-            p_token_t token_to_accept = match_info.accepting_state.token;
+            <%= @grammar.prefix %>token_t token_to_accept = match_info.accepting_state.token;
             if (match_info.accepting_state.code_id != INVALID_USER_CODE_ID)
             {
                 string match = context.input[context.input_index..(context.input_index + match_info.length)];
-                p_token_t user_code_token = lexer_user_code(context,
+                <%= @grammar.prefix %>token_t user_code_token = lexer_user_code(context,
                     match_info.accepting_state.code_id, match, &token_info);
                 /* An invalid token returned from lexer_user_code() means that the
                  * user code did not explicitly return a token. So only override
@@ -612,7 +625,7 @@ private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_tok
  * @retval P_UNEXPECTED_INPUT
  * Input text does not match any lexer pattern.
  */
-public size_t p_lex(p_context_t * context, p_token_info_t * out_token_info)
+public size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
 {
     for (;;)
     {

@@ -662,7 +675,7 @@ private struct shift_t
 private struct reduce_t
 {
     /** Lookahead token. */
-    p_token_t token;
+    <%= @grammar.prefix %>token_t token;

     /**
      * Rule ID.

@@ -716,7 +729,7 @@ private struct state_value_t
     size_t state_id;

     /** Parser value from this state. */
-    p_value_t pvalue;
+    <%= @grammar.prefix %>value_t pvalue;

     this(size_t state_id)
     {

@@ -752,9 +765,9 @@ private immutable parser_state_t[] parser_state_table = [
  *
  * @return Parse value.
  */
-private p_value_t parser_user_code(uint rule, state_value_t[] statevalues, uint n_states)
+private <%= @grammar.prefix %>value_t parser_user_code(uint rule, state_value_t[] statevalues, uint n_states)
 {
-    p_value_t _pvalue;
+    <%= @grammar.prefix %>value_t _pvalue;
     switch (rule)
     {

@@ -805,7 +818,7 @@ private size_t check_shift(size_t state_id, size_t symbol_id)
  *
  * @return State to reduce to, or INVALID_ID if none.
  */
-private size_t check_reduce(size_t state_id, p_token_t token)
+private size_t check_reduce(size_t state_id, <%= @grammar.prefix %>token_t token)
 {
     size_t start = parser_state_table[state_id].reduce_table_index;
     size_t end = start + parser_state_table[state_id].n_reduce_entries;

@@ -828,7 +841,7 @@ private size_t check_reduce(size_t state_id, p_token_t token)
  *
  * @retval P_SUCCESS
  * The parser successfully matched the input text. The parse result value
- * can be accessed with p_result().
+ * can be accessed with <%= @grammar.prefix %>result().
  * @retval P_UNEXPECTED_TOKEN
  * An unexpected token was encountered that does not match any grammar rule.
  * @retval P_DECODE_ERROR

@@ -836,18 +849,18 @@ private size_t check_reduce(size_t state_id, p_token_t token)
  * @retval P_UNEXPECTED_INPUT
  * Input text does not match any lexer pattern.
  */
-public size_t p_parse(p_context_t * context)
+public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 {
-    p_token_info_t token_info;
-    p_token_t token = INVALID_TOKEN_ID;
+    <%= @grammar.prefix %>token_info_t token_info;
+    <%= @grammar.prefix %>token_t token = INVALID_TOKEN_ID;
     state_value_t[] statevalues = new state_value_t[](1);
     size_t reduced_rule_set = INVALID_ID;
-    p_value_t reduced_parser_value;
+    <%= @grammar.prefix %>value_t reduced_parser_value;
     for (;;)
     {
         if (token == INVALID_TOKEN_ID)
         {
-            size_t lexer_result = p_lex(context, &token_info);
+            size_t lexer_result = <%= @grammar.prefix %>lex(context, &token_info);
             if (lexer_result != P_SUCCESS)
             {
                 return lexer_result;

@@ -883,7 +896,7 @@ public size_t p_parse(p_context_t * context)
         {
             /* We shifted a RuleSet. */
             statevalues[$-1].pvalue = reduced_parser_value;
-            p_value_t new_parse_result;
+            <%= @grammar.prefix %>value_t new_parse_result;
             reduced_parser_value = new_parse_result;
             reduced_rule_set = INVALID_ID;
         }

@@ -904,7 +917,7 @@ public size_t p_parse(p_context_t * context)
             write("Unexpected token ");
             if (token != INVALID_TOKEN_ID)
             {
-                writeln(p_token_names[token]);
+                writeln(<%= @grammar.prefix %>token_names[token]);
             }
             else
             {

@@ -928,7 +941,7 @@ public size_t p_parse(p_context_t * context)
  *
  * @return Parse result value.
  */
-public <%= start_rule_type[1] %> p_result(p_context_t * context)
+public <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 {
     return context.parse_result.v_<%= start_rule_type[0] %>;
 }

@@ -941,7 +954,7 @@ public <%= start_rule_type[1] %> p_result(p_context_t * context)
  *
  * @return Current text position.
  */
-public p_position_t p_position(p_context_t * context)
+public <%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context)
 {
     return context.text_position;
 }
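Taken together, every public symbol in the generated module now carries the configured prefix. A hedged usage sketch for a grammar with the hypothetical directive prefix myparser_; (the import name is an assumption):

    import testparser; // generated module name is an assumption

    void parseInput(string input)
    {
        myparser_context_t context;
        myparser_context_init(&context, input);
        if (myparser_parse(&context) == MYPARSER_SUCCESS)
        {
            auto value = myparser_result(&context); // type set by the start rule's ptype
        }
        else
        {
            myparser_position_t pos = myparser_position(&context);
            // report the error location; pos.row is 0-based
        }
    }

Because each parser's symbols are namespaced by its prefix, two generated parsers can be linked into one program, which the new spec and test file below exercise.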

View File

@@ -12,6 +12,7 @@ class Propane
     attr_reader :tokens
     attr_reader :code_blocks
     attr_reader :ptypes
+    attr_reader :prefix

     def initialize(input)
       @patterns = []

@@ -23,6 +24,7 @@ class Propane
       @mode = nil
       @input = input.gsub("\r\n", "\n")
       @ptypes = {"default" => "void *"}
+      @prefix = "p_"
       parse_grammar!
     end

@@ -55,6 +57,7 @@ class Propane
         elsif parse_drop_statement!
         elsif parse_rule_statement!
         elsif parse_code_block_statement!
+        elsif parse_prefix_statement!
         else
           if @input.size > 25
             @input = @input.slice(0..20) + "..."

@@ -195,6 +198,13 @@ class Propane
       end
     end

+    def parse_prefix_statement!
+      if md = consume!(/prefix\s+(#{IDENTIFIER_REGEX})\s*;/)
+        @prefix = md[1]
+        true
+      end
+    end
+
     def parse_pattern!
       if md = consume!(%r{/})
         pattern = ""

View File

@@ -34,6 +34,7 @@ EOF
     expect(grammar.modulename).to eq "a.b"
     expect(grammar.ptype).to eq "XYZ *"
     expect(grammar.ptypes).to eq("default" => "XYZ *")
+    expect(grammar.prefix).to eq "p_"
     o = grammar.tokens.find {|token| token.name == "while"}
     expect(o).to_not be_nil

@@ -111,8 +112,11 @@ token code2 <<
 >>
 tokenid token_with_no_pattern;
+
+prefix myparser_;
 EOF
     grammar = Grammar.new(input)
+    expect(grammar.prefix).to eq "myparser_"
     o = grammar.tokens.find {|token| token.name == "code1"}
     expect(o).to_not be_nil

View File

@@ -4,12 +4,14 @@ require "open3"

 Results = Struct.new(:stdout, :stderr, :status)

 describe Propane do
-  def write_grammar(grammar)
-    File.write("spec/run/testparser.propane", grammar)
+  def write_grammar(grammar, options = {})
+    options[:name] ||= ""
+    File.write("spec/run/testparser#{options[:name]}.propane", grammar)
   end

   def build_parser(options = {})
-    command = %w[./propane.sh spec/run/testparser.propane spec/run/testparser.d --log spec/run/testparser.log]
+    options[:name] ||= ""
+    command = %W[./propane.sh spec/run/testparser#{options[:name]}.propane spec/run/testparser#{options[:name]}.d --log spec/run/testparser#{options[:name]}.log]
     if (options[:capture])
       stdout, stderr, status = Open3.capture3(*command)
       Results.new(stdout, stderr, status)

@@ -19,8 +21,13 @@ describe Propane do
     end
   end

-  def compile(*test_files)
-    result = system(*%w[ldc2 --unittest -of spec/run/testparser spec/run/testparser.d -Ispec], *test_files)
+  def compile(test_files, options = {})
+    test_files = Array(test_files)
+    options[:parsers] ||= [""]
+    parsers = options[:parsers].map do |name|
+      "spec/run/testparser#{name}.d"
+    end
+    result = system(*%w[ldc2 --unittest -of spec/run/testparser -Ispec], *parsers, *test_files)
     expect(result).to be_truthy
   end

@@ -377,6 +384,28 @@ EOF
   it "allows creating a JSON parser" do
     write_grammar(File.read("spec/json_parser.propane"))
     build_parser
-    compile("spec/test_parsing_json.d", "spec/json_types.d")
+    compile(["spec/test_parsing_json.d", "spec/json_types.d"])
+  end
+
+  it "allows generating multiple parsers in the same program" do
+    write_grammar(<<EOF, name: "myp1")
+prefix myp1_;
+token a;
+token num /\\d+/;
+drop /\\s+/;
+Start -> a num;
+EOF
+    build_parser(name: "myp1")
+    write_grammar(<<EOF, name: "myp2")
+prefix myp2_;
+token b;
+token c;
+Start -> b c b;
+EOF
+    build_parser(name: "myp2")
+    compile("spec/test_multiple_parsers.d", parsers: %w[myp1 myp2])
+    results = run
+    expect(results.stderr).to eq ""
+    expect(results.status).to eq 0
   end
 end

View File spec/test_multiple_parsers.d (new file)

@@ -0,0 +1,21 @@
+import testparsermyp1;
+import testparsermyp2;
+import std.stdio;
+
+int main()
+{
+    return 0;
+}
+
+unittest
+{
+    string input1 = "a\n1";
+    myp1_context_t context1;
+    myp1_context_init(&context1, input1);
+    assert(myp1_parse(&context1) == MYP1_SUCCESS);
+
+    string input2 = "bcb";
+    myp2_context_t context2;
+    myp2_context_init(&context2, input2);
+    assert(myp2_parse(&context2) == MYP2_SUCCESS);
+}