Allow configuring API prefix - close #9

Josh Holtrop 2023-07-13 18:06:24 -04:00
parent 7d7929a358
commit 5ce562cbc3
5 changed files with 136 additions and 59 deletions

View File

@@ -25,19 +25,19 @@ import std.stdio;
 /* Result codes. */
 public enum : size_t
 {
-    P_SUCCESS,
-    P_DECODE_ERROR,
-    P_UNEXPECTED_INPUT,
-    P_UNEXPECTED_TOKEN,
-    P_DROP,
-    P_EOF,
+    <%= @grammar.prefix.upcase %>SUCCESS,
+    <%= @grammar.prefix.upcase %>DECODE_ERROR,
+    <%= @grammar.prefix.upcase %>UNEXPECTED_INPUT,
+    <%= @grammar.prefix.upcase %>UNEXPECTED_TOKEN,
+    <%= @grammar.prefix.upcase %>DROP,
+    <%= @grammar.prefix.upcase %>EOF,
 }

 /** Token type. */
-public alias p_token_t = <%= get_type_for(@grammar.invalid_token_id) %>;
+public alias <%= @grammar.prefix %>token_t = <%= get_type_for(@grammar.invalid_token_id) %>;

 /** Token IDs. */
-public enum : p_token_t
+public enum : <%= @grammar.prefix %>token_t
 {
 <% @grammar.tokens.each_with_index do |token, index| %>
     TOKEN_<%= token.code_name %> = <%= index %>,

@@ -49,10 +49,10 @@ public enum : p_token_t
 }

 /** Code point type. */
-public alias p_code_point_t = uint;
+public alias <%= @grammar.prefix %>code_point_t = uint;

 /** Parser values type(s). */
-public union p_value_t
+public union <%= @grammar.prefix %>value_t
 {
 <% @grammar.ptypes.each do |name, typestring| %>
     <%= typestring %> v_<%= name %>;

@@ -64,7 +64,7 @@ public union p_value_t
  *
  * This is useful for reporting errors, etc...
  */
-public struct p_position_t
+public struct <%= @grammar.prefix %>position_t
 {
     /** Input text row (0-based). */
     uint row;

@@ -74,19 +74,19 @@ public struct p_position_t
 }

 /** Lexed token information. */
-public struct p_token_info_t
+public struct <%= @grammar.prefix %>token_info_t
 {
     /** Text position where the token was found. */
-    p_position_t position;
+    <%= @grammar.prefix %>position_t position;

     /** Number of input bytes used by the token. */
     size_t length;

     /** Token that was lexed. */
-    p_token_t token;
+    <%= @grammar.prefix %>token_t token;

     /** Parser value associated with the token. */
-    p_value_t pvalue;
+    <%= @grammar.prefix %>value_t pvalue;
 }

@@ -95,7 +95,7 @@ public struct p_token_info_t
  * The user must allocate an instance of this structure and pass it to any
  * public API function.
  */
-public struct p_context_t
+public struct <%= @grammar.prefix %>context_t
 {
     /* Lexer context data. */

@@ -106,7 +106,7 @@ public struct p_context_t
     size_t input_index;

     /** Input text position (row/column). */
-    p_position_t text_position;
+    <%= @grammar.prefix %>position_t text_position;

     /** Current lexer mode. */
     size_t mode;

@@ -114,7 +114,7 @@ public struct p_context_t
     /* Parser context data. */

     /** Parse result value. */
-    p_value_t parse_result;
+    <%= @grammar.prefix %>value_t parse_result;
 }

 /**************************************************************************
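As the doc comment above says, callers allocate the context themselves and pass it to every public API function. A minimal hedged sketch against the default p_ prefix (the result type depends on the grammar's start rule ptype):

    p_context_t context;                   // user-allocated; no hidden global state
    p_context_init(&context, "input text");
    if (p_parse(&context) == P_SUCCESS)
    {
        auto result = p_result(&context);  // type is set by the start rule's ptype
    }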
@@ -122,7 +122,7 @@ public struct p_context_t
 *************************************************************************/

 /** Token names. */
-public immutable string[] p_token_names = [
+public immutable string[] <%= @grammar.prefix %>token_names = [
 <% @grammar.tokens.each_with_index do |token, index| %>
     "<%= token.name %>",
 <% end %>

@@ -132,6 +132,19 @@ public immutable string[] p_token_names = [
  * Private types
  *************************************************************************/

+<% if @grammar.prefix.upcase != "P_" %>
+/* Result codes. */
+private enum : size_t
+{
+    P_SUCCESS,
+    P_DECODE_ERROR,
+    P_UNEXPECTED_INPUT,
+    P_UNEXPECTED_TOKEN,
+    P_DROP,
+    P_EOF,
+}
+<% end %>
+
 /* An invalid ID value. */
 private enum size_t INVALID_ID = cast(size_t)-1;
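The guard above exists because the generated implementation keeps returning the internal P_* result codes; with a non-default prefix those names no longer appear in the public enum, so they are re-declared privately (and skipped for the default p_ prefix, where the public enum already defines them). As a sketch, a grammar with the hypothetical directive prefix myparser_; would expand to roughly:

    /* Public result codes, carrying the configured prefix. */
    public enum : size_t
    {
        MYPARSER_SUCCESS,
        MYPARSER_DECODE_ERROR,
        MYPARSER_UNEXPECTED_INPUT,
        MYPARSER_UNEXPECTED_TOKEN,
        MYPARSER_DROP,
        MYPARSER_EOF,
    }

    /* Private copy of the internal codes used by the generated implementation. */
    private enum : size_t
    {
        P_SUCCESS,
        P_DECODE_ERROR,
        P_UNEXPECTED_INPUT,
        P_UNEXPECTED_TOKEN,
        P_DROP,
        P_EOF,
    }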
@@ -147,10 +160,10 @@ private enum size_t INVALID_ID = cast(size_t)-1;
  * @param input
  * Text input.
  */
-public void p_context_init(p_context_t * context, string input)
+public void <%= @grammar.prefix %>context_init(<%= @grammar.prefix %>context_t * context, string input)
 {
     /* New default-initialized context structure. */
-    p_context_t newcontext;
+    <%= @grammar.prefix %>context_t newcontext;

     /* Lexer initialization. */
     newcontext.input = input;

@@ -179,15 +192,15 @@ public void p_context_init(p_context_t * context, string input)
  * @retval P_DECODE_ERROR when an encoding error is observed
  * @retval P_EOF when the end of the text input is reached
  */
-public size_t p_decode_code_point(string input,
-    p_code_point_t * out_code_point, ubyte * out_code_point_length)
+public size_t <%= @grammar.prefix %>decode_code_point(string input,
+    <%= @grammar.prefix %>code_point_t * out_code_point, ubyte * out_code_point_length)
 {
     if (input.length == 0u)
     {
         return P_EOF;
     }
     char c = input[0];
-    p_code_point_t code_point;
+    <%= @grammar.prefix %>code_point_t code_point;
     ubyte code_point_length;
     if ((c & 0x80u) == 0u)
     {
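Since the decoder is part of the public API, it can also be called on its own. A hedged sketch with the default prefix, assuming a well-formed two-byte UTF-8 input:

    p_code_point_t code_point;
    ubyte code_point_length;
    size_t result = p_decode_code_point("\u03BB", &code_point, &code_point_length);
    assert(result == P_SUCCESS);
    assert(code_point == 0x03BB);    // U+03BB, Greek small letter lambda
    assert(code_point_length == 2u); // lambda occupies two bytes in UTF-8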
@@ -272,10 +285,10 @@ private enum lexer_user_code_id_t INVALID_USER_CODE_ID = <%= user_code_id_count
 private struct lexer_transition_t
 {
     /** First code point in the range for this transition. */
-    p_code_point_t first;
+    <%= @grammar.prefix %>code_point_t first;

     /** Last code point in the range for this transition. */
-    p_code_point_t last;
+    <%= @grammar.prefix %>code_point_t last;

     /** Destination lexer state ID for this transition. */
     lexer_state_id_t destination_state;

@@ -291,7 +304,7 @@ private struct lexer_state_t
     <%= get_type_for(@lexer.state_table.map {|ste| ste[:n_transitions]}.max) %> n_transitions;

     /** Lexer token formed at this state. */
-    p_token_t token;
+    <%= @grammar.prefix %>token_t token;

     /** Lexer user code ID to execute at this state. */
     lexer_user_code_id_t code_id;

@@ -319,7 +332,7 @@ private struct lexer_match_info_t
     size_t length;

     /** Input text position delta. */
-    p_position_t delta_position;
+    <%= @grammar.prefix %>position_t delta_position;

     /** Accepting lexer state from the match. */
     const(lexer_state_t) * accepting_state;

@@ -361,9 +374,9 @@ private immutable lexer_mode_t[] lexer_mode_table = [
  * @return Token to accept, or invalid token if the user code does
  * not explicitly return a token.
  */
-private p_token_t lexer_user_code(p_context_t * context,
+private <%= @grammar.prefix %>token_t lexer_user_code(<%= @grammar.prefix %>context_t * context,
     lexer_user_code_id_t code_id, string match,
-    p_token_info_t * out_token_info)
+    <%= @grammar.prefix %>token_info_t * out_token_info)
 {
     switch (code_id)
     {

@@ -423,7 +436,7 @@ private lexer_state_id_t check_lexer_transition(uint current_state, uint code_po
  * @retval P_EOF
  * The end of the text input was reached.
  */
-private size_t find_longest_match(p_context_t * context,
+private size_t find_longest_match(<%= @grammar.prefix %>context_t * context,
     lexer_match_info_t * out_match_info, size_t * out_unexpected_input_length)
 {
     lexer_match_info_t longest_match;

@@ -433,9 +446,9 @@ private size_t find_longest_match(p_context_t * context,
     for (;;)
     {
         string input = context.input[(context.input_index + attempt_match.length)..(context.input.length)];
-        p_code_point_t code_point;
+        <%= @grammar.prefix %>code_point_t code_point;
         ubyte code_point_length;
-        size_t result = p_decode_code_point(input, &code_point, &code_point_length);
+        size_t result = <%= @grammar.prefix %>decode_code_point(input, &code_point, &code_point_length);
         switch (result)
         {
             case P_SUCCESS:

@@ -523,9 +536,9 @@ private size_t find_longest_match(p_context_t * context,
  * @retval P_DROP
  * A drop pattern was matched so the lexer should continue.
  */
-private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_token_info)
+private size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
 {
-    p_token_info_t token_info;
+    <%= @grammar.prefix %>token_info_t token_info;
     token_info.position = context.text_position;
     token_info.token = INVALID_TOKEN_ID;
     *out_token_info = token_info; // TODO: remove

@@ -535,11 +548,11 @@ private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_tok
     switch (result)
     {
         case P_SUCCESS:
-            p_token_t token_to_accept = match_info.accepting_state.token;
+            <%= @grammar.prefix %>token_t token_to_accept = match_info.accepting_state.token;
             if (match_info.accepting_state.code_id != INVALID_USER_CODE_ID)
             {
                 string match = context.input[context.input_index..(context.input_index + match_info.length)];
-                p_token_t user_code_token = lexer_user_code(context,
+                <%= @grammar.prefix %>token_t user_code_token = lexer_user_code(context,
                     match_info.accepting_state.code_id, match, &token_info);
                 /* An invalid token returned from lexer_user_code() means that the
                  * user code did not explicitly return a token. So only override
@@ -612,7 +625,7 @@ private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_tok
  * @retval P_UNEXPECTED_INPUT
  * Input text does not match any lexer pattern.
  */
-public size_t p_lex(p_context_t * context, p_token_info_t * out_token_info)
+public size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
 {
     for (;;)
     {

@@ -662,7 +675,7 @@ private struct shift_t
 private struct reduce_t
 {
     /** Lookahead token. */
-    p_token_t token;
+    <%= @grammar.prefix %>token_t token;

     /**
      * Rule ID.

@@ -716,7 +729,7 @@ private struct state_value_t
     size_t state_id;

     /** Parser value from this state. */
-    p_value_t pvalue;
+    <%= @grammar.prefix %>value_t pvalue;

     this(size_t state_id)
     {

@@ -752,9 +765,9 @@ private immutable parser_state_t[] parser_state_table = [
  *
  * @return Parse value.
  */
-private p_value_t parser_user_code(uint rule, state_value_t[] statevalues, uint n_states)
+private <%= @grammar.prefix %>value_t parser_user_code(uint rule, state_value_t[] statevalues, uint n_states)
 {
-    p_value_t _pvalue;
+    <%= @grammar.prefix %>value_t _pvalue;
     switch (rule)
     {

@@ -805,7 +818,7 @@ private size_t check_shift(size_t state_id, size_t symbol_id)
  *
  * @return State to reduce to, or INVALID_ID if none.
  */
-private size_t check_reduce(size_t state_id, p_token_t token)
+private size_t check_reduce(size_t state_id, <%= @grammar.prefix %>token_t token)
 {
     size_t start = parser_state_table[state_id].reduce_table_index;
     size_t end = start + parser_state_table[state_id].n_reduce_entries;

@@ -828,7 +841,7 @@ private size_t check_reduce(size_t state_id, p_token_t token)
  *
  * @retval P_SUCCESS
  * The parser successfully matched the input text. The parse result value
- * can be accessed with p_result().
+ * can be accessed with <%= @grammar.prefix %>result().
  * @retval P_UNEXPECTED_TOKEN
  * An unexpected token was encountered that does not match any grammar rule.
  * @retval P_DECODE_ERROR

@@ -836,18 +849,18 @@ private size_t check_reduce(size_t state_id, p_token_t token)
  * @retval P_UNEXPECTED_INPUT
  * Input text does not match any lexer pattern.
  */
-public size_t p_parse(p_context_t * context)
+public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 {
-    p_token_info_t token_info;
-    p_token_t token = INVALID_TOKEN_ID;
+    <%= @grammar.prefix %>token_info_t token_info;
+    <%= @grammar.prefix %>token_t token = INVALID_TOKEN_ID;
     state_value_t[] statevalues = new state_value_t[](1);
     size_t reduced_rule_set = INVALID_ID;
-    p_value_t reduced_parser_value;
+    <%= @grammar.prefix %>value_t reduced_parser_value;
     for (;;)
     {
         if (token == INVALID_TOKEN_ID)
         {
-            size_t lexer_result = p_lex(context, &token_info);
+            size_t lexer_result = <%= @grammar.prefix %>lex(context, &token_info);
             if (lexer_result != P_SUCCESS)
             {
                 return lexer_result;

@@ -883,7 +896,7 @@ public size_t p_parse(p_context_t * context)
         {
             /* We shifted a RuleSet. */
             statevalues[$-1].pvalue = reduced_parser_value;
-            p_value_t new_parse_result;
+            <%= @grammar.prefix %>value_t new_parse_result;
             reduced_parser_value = new_parse_result;
             reduced_rule_set = INVALID_ID;
         }

@@ -904,7 +917,7 @@ public size_t p_parse(p_context_t * context)
             write("Unexpected token ");
             if (token != INVALID_TOKEN_ID)
             {
-                writeln(p_token_names[token]);
+                writeln(<%= @grammar.prefix %>token_names[token]);
             }
             else
             {

@@ -928,7 +941,7 @@ public size_t p_parse(p_context_t * context)
  *
  * @return Parse result value.
  */
-public <%= start_rule_type[1] %> p_result(p_context_t * context)
+public <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 {
     return context.parse_result.v_<%= start_rule_type[0] %>;
 }

@@ -941,7 +954,7 @@ public <%= start_rule_type[1] %> p_result(p_context_t * context)
  *
  * @return Current text position.
  */
-public p_position_t p_position(p_context_t * context)
+public <%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context)
 {
     return context.text_position;
 }
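Taken together, every public symbol in the generated module now carries the configured prefix. A hedged usage sketch for a grammar with the hypothetical directive prefix myparser_; (the import name is an assumption):

    import testparser; // generated module name is an assumption

    void parseInput(string input)
    {
        myparser_context_t context;
        myparser_context_init(&context, input);
        if (myparser_parse(&context) == MYPARSER_SUCCESS)
        {
            auto value = myparser_result(&context); // type set by the start rule's ptype
        }
        else
        {
            myparser_position_t pos = myparser_position(&context);
            // report the error location; pos.row is 0-based
        }
    }

Because each parser's symbols are namespaced by its prefix, two generated parsers can be linked into one program, which the new spec and test file below exercise.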

View File

@@ -12,6 +12,7 @@ class Propane
     attr_reader :tokens
     attr_reader :code_blocks
     attr_reader :ptypes
+    attr_reader :prefix

     def initialize(input)
       @patterns = []

@@ -23,6 +24,7 @@ class Propane
       @mode = nil
       @input = input.gsub("\r\n", "\n")
       @ptypes = {"default" => "void *"}
+      @prefix = "p_"
       parse_grammar!
     end

@@ -55,6 +57,7 @@ class Propane
         elsif parse_drop_statement!
         elsif parse_rule_statement!
         elsif parse_code_block_statement!
+        elsif parse_prefix_statement!
         else
           if @input.size > 25
             @input = @input.slice(0..20) + "..."

@@ -195,6 +198,13 @@ class Propane
       end
     end

+    def parse_prefix_statement!
+      if md = consume!(/prefix\s+(#{IDENTIFIER_REGEX})\s*;/)
+        @prefix = md[1]
+        true
+      end
+    end
+
     def parse_pattern!
       if md = consume!(%r{/})
         pattern = ""

View File

@@ -34,6 +34,7 @@ EOF
     expect(grammar.modulename).to eq "a.b"
     expect(grammar.ptype).to eq "XYZ *"
     expect(grammar.ptypes).to eq("default" => "XYZ *")
+    expect(grammar.prefix).to eq "p_"
     o = grammar.tokens.find {|token| token.name == "while"}
     expect(o).to_not be_nil

@@ -111,8 +112,11 @@ token code2 <<
 >>
 tokenid token_with_no_pattern;
+
+prefix myparser_;
 EOF
     grammar = Grammar.new(input)
+    expect(grammar.prefix).to eq "myparser_"
     o = grammar.tokens.find {|token| token.name == "code1"}
     expect(o).to_not be_nil

View File

@@ -4,12 +4,14 @@ require "open3"

 Results = Struct.new(:stdout, :stderr, :status)

 describe Propane do
-  def write_grammar(grammar)
-    File.write("spec/run/testparser.propane", grammar)
+  def write_grammar(grammar, options = {})
+    options[:name] ||= ""
+    File.write("spec/run/testparser#{options[:name]}.propane", grammar)
   end

   def build_parser(options = {})
-    command = %w[./propane.sh spec/run/testparser.propane spec/run/testparser.d --log spec/run/testparser.log]
+    options[:name] ||= ""
+    command = %W[./propane.sh spec/run/testparser#{options[:name]}.propane spec/run/testparser#{options[:name]}.d --log spec/run/testparser#{options[:name]}.log]
     if (options[:capture])
       stdout, stderr, status = Open3.capture3(*command)
       Results.new(stdout, stderr, status)

@@ -19,8 +21,13 @@ describe Propane do
     end
   end

-  def compile(*test_files)
-    result = system(*%w[ldc2 --unittest -of spec/run/testparser spec/run/testparser.d -Ispec], *test_files)
+  def compile(test_files, options = {})
+    test_files = Array(test_files)
+    options[:parsers] ||= [""]
+    parsers = options[:parsers].map do |name|
+      "spec/run/testparser#{name}.d"
+    end
+    result = system(*%w[ldc2 --unittest -of spec/run/testparser -Ispec], *parsers, *test_files)
     expect(result).to be_truthy
   end

@@ -377,6 +384,28 @@ EOF
   it "allows creating a JSON parser" do
     write_grammar(File.read("spec/json_parser.propane"))
     build_parser
-    compile("spec/test_parsing_json.d", "spec/json_types.d")
+    compile(["spec/test_parsing_json.d", "spec/json_types.d"])
+  end
+
+  it "allows generating multiple parsers in the same program" do
+    write_grammar(<<EOF, name: "myp1")
+prefix myp1_;
+token a;
+token num /\\d+/;
+drop /\\s+/;
+Start -> a num;
+EOF
+    build_parser(name: "myp1")
+    write_grammar(<<EOF, name: "myp2")
+prefix myp2_;
+token b;
+token c;
+Start -> b c b;
+EOF
+    build_parser(name: "myp2")
+    compile("spec/test_multiple_parsers.d", parsers: %w[myp1 myp2])
+    results = run
+    expect(results.stderr).to eq ""
+    expect(results.status).to eq 0
   end
 end

View File spec/test_multiple_parsers.d (new file)

@@ -0,0 +1,21 @@
+import testparsermyp1;
+import testparsermyp2;
+import std.stdio;
+
+int main()
+{
+    return 0;
+}
+
+unittest
+{
+    string input1 = "a\n1";
+    myp1_context_t context1;
+    myp1_context_init(&context1, input1);
+    assert(myp1_parse(&context1) == MYP1_SUCCESS);
+
+    string input2 = "bcb";
+    myp2_context_t context2;
+    myp2_context_init(&context2, input2);
+    assert(myp2_parse(&context2) == MYP2_SUCCESS);
+}