Support C++ output language

2026-02-07 18:59:01 -05:00 · 2026-02-07 18:59:01 -05:00 · 072af73b1e
commit 072af73b1e
parent ac8ed4bf5a
12 changed files with 80 additions and 46 deletions
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ Propane is a LALR Parser Generator (LPG) which:
  * generates a built-in lexer to tokenize input
  * supports UTF-8 lexer inputs
  * generates a table-driven shift/reduce parser to parse input in linear time
-  * targets C or D language outputs
+  * targets C, C++, or D language outputs
  * optionally supports automatic full AST generation
  * is MIT-licensed
  * is distributable as a standalone Ruby script
--- a/Rakefile
+++ b/Rakefile
@@ -1,5 +1,7 @@
 require "rake/clean"
 require "rspec/core/rake_task"
 require "simplecov"
 require "stringio"
 CLEAN.include %w[spec/run gen .yardoc yard coverage dist]
@@ -12,6 +14,16 @@ RSpec::Core::RakeTask.new(:spec, :example_pattern) do |task, args|
    task.rspec_opts = %W[-e "#{args.example_pattern}" -f documentation]
  end
 end
 task :spec do |task, args|
  original_stdout = $stdout
  sio = StringIO.new
  $stdout = sio
  SimpleCov.collate Dir["coverage/.resultset.json"]
  $stdout = original_stdout
  sio.string.lines.each do |line|
    $stdout.write(line) unless line =~ /Coverage report generated for/
  end
 end
 # dspec task is useful to test the distributable release script, but is not
 # useful for coverage information.
--- a/assets/parser.c.erb
+++ b/assets/parser.c.erb
@@ -805,7 +805,7 @@ static void state_values_stack_push(state_values_stack_t * stack)
    if (current_length >= current_capacity)
    {
        size_t const new_capacity = current_capacity * 2u;
-        state_value_t * new_entries = malloc(new_capacity * sizeof(state_value_t));
+        state_value_t * new_entries = (state_value_t *)malloc(new_capacity * sizeof(state_value_t));
        memcpy(new_entries, stack->entries, current_length * sizeof(state_value_t));
        free(stack->entries);
        stack->capacity = new_capacity;
@@ -989,7 +989,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
            {
                /* We shifted a token, mark it consumed. */
 <% if @grammar.ast %>
-                <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>));
+                <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = (<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> *)malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>));
                token_ast_node->position = token_info.position;
                token_ast_node->end_position = token_info.end_position;
                token_ast_node->token = token;
--- a/doc/user_guide.md
+++ b/doc/user_guide.md
@@ -13,7 +13,7 @@ Propane is a LALR Parser Generator (LPG) which:
  * generates a built-in lexer to tokenize input
  * supports UTF-8 lexer inputs
  * generates a table-driven shift/reduce parser to parse input in linear time
-  * targets C or D language outputs
+  * targets C, C++, or D language outputs
  * optionally supports automatic full AST generation
  * tracks input text start and end positions for all matched tokens/rules
  * is MIT-licensed
--- a/lib/propane/generator.rb
+++ b/lib/propane/generator.rb
@@ -11,24 +11,28 @@ class Propane
        @log = StringIO.new
      end
      @language =
-        if output_file =~ /\.([a-z]+)$/
+        if output_file.end_with?(".d")
          $1
        else
          "d"
        else
          "c"
        end
      @options = options
      process_grammar!
    end
    def generate
-      extensions = [@language]
+      extensions = [nil]
      if @language == "c"
        extensions += %w[h]
      end
      extensions.each do |extension|
-        template = Assets.get("parser.#{extension}.erb")
+        template = Assets.get("parser.#{extension || @language}.erb")
        if extension
          output_file = @output_file.sub(%r{\.[a-z]+$}, ".#{extension}")
        else
          output_file = @output_file
        end
        erb = ERB.new(template, trim_mode: "<>")
        output_file = @output_file.sub(%r{\.[a-z]+$}, ".#{extension}")
        result = erb.result(binding.clone)
        File.open(output_file, "wb") do |fh|
          fh.write(result)
--- a/spec/json_parser.c.propane
+++ b/spec/json_parser.c.propane
@@ -120,11 +120,11 @@ string: /\\t/ <<
 >>
 string: /\\u[0-9a-fA-F]{4}/ <<
  /* Not actually going to encode the code point for this example... */
-  char s[] = {'{', match[2], match[3], match[4], match[5], '}', 0};
+  char s[] = {'{', (char)match[2], (char)match[3], (char)match[4], (char)match[5], '}', 0};
  str_append(&string_value, s);
 >>
 string: /[^\\]/ <<
-  char s[] = {match[0], 0};
+  char s[] = {(char)match[0], 0};
  str_append(&string_value, s);
 >>
 Start -> Value <<
--- a/spec/json_types.c
+++ b/spec/json_types.c
@@ -5,7 +5,7 @@
 JSONValue * JSONValue_new(size_t id)
 {
-    JSONValue * jv = calloc(1, sizeof(JSONValue));
+    JSONValue * jv = (JSONValue *)calloc(1, sizeof(JSONValue));
    jv->id = id;
    return jv;
 }
@@ -29,7 +29,7 @@ void JSONObject_append(JSONValue * object, char const * name, JSONValue * value)
        }
    }
    size_t const new_size = size + 1;
-    void * new_entries = malloc(sizeof(object->object.entries[0]) * new_size);
+    JSONObjectEntry * new_entries = (JSONObjectEntry *)malloc(sizeof(object->object.entries[0]) * new_size);
    if (size > 0)
    {
        memcpy(new_entries, object->object.entries, size * sizeof(object->object.entries[0]));
@@ -52,7 +52,7 @@ void JSONArray_append(JSONValue * array, JSONValue * value)
 {
    size_t const size = array->array.size;
    size_t const new_size = size + 1;
-    JSONValue ** new_entries = malloc(sizeof(JSONValue *) * new_size);
+    JSONValue ** new_entries = (JSONValue **)malloc(sizeof(JSONValue *) * new_size);
    if (array->array.size > 0)
    {
        memcpy(new_entries, array->array.entries, sizeof(JSONValue *) * size);
--- a/spec/json_types.h
+++ b/spec/json_types.h
@@ -11,6 +11,12 @@
 #define JSON_FALSE 5u
 #define JSON_NULL 6u
 typedef struct JSONObjectEntry_s
 {
    char const * name;
    struct JSONValue_s * value;
 } JSONObjectEntry;
 typedef struct JSONValue_s
 {
    size_t id;
@@ -19,11 +25,7 @@ typedef struct JSONValue_s
        struct
        {
            size_t size;
-            struct
+            JSONObjectEntry * entries;
            {
                char const * name;
                struct JSONValue_s * value;
            } * entries;
        } object;
        struct
        {
--- a/spec/propane_spec.rb
+++ b/spec/propane_spec.rb
@@ -65,17 +65,26 @@ EOF
  end
  def compile(test_files, options = {})
-    test_files = Array(test_files)
+    test_files = Array(test_files).map do |test_file|
      if !File.exist?(test_file) && test_file.end_with?(".cpp")
        test_file.sub(%r{\.cpp$}, ".c")
      else
        test_file
      end
    end
    options[:parsers] ||= [""]
    parsers = options[:parsers].map do |name|
      "spec/run/testparser#{name}.#{options[:language]}"
    end
    case options[:language]
    when "c"
-      result = system(*%w[gcc -Wall -o spec/run/testparser -Ispec -Ispec/run], *parsers, *test_files, "spec/testutils.c", "-lm")
+      command = [*%w[gcc -Wall -o spec/run/testparser -Ispec -Ispec/run], *parsers, *test_files, "spec/testutils.c", "-lm"]
    when "cpp"
      command = [*%w[g++ -x c++ -Wall -o spec/run/testparser -Ispec -Ispec/run], *parsers, *test_files, "spec/testutils.c", "-lm"]
    when "d"
-      result = system(*%w[ldc2 -g --unittest -of spec/run/testparser -Ispec], *parsers, *test_files, "spec/testutils.d")
+      command = [*%w[ldc2 -g --unittest -of spec/run/testparser -Ispec], *parsers, *test_files, "spec/testutils.d"]
    end
    result = system(*command)
    expect(result).to be_truthy
  end
@@ -261,7 +270,7 @@ EOF
    expect(results.status).to_not eq 0
  end
-  %w[d c].each do |language|
+  %w[d c cpp].each do |language|
    context "#{language.upcase} language" do
@@ -284,7 +293,7 @@ EOF
      it "detects a lexer error when an unknown character is seen" do
        case language
-        when "c"
+        when "c", "cpp"
          write_grammar <<EOF
 ptype int;
 token int /\\d+/ <<
@@ -338,7 +347,7 @@ EOF
      it "generates a parser that does basic math - user guide example" do
        case language
-        when "c"
+        when "c", "cpp"
          write_grammar <<EOF
 <<
 #include <math.h>
@@ -456,7 +465,7 @@ EOF
      it "executes user code when matching lexer token" do
        case language
-        when "c"
+        when "c", "cpp"
          write_grammar <<EOF
 <<
 #include <stdio.h>
@@ -498,7 +507,7 @@ EOF
      it "supports a pattern statement" do
        case language
-        when "c"
+        when "c", "cpp"
          write_grammar <<EOF
 <<
 #include <stdio.h>
@@ -534,7 +543,7 @@ EOF
      it "supports returning tokens from pattern code blocks" do
        case language
-        when "c"
+        when "c", "cpp"
          write_grammar <<EOF
 <<
 #include <stdio.h>
@@ -574,7 +583,7 @@ EOF
      it "supports lexer modes" do
        case language
-        when "c"
+        when "c", "cpp"
          write_grammar <<EOF
 <<
 #include <stdio.h>
@@ -635,7 +644,7 @@ EOF
      it "multiple lexer modes may apply to a pattern" do
        case language
-        when "c"
+        when "c", "cpp"
          write_grammar <<EOF
 <<
 #include <stdio.h>
@@ -691,7 +700,7 @@ EOF
      it "executes user code associated with a parser rule" do
        case language
-        when "c"
+        when "c", "cpp"
          write_grammar <<EOF
 <<
 #include <stdio.h>
@@ -727,7 +736,7 @@ EOF
      it "parses lists" do
        write_grammar <<EOF
-ptype #{language == "c" ? "uint32_t" : "uint"};
+ptype #{language == "d" ? "uint" : "uint32_t"};
 token a;
 Start -> As << $$ = $1; >>
 As -> << $$ = 0u; >>
@@ -762,14 +771,14 @@ EOF
      it "provides matched text to user code blocks" do
        case language
-        when "c"
+        when "c", "cpp"
          write_grammar <<EOF
 <<
 #include <stdio.h>
 #include <stdlib.h>
 >>
 token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
-  char * t = malloc(match_length + 1);
+  char * t = (char *)malloc(match_length + 1);
  strncpy(t, (char *)match, match_length);
  printf("Matched token is %s\\n", t);
  free(t);
@@ -799,7 +808,7 @@ EOF
      it "allows storing a result value for the lexer" do
        case language
-        when "c"
+        when "c", "cpp"
          write_grammar <<EOF
 ptype uint64_t;
 token word /[a-z]+/ <<
@@ -843,7 +852,8 @@ EOF
      end
      it "allows creating a JSON parser" do
-        write_grammar(File.read("spec/json_parser.#{language}.propane"))
+        ext = language == "cpp" ? "c" : language
        write_grammar(File.read("spec/json_parser.#{ext}.propane"))
        run_propane(language: language)
        compile(["spec/test_parsing_json.#{language}", "spec/json_types.#{language}"], language: language)
      end
@@ -909,7 +919,7 @@ EOF
      it "matches backslash escape sequences" do
        case language
-        when "c"
+        when "c", "cpp"
          write_grammar <<EOF
 <<
  #include <stdio.h>
@@ -1123,8 +1133,8 @@ Start -> a? b R? <<
  printf("b: %d\\n", $2);
  printf("R: %s\\n", $3 == NULL ? "" : $3);
 >>
-R -> c d << $$ = "cd"; >>
+R -> c d << $$ = (char *)"cd"; >>
-R (string) -> d c << $$ = "dc"; >>
+R (string) -> d c << $$ = (char *)"dc"; >>
 EOF
        end
        run_propane(language: language)
@@ -1334,7 +1344,7 @@ EOF
 >>
 ptype char const *;
 token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
-  char * s = malloc(match_length + 1);
+  char * s = (char *)malloc(match_length + 1);
  strncpy(s, (char const *)match, match_length);
  s[match_length] = 0;
  $$ = s;
@@ -1381,7 +1391,7 @@ EOF
 >>
 ptype char const *;
 token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
-  char * s = malloc(match_length + 1);
+  char * s = (char *)malloc(match_length + 1);
  strncpy(s, (char const *)match, match_length);
  s[match_length] = 0;
  $$ = s;
@@ -1405,7 +1415,8 @@ EOF
      end
      it "does not free memory allocated for AST nodes" do
-        write_grammar(File.read("spec/ast_node_memory_remains.#{language}.propane"))
+        ext = language == "cpp" ? "c" : language
        write_grammar(File.read("spec/ast_node_memory_remains.#{ext}.propane"))
        run_propane(language: language)
        compile("spec/test_ast_node_memory_remains.#{language}", language: language)
        results = run_test
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -2,6 +2,10 @@ unless ENV["dist_specs"]
  require "bundler/setup"
  require "simplecov"
  class MyFormatter
    def format(*args)
    end
  end
  SimpleCov.start do
    add_filter "/spec/"
    add_filter "/.bundle/"
@@ -12,6 +16,7 @@ unless ENV["dist_specs"]
    end
    project_name "Propane"
    merge_timeout 3600
    formatter(MyFormatter)
  end
  RSpec.configure do |config|
--- a/spec/test_ast_node_memory_remains.c
+++ b/spec/test_ast_node_memory_remains.c
@@ -386,7 +386,7 @@ int main(int argc, char * argv[])
        }
        pmis = pmis->pModuleItems;
    }
-    pfds = malloc(n_pfds * sizeof(PModuleItems *));
+    pfds = (PFunctionDefinition **)malloc(n_pfds * sizeof(PModuleItems *));
    pmis = pmod->pModuleItems;
    size_t pfd_i = n_pfds;
    while (pmis != NULL)
--- a/spec/testutils.c
+++ b/spec/testutils.c
@@ -17,7 +17,7 @@ void assert_eq_size_t_i(size_t expected, size_t actual, char const * file, size_
 void str_init(str_t * str, char const * cs)
 {
    size_t length = strlen(cs);
-    str->cs = malloc(length + 1u);
+    str->cs = (char *)malloc(length + 1u);
    strcpy(str->cs, cs);
 }
@@ -25,7 +25,7 @@ void str_append(str_t * str, char const * cs)
 {
    size_t length = strlen(str->cs);
    size_t length2 = strlen(cs);
-    char * new_cs = malloc(length + length2 + 1u);
+    char * new_cs = (char *)malloc(length + length2 + 1u);
    memcpy(new_cs, str->cs, length);
    strcpy(&new_cs[length], cs);
    free(str->cs);