Support C++ output language

This commit is contained in:
Josh Holtrop 2026-02-07 18:59:01 -05:00
parent ac8ed4bf5a
commit 072af73b1e
12 changed files with 80 additions and 46 deletions

View File

@ -6,7 +6,7 @@ Propane is a LALR Parser Generator (LPG) which:
* generates a built-in lexer to tokenize input * generates a built-in lexer to tokenize input
* supports UTF-8 lexer inputs * supports UTF-8 lexer inputs
* generates a table-driven shift/reduce parser to parse input in linear time * generates a table-driven shift/reduce parser to parse input in linear time
* targets C or D language outputs * targets C, C++, or D language outputs
* optionally supports automatic full AST generation * optionally supports automatic full AST generation
* is MIT-licensed * is MIT-licensed
* is distributable as a standalone Ruby script * is distributable as a standalone Ruby script

View File

@ -1,5 +1,7 @@
require "rake/clean" require "rake/clean"
require "rspec/core/rake_task" require "rspec/core/rake_task"
require "simplecov"
require "stringio"
CLEAN.include %w[spec/run gen .yardoc yard coverage dist] CLEAN.include %w[spec/run gen .yardoc yard coverage dist]
@ -12,6 +14,16 @@ RSpec::Core::RakeTask.new(:spec, :example_pattern) do |task, args|
task.rspec_opts = %W[-e "#{args.example_pattern}" -f documentation] task.rspec_opts = %W[-e "#{args.example_pattern}" -f documentation]
end end
end end
# Action for the :spec task: collates the SimpleCov result set while capturing
# everything printed to $stdout, then replays the captured output with any
# "Coverage report generated for" lines removed.
task :spec do
  original_stdout = $stdout
  captured = StringIO.new
  begin
    # Redirect $stdout so the collation output can be filtered afterwards.
    $stdout = captured
    SimpleCov.collate Dir["coverage/.resultset.json"]
  ensure
    # Always put the real stdout back, even when collation raises; otherwise
    # every later write in this process would vanish into the StringIO.
    $stdout = original_stdout
  end
  captured.string.each_line do |line|
    $stdout.write(line) unless line.include?("Coverage report generated for")
  end
end
# dspec task is useful to test the distributable release script, but is not # dspec task is useful to test the distributable release script, but is not
# useful for coverage information. # useful for coverage information.

View File

@ -805,7 +805,7 @@ static void state_values_stack_push(state_values_stack_t * stack)
if (current_length >= current_capacity) if (current_length >= current_capacity)
{ {
size_t const new_capacity = current_capacity * 2u; size_t const new_capacity = current_capacity * 2u;
state_value_t * new_entries = malloc(new_capacity * sizeof(state_value_t)); state_value_t * new_entries = (state_value_t *)malloc(new_capacity * sizeof(state_value_t));
memcpy(new_entries, stack->entries, current_length * sizeof(state_value_t)); memcpy(new_entries, stack->entries, current_length * sizeof(state_value_t));
free(stack->entries); free(stack->entries);
stack->capacity = new_capacity; stack->capacity = new_capacity;
@ -989,7 +989,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
{ {
/* We shifted a token, mark it consumed. */ /* We shifted a token, mark it consumed. */
<% if @grammar.ast %> <% if @grammar.ast %>
<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>)); <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = (<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> *)malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>));
token_ast_node->position = token_info.position; token_ast_node->position = token_info.position;
token_ast_node->end_position = token_info.end_position; token_ast_node->end_position = token_info.end_position;
token_ast_node->token = token; token_ast_node->token = token;

View File

@ -13,7 +13,7 @@ Propane is a LALR Parser Generator (LPG) which:
* generates a built-in lexer to tokenize input * generates a built-in lexer to tokenize input
* supports UTF-8 lexer inputs * supports UTF-8 lexer inputs
* generates a table-driven shift/reduce parser to parse input in linear time * generates a table-driven shift/reduce parser to parse input in linear time
* targets C or D language outputs * targets C, C++, or D language outputs
* optionally supports automatic full AST generation * optionally supports automatic full AST generation
* tracks input text start and end positions for all matched tokens/rules * tracks input text start and end positions for all matched tokens/rules
* is MIT-licensed * is MIT-licensed

View File

@ -11,24 +11,28 @@ class Propane
@log = StringIO.new @log = StringIO.new
end end
@language = @language =
if output_file =~ /\.([a-z]+)$/ if output_file.end_with?(".d")
$1
else
"d" "d"
else
"c"
end end
@options = options @options = options
process_grammar! process_grammar!
end end
def generate def generate
extensions = [@language] extensions = [nil]
if @language == "c" if @language == "c"
extensions += %w[h] extensions += %w[h]
end end
extensions.each do |extension| extensions.each do |extension|
template = Assets.get("parser.#{extension}.erb") template = Assets.get("parser.#{extension || @language}.erb")
if extension
output_file = @output_file.sub(%r{\.[a-z]+$}, ".#{extension}")
else
output_file = @output_file
end
erb = ERB.new(template, trim_mode: "<>") erb = ERB.new(template, trim_mode: "<>")
output_file = @output_file.sub(%r{\.[a-z]+$}, ".#{extension}")
result = erb.result(binding.clone) result = erb.result(binding.clone)
File.open(output_file, "wb") do |fh| File.open(output_file, "wb") do |fh|
fh.write(result) fh.write(result)

View File

@ -120,11 +120,11 @@ string: /\\t/ <<
>> >>
string: /\\u[0-9a-fA-F]{4}/ << string: /\\u[0-9a-fA-F]{4}/ <<
/* Not actually going to encode the code point for this example... */ /* Not actually going to encode the code point for this example... */
char s[] = {'{', match[2], match[3], match[4], match[5], '}', 0}; char s[] = {'{', (char)match[2], (char)match[3], (char)match[4], (char)match[5], '}', 0};
str_append(&string_value, s); str_append(&string_value, s);
>> >>
string: /[^\\]/ << string: /[^\\]/ <<
char s[] = {match[0], 0}; char s[] = {(char)match[0], 0};
str_append(&string_value, s); str_append(&string_value, s);
>> >>
Start -> Value << Start -> Value <<

View File

@ -5,7 +5,7 @@
JSONValue * JSONValue_new(size_t id) JSONValue * JSONValue_new(size_t id)
{ {
JSONValue * jv = calloc(1, sizeof(JSONValue)); JSONValue * jv = (JSONValue *)calloc(1, sizeof(JSONValue));
jv->id = id; jv->id = id;
return jv; return jv;
} }
@ -29,7 +29,7 @@ void JSONObject_append(JSONValue * object, char const * name, JSONValue * value)
} }
} }
size_t const new_size = size + 1; size_t const new_size = size + 1;
void * new_entries = malloc(sizeof(object->object.entries[0]) * new_size); JSONObjectEntry * new_entries = (JSONObjectEntry *)malloc(sizeof(object->object.entries[0]) * new_size);
if (size > 0) if (size > 0)
{ {
memcpy(new_entries, object->object.entries, size * sizeof(object->object.entries[0])); memcpy(new_entries, object->object.entries, size * sizeof(object->object.entries[0]));
@ -52,7 +52,7 @@ void JSONArray_append(JSONValue * array, JSONValue * value)
{ {
size_t const size = array->array.size; size_t const size = array->array.size;
size_t const new_size = size + 1; size_t const new_size = size + 1;
JSONValue ** new_entries = malloc(sizeof(JSONValue *) * new_size); JSONValue ** new_entries = (JSONValue **)malloc(sizeof(JSONValue *) * new_size);
if (array->array.size > 0) if (array->array.size > 0)
{ {
memcpy(new_entries, array->array.entries, sizeof(JSONValue *) * size); memcpy(new_entries, array->array.entries, sizeof(JSONValue *) * size);

View File

@ -11,6 +11,12 @@
#define JSON_FALSE 5u #define JSON_FALSE 5u
#define JSON_NULL 6u #define JSON_NULL 6u
/* A single name/value pair; JSONValue's object member holds a pointer to
 * these as its entries field. */
typedef struct JSONObjectEntry_s
{
/* Name associated with this entry. */
char const * name;
/* Value associated with the name. Referenced by struct tag because the
 * JSONValue typedef is only declared after this struct. */
struct JSONValue_s * value;
} JSONObjectEntry;
typedef struct JSONValue_s typedef struct JSONValue_s
{ {
size_t id; size_t id;
@ -19,11 +25,7 @@ typedef struct JSONValue_s
struct struct
{ {
size_t size; size_t size;
struct JSONObjectEntry * entries;
{
char const * name;
struct JSONValue_s * value;
} * entries;
} object; } object;
struct struct
{ {

View File

@ -65,17 +65,26 @@ EOF
end end
def compile(test_files, options = {}) def compile(test_files, options = {})
test_files = Array(test_files) test_files = Array(test_files).map do |test_file|
if !File.exist?(test_file) && test_file.end_with?(".cpp")
test_file.sub(%r{\.cpp$}, ".c")
else
test_file
end
end
options[:parsers] ||= [""] options[:parsers] ||= [""]
parsers = options[:parsers].map do |name| parsers = options[:parsers].map do |name|
"spec/run/testparser#{name}.#{options[:language]}" "spec/run/testparser#{name}.#{options[:language]}"
end end
case options[:language] case options[:language]
when "c" when "c"
result = system(*%w[gcc -Wall -o spec/run/testparser -Ispec -Ispec/run], *parsers, *test_files, "spec/testutils.c", "-lm") command = [*%w[gcc -Wall -o spec/run/testparser -Ispec -Ispec/run], *parsers, *test_files, "spec/testutils.c", "-lm"]
when "cpp"
command = [*%w[g++ -x c++ -Wall -o spec/run/testparser -Ispec -Ispec/run], *parsers, *test_files, "spec/testutils.c", "-lm"]
when "d" when "d"
result = system(*%w[ldc2 -g --unittest -of spec/run/testparser -Ispec], *parsers, *test_files, "spec/testutils.d") command = [*%w[ldc2 -g --unittest -of spec/run/testparser -Ispec], *parsers, *test_files, "spec/testutils.d"]
end end
result = system(*command)
expect(result).to be_truthy expect(result).to be_truthy
end end
@ -261,7 +270,7 @@ EOF
expect(results.status).to_not eq 0 expect(results.status).to_not eq 0
end end
%w[d c].each do |language| %w[d c cpp].each do |language|
context "#{language.upcase} language" do context "#{language.upcase} language" do
@ -284,7 +293,7 @@ EOF
it "detects a lexer error when an unknown character is seen" do it "detects a lexer error when an unknown character is seen" do
case language case language
when "c" when "c", "cpp"
write_grammar <<EOF write_grammar <<EOF
ptype int; ptype int;
token int /\\d+/ << token int /\\d+/ <<
@ -338,7 +347,7 @@ EOF
it "generates a parser that does basic math - user guide example" do it "generates a parser that does basic math - user guide example" do
case language case language
when "c" when "c", "cpp"
write_grammar <<EOF write_grammar <<EOF
<< <<
#include <math.h> #include <math.h>
@ -456,7 +465,7 @@ EOF
it "executes user code when matching lexer token" do it "executes user code when matching lexer token" do
case language case language
when "c" when "c", "cpp"
write_grammar <<EOF write_grammar <<EOF
<< <<
#include <stdio.h> #include <stdio.h>
@ -498,7 +507,7 @@ EOF
it "supports a pattern statement" do it "supports a pattern statement" do
case language case language
when "c" when "c", "cpp"
write_grammar <<EOF write_grammar <<EOF
<< <<
#include <stdio.h> #include <stdio.h>
@ -534,7 +543,7 @@ EOF
it "supports returning tokens from pattern code blocks" do it "supports returning tokens from pattern code blocks" do
case language case language
when "c" when "c", "cpp"
write_grammar <<EOF write_grammar <<EOF
<< <<
#include <stdio.h> #include <stdio.h>
@ -574,7 +583,7 @@ EOF
it "supports lexer modes" do it "supports lexer modes" do
case language case language
when "c" when "c", "cpp"
write_grammar <<EOF write_grammar <<EOF
<< <<
#include <stdio.h> #include <stdio.h>
@ -635,7 +644,7 @@ EOF
it "multiple lexer modes may apply to a pattern" do it "multiple lexer modes may apply to a pattern" do
case language case language
when "c" when "c", "cpp"
write_grammar <<EOF write_grammar <<EOF
<< <<
#include <stdio.h> #include <stdio.h>
@ -691,7 +700,7 @@ EOF
it "executes user code associated with a parser rule" do it "executes user code associated with a parser rule" do
case language case language
when "c" when "c", "cpp"
write_grammar <<EOF write_grammar <<EOF
<< <<
#include <stdio.h> #include <stdio.h>
@ -727,7 +736,7 @@ EOF
it "parses lists" do it "parses lists" do
write_grammar <<EOF write_grammar <<EOF
ptype #{language == "c" ? "uint32_t" : "uint"}; ptype #{language == "d" ? "uint" : "uint32_t"};
token a; token a;
Start -> As << $$ = $1; >> Start -> As << $$ = $1; >>
As -> << $$ = 0u; >> As -> << $$ = 0u; >>
@ -762,14 +771,14 @@ EOF
it "provides matched text to user code blocks" do it "provides matched text to user code blocks" do
case language case language
when "c" when "c", "cpp"
write_grammar <<EOF write_grammar <<EOF
<< <<
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
>> >>
token id /[a-zA-Z_][a-zA-Z0-9_]*/ << token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
char * t = malloc(match_length + 1); char * t = (char *)malloc(match_length + 1);
strncpy(t, (char *)match, match_length); strncpy(t, (char *)match, match_length);
printf("Matched token is %s\\n", t); printf("Matched token is %s\\n", t);
free(t); free(t);
@ -799,7 +808,7 @@ EOF
it "allows storing a result value for the lexer" do it "allows storing a result value for the lexer" do
case language case language
when "c" when "c", "cpp"
write_grammar <<EOF write_grammar <<EOF
ptype uint64_t; ptype uint64_t;
token word /[a-z]+/ << token word /[a-z]+/ <<
@ -843,7 +852,8 @@ EOF
end end
it "allows creating a JSON parser" do it "allows creating a JSON parser" do
write_grammar(File.read("spec/json_parser.#{language}.propane")) ext = language == "cpp" ? "c" : language
write_grammar(File.read("spec/json_parser.#{ext}.propane"))
run_propane(language: language) run_propane(language: language)
compile(["spec/test_parsing_json.#{language}", "spec/json_types.#{language}"], language: language) compile(["spec/test_parsing_json.#{language}", "spec/json_types.#{language}"], language: language)
end end
@ -909,7 +919,7 @@ EOF
it "matches backslash escape sequences" do it "matches backslash escape sequences" do
case language case language
when "c" when "c", "cpp"
write_grammar <<EOF write_grammar <<EOF
<< <<
#include <stdio.h> #include <stdio.h>
@ -1123,8 +1133,8 @@ Start -> a? b R? <<
printf("b: %d\\n", $2); printf("b: %d\\n", $2);
printf("R: %s\\n", $3 == NULL ? "" : $3); printf("R: %s\\n", $3 == NULL ? "" : $3);
>> >>
R -> c d << $$ = "cd"; >> R -> c d << $$ = (char *)"cd"; >>
R (string) -> d c << $$ = "dc"; >> R (string) -> d c << $$ = (char *)"dc"; >>
EOF EOF
end end
run_propane(language: language) run_propane(language: language)
@ -1334,7 +1344,7 @@ EOF
>> >>
ptype char const *; ptype char const *;
token id /[a-zA-Z_][a-zA-Z0-9_]*/ << token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
char * s = malloc(match_length + 1); char * s = (char *)malloc(match_length + 1);
strncpy(s, (char const *)match, match_length); strncpy(s, (char const *)match, match_length);
s[match_length] = 0; s[match_length] = 0;
$$ = s; $$ = s;
@ -1381,7 +1391,7 @@ EOF
>> >>
ptype char const *; ptype char const *;
token id /[a-zA-Z_][a-zA-Z0-9_]*/ << token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
char * s = malloc(match_length + 1); char * s = (char *)malloc(match_length + 1);
strncpy(s, (char const *)match, match_length); strncpy(s, (char const *)match, match_length);
s[match_length] = 0; s[match_length] = 0;
$$ = s; $$ = s;
@ -1405,7 +1415,8 @@ EOF
end end
it "does not free memory allocated for AST nodes" do it "does not free memory allocated for AST nodes" do
write_grammar(File.read("spec/ast_node_memory_remains.#{language}.propane")) ext = language == "cpp" ? "c" : language
write_grammar(File.read("spec/ast_node_memory_remains.#{ext}.propane"))
run_propane(language: language) run_propane(language: language)
compile("spec/test_ast_node_memory_remains.#{language}", language: language) compile("spec/test_ast_node_memory_remains.#{language}", language: language)
results = run_test results = run_test

View File

@ -2,6 +2,10 @@ unless ENV["dist_specs"]
require "bundler/setup" require "bundler/setup"
require "simplecov" require "simplecov"
# Formatter stand-in handed to SimpleCov's `formatter` setting: #format accepts
# any arguments, does nothing, and returns nil.
class MyFormatter
  def format(*); end
end
SimpleCov.start do SimpleCov.start do
add_filter "/spec/" add_filter "/spec/"
add_filter "/.bundle/" add_filter "/.bundle/"
@ -12,6 +16,7 @@ unless ENV["dist_specs"]
end end
project_name "Propane" project_name "Propane"
merge_timeout 3600 merge_timeout 3600
formatter(MyFormatter)
end end
RSpec.configure do |config| RSpec.configure do |config|

View File

@ -386,7 +386,7 @@ int main(int argc, char * argv[])
} }
pmis = pmis->pModuleItems; pmis = pmis->pModuleItems;
} }
pfds = malloc(n_pfds * sizeof(PModuleItems *)); pfds = (PFunctionDefinition **)malloc(n_pfds * sizeof(PModuleItems *));
pmis = pmod->pModuleItems; pmis = pmod->pModuleItems;
size_t pfd_i = n_pfds; size_t pfd_i = n_pfds;
while (pmis != NULL) while (pmis != NULL)

View File

@ -17,7 +17,7 @@ void assert_eq_size_t_i(size_t expected, size_t actual, char const * file, size_
void str_init(str_t * str, char const * cs) void str_init(str_t * str, char const * cs)
{ {
size_t length = strlen(cs); size_t length = strlen(cs);
str->cs = malloc(length + 1u); str->cs = (char *)malloc(length + 1u);
strcpy(str->cs, cs); strcpy(str->cs, cs);
} }
@ -25,7 +25,7 @@ void str_append(str_t * str, char const * cs)
{ {
size_t length = strlen(str->cs); size_t length = strlen(str->cs);
size_t length2 = strlen(cs); size_t length2 = strlen(cs);
char * new_cs = malloc(length + length2 + 1u); char * new_cs = (char *)malloc(length + length2 + 1u);
memcpy(new_cs, str->cs, length); memcpy(new_cs, str->cs, length);
strcpy(&new_cs[length], cs); strcpy(&new_cs[length], cs);
free(str->cs); free(str->cs);