diff --git a/assets/parser.c.erb b/assets/parser.c.erb index a70db48..65ce0ee 100644 --- a/assets/parser.c.erb +++ b/assets/parser.c.erb @@ -320,9 +320,12 @@ static lexer_state_id_t check_lexer_transition(uint32_t current_state, uint32_t * * @param context * Lexer/parser context structure. - * @param[out] out_token_info - * The lexed token information is stored here if the return value is - * P_SUCCESS. + * @param[out] out_match_info + * The longest match information is stored here if the return value is + * P_SUCCESS or P_DECODE_ERROR. + * @param[out] out_unexpected_input_length + * The unexpected input length is stored here if the return value is + * P_UNEXPECTED_INPUT. * * @reval P_SUCCESS * A token was successfully lexed. @@ -608,6 +611,18 @@ typedef struct * reduce action. */ parser_state_id_t n_states; +<% if @grammar.ast %> + + /** + * Map of rule components to rule set child fields. + */ + uint16_t const * rule_set_node_field_index_map; + + /** + * Number of rule set AST node fields. + */ + uint16_t rule_set_node_field_array_size; +<% end %> } reduce_t; /** Parser state entry. */ @@ -638,6 +653,11 @@ typedef struct /** Parser value from this state. */ <%= @grammar.prefix %>value_t pvalue; + +<% if @grammar.ast %> + /** AST node. */ + void * ast_node; +<% end %> } state_value_t; /** Parser shift table. */ @@ -647,10 +667,27 @@ static const shift_t parser_shift_table[] = { <% end %> }; +<% if @grammar.ast %> +<% @grammar.rules.each do |rule| %> +<% unless rule.flat_rule_set_node_field_index_map? %> +const uint16_t r_<%= rule.name.gsub("$", "_") %><%= rule.id %>_node_field_index_map[<%= rule.rule_set_node_field_index_map.size %>] = {<%= rule.rule_set_node_field_index_map.map {|v| v.to_s}.join(", ") %>}; +<% end %> +<% end %> +<% end %> + /** Parser reduce table. 
*/ static const reduce_t parser_reduce_table[] = { <% @parser.reduce_table.each do |reduce| %> - {<%= reduce[:token_id] %>u, <%= reduce[:rule_id] %>u, <%= reduce[:rule_set_id] %>u, <%= reduce[:n_states] %>u}, + {<%= reduce[:token_id] %>u, <%= reduce[:rule_id] %>u, <%= reduce[:rule_set_id] %>u, <%= reduce[:n_states] %>u +<% if @grammar.ast %> +<% if reduce[:rule].flat_rule_set_node_field_index_map? %> + , NULL +<% else %> + , &r_<%= reduce[:rule].name.gsub("$", "_") %><%= reduce[:rule].id %>_node_field_index_map[0] +<% end %> + , <%= reduce[:rule].rule_set.ast_fields.size %> +<% end %> + }, <% end %> }; @@ -754,6 +791,7 @@ static void state_values_stack_free(state_values_stack_t * stack) free(stack->entries); } +<% unless @grammar.ast %> /** * Execute user code associated with a parser rule. * @@ -768,18 +806,19 @@ static size_t parser_user_code(<%= @grammar.prefix %>value_t * _pvalue, uint32_t { switch (rule) { -<% @grammar.rules.each do |rule| %> -<% if rule.code %> +<% @grammar.rules.each do |rule| %> +<% if rule.code %> case <%= rule.id %>u: { <%= expand_code(rule.code, true, rule, nil) %> } break; +<% end %> <% end %> -<% end %> default: break; } return P_SUCCESS; } +<% end %> /** * Check if the parser should shift to a new state. @@ -853,7 +892,11 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) <%= @grammar.prefix %>token_t token = INVALID_TOKEN_ID; state_values_stack_t statevalues; size_t reduced_rule_set = INVALID_ID; +<% if @grammar.ast %> + void * reduced_parser_node; +<% else %> <%= @grammar.prefix %>value_t reduced_parser_value; +<% end %> state_values_stack_init(&statevalues); state_values_stack_push(&statevalues); size_t result; @@ -880,7 +923,11 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) if ((shift_state != INVALID_ID) && (token == TOKEN___EOF)) { /* Successful parse. 
*/ +<% if @grammar.ast %> + context->parse_result = (Start *)state_values_stack_index(&statevalues, -1)->ast_node; +<% else %> context->parse_result = state_values_stack_index(&statevalues, -1)->pvalue; +<% end %> result = P_SUCCESS; break; } @@ -893,15 +940,26 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) if (reduced_rule_set == INVALID_ID) { /* We shifted a token, mark it consumed. */ - token = INVALID_TOKEN_ID; +<% if @grammar.ast %> + Token * token_ast_node = malloc(sizeof(Token)); + token_ast_node->token = token; + token_ast_node->pvalue = token_info.pvalue; + state_values_stack_index(&statevalues, -1)->ast_node = token_ast_node; +<% else %> state_values_stack_index(&statevalues, -1)->pvalue = token_info.pvalue; +<% end %> + token = INVALID_TOKEN_ID; } else { /* We shifted a RuleSet. */ +<% if @grammar.ast %> + state_values_stack_index(&statevalues, -1)->ast_node = reduced_parser_node; +<% else %> state_values_stack_index(&statevalues, -1)->pvalue = reduced_parser_value; <%= @grammar.prefix %>value_t new_parse_result = {0}; reduced_parser_value = new_parse_result; +<% end %> reduced_rule_set = INVALID_ID; } continue; @@ -911,12 +969,38 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) if (reduce_index != INVALID_ID) { /* We have something to reduce. 
*/ +<% if @grammar.ast %> + if (parser_reduce_table[reduce_index].n_states > 0) + { + void ** node_fields = calloc(parser_reduce_table[reduce_index].rule_set_node_field_array_size, sizeof(void *)); + if (parser_reduce_table[reduce_index].rule_set_node_field_index_map == NULL) + { + for (size_t i = 0; i < parser_reduce_table[reduce_index].n_states; i++) + { + node_fields[i] = state_values_stack_index(&statevalues, -(int)parser_reduce_table[reduce_index].n_states + (int)i)->ast_node; + } + } + else + { + for (size_t i = 0; i < parser_reduce_table[reduce_index].n_states; i++) + { + node_fields[parser_reduce_table[reduce_index].rule_set_node_field_index_map[i]] = state_values_stack_index(&statevalues, -(int)parser_reduce_table[reduce_index].n_states + (int)i)->ast_node; + } + } + reduced_parser_node = node_fields; + } + else + { + reduced_parser_node = NULL; + } +<% else %> <%= @grammar.prefix %>value_t reduced_parser_value2 = {0}; if (parser_user_code(&reduced_parser_value2, parser_reduce_table[reduce_index].rule, &statevalues, parser_reduce_table[reduce_index].n_states, context) == P_USER_TERMINATED) { return P_USER_TERMINATED; } reduced_parser_value = reduced_parser_value2; +<% end %> reduced_rule_set = parser_reduce_table[reduce_index].rule_set; state_values_stack_pop(&statevalues, parser_reduce_table[reduce_index].n_states); continue; @@ -944,9 +1028,17 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) * * @return Parse result value. 
*/ +<% if @grammar.ast %> +Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context) +<% else %> <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context) +<% end %> { +<% if @grammar.ast %> + return context->parse_result; +<% else %> return context->parse_result.v_<%= start_rule_type[0] %>; +<% end %> } /** diff --git a/assets/parser.d.erb b/assets/parser.d.erb index 79cee1c..e762a5d 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -49,13 +49,39 @@ public enum : <%= @grammar.prefix %>token_t /** Code point type. */ public alias <%= @grammar.prefix %>code_point_t = uint; +<% if @grammar.ast %> +/** Parser values type. */ +public alias <%= @grammar.prefix %>value_t = <%= @grammar.ptype %>; +<% else %> /** Parser values type(s). */ public union <%= @grammar.prefix %>value_t { -<% @grammar.ptypes.each do |name, typestring| %> +<% @grammar.ptypes.each do |name, typestring| %> <%= typestring %> v_<%= name %>; -<% end %> +<% end %> } +<% end %> + +<% if @grammar.ast %> +/** AST node types. @{ */ +public struct Token +{ + <%= @grammar.prefix %>token_t token; + <%= @grammar.prefix %>value_t pvalue; +} + +<% @parser.rule_sets.each do |name, rule_set| %> +<% next if name.start_with?("$") %> +public struct <%= name %> +{ +<% rule_set.ast_fields.each do |name, type| %> + <%= type %> * <%= name %>; +<% end %> +} + +<% end %> +/** @} */ +<% end %> /** * A structure to keep track of parser position. @@ -112,7 +138,11 @@ public struct <%= @grammar.prefix %>context_t /* Parser context data. */ /** Parse result value. */ +<% if @grammar.ast %> + Start * parse_result; +<% else %> <%= @grammar.prefix %>value_t parse_result; +<% end %> /** Unexpected token received. */ <%= @grammar.prefix %>token_t token; @@ -428,9 +458,12 @@ private lexer_state_id_t check_lexer_transition(uint current_state, uint code_po * * @param context * Lexer/parser context structure. 
- * @param[out] out_token_info - * The lexed token information is stored here if the return value is - * P_SUCCESS. + * @param[out] out_match_info + * The longest match information is stored here if the return value is + * P_SUCCESS or P_DECODE_ERROR. + * @param[out] out_unexpected_input_length + * The unexpected input length is stored here if the return value is + * P_UNEXPECTED_INPUT. * * @reval P_SUCCESS * A token was successfully lexed. @@ -714,6 +747,18 @@ private struct reduce_t * reduce action. */ parser_state_id_t n_states; +<% if @grammar.ast %> + + /** + * Map of rule components to rule set child fields. + */ + immutable(ushort) * rule_set_node_field_index_map; + + /** + * Number of rule set AST node fields. + */ + ushort rule_set_node_field_array_size; +<% end %> } /** Parser state entry. */ @@ -745,6 +790,11 @@ private struct state_value_t /** Parser value from this state. */ <%= @grammar.prefix %>value_t pvalue; +<% if @grammar.ast %> + /** AST node. */ + void * ast_node; +<% end %> + this(size_t state_id) { this.state_id = state_id; @@ -758,10 +808,27 @@ private immutable shift_t[] parser_shift_table = [ <% end %> ]; +<% if @grammar.ast %> +<% @grammar.rules.each do |rule| %> +<% unless rule.flat_rule_set_node_field_index_map? %> +immutable ushort[<%= rule.rule_set_node_field_index_map.size %>] r_<%= rule.name.gsub("$", "_") %><%= rule.id %>_node_field_index_map = [<%= rule.rule_set_node_field_index_map.map {|v| v.to_s}.join(", ") %>]; +<% end %> +<% end %> +<% end %> + /** Parser reduce table. */ private immutable reduce_t[] parser_reduce_table = [ <% @parser.reduce_table.each do |reduce| %> - reduce_t(<%= reduce[:token_id] %>u, <%= reduce[:rule_id] %>u, <%= reduce[:rule_set_id] %>u, <%= reduce[:n_states] %>u), + reduce_t(<%= reduce[:token_id] %>u, <%= reduce[:rule_id] %>u, <%= reduce[:rule_set_id] %>u, <%= reduce[:n_states] %>u +<% if @grammar.ast %> +<% if reduce[:rule].flat_rule_set_node_field_index_map? 
%> + , null +<% else %> + , &r_<%= reduce[:rule].name.gsub("$", "_") %><%= reduce[:rule].id %>_node_field_index_map[0] +<% end %> + , <%= reduce[:rule].rule_set.ast_fields.size %> +<% end %> + ), <% end %> ]; @@ -772,6 +839,7 @@ private immutable parser_state_t[] parser_state_table = [ <% end %> ]; +<% unless @grammar.ast %> /** * Execute user code associated with a parser rule. * @@ -786,18 +854,19 @@ private size_t parser_user_code(<%= @grammar.prefix %>value_t * _pvalue, uint ru { switch (rule) { -<% @grammar.rules.each do |rule| %> -<% if rule.code %> +<% @grammar.rules.each do |rule| %> +<% if rule.code %> case <%= rule.id %>u: { <%= expand_code(rule.code, true, rule, nil) %> } break; +<% end %> <% end %> -<% end %> default: break; } return P_SUCCESS; } +<% end %> /** * Check if the parser should shift to a new state. @@ -871,7 +940,11 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont <%= @grammar.prefix %>token_t token = INVALID_TOKEN_ID; state_value_t[] statevalues = new state_value_t[](1); size_t reduced_rule_set = INVALID_ID; +<% if @grammar.ast %> + void * reduced_parser_node; +<% else %> <%= @grammar.prefix %>value_t reduced_parser_value; +<% end %> for (;;) { if (token == INVALID_TOKEN_ID) @@ -894,7 +967,11 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont if ((shift_state != INVALID_ID) && (token == TOKEN___EOF)) { /* Successful parse. */ +<% if @grammar.ast %> + context.parse_result = cast(Start *)statevalues[$-1].ast_node; +<% else %> context.parse_result = statevalues[$-1].pvalue; +<% end %> return P_SUCCESS; } } @@ -905,15 +982,24 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont if (reduced_rule_set == INVALID_ID) { /* We shifted a token, mark it consumed. 
*/ - token = INVALID_TOKEN_ID; +<% if @grammar.ast %> + Token * token_ast_node = new Token(token, token_info.pvalue); + statevalues[$-1].ast_node = token_ast_node; +<% else %> statevalues[$-1].pvalue = token_info.pvalue; +<% end %> + token = INVALID_TOKEN_ID; } else { /* We shifted a RuleSet. */ +<% if @grammar.ast %> + statevalues[$-1].ast_node = reduced_parser_node; +<% else %> statevalues[$-1].pvalue = reduced_parser_value; <%= @grammar.prefix %>value_t new_parse_result; reduced_parser_value = new_parse_result; +<% end %> reduced_rule_set = INVALID_ID; } continue; @@ -923,12 +1009,42 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont if (reduce_index != INVALID_ID) { /* We have something to reduce. */ +<% if @grammar.ast %> + if (parser_reduce_table[reduce_index].n_states > 0) + { + void *[] node_fields = new void *[parser_reduce_table[reduce_index].rule_set_node_field_array_size]; + foreach (i; 0..parser_reduce_table[reduce_index].rule_set_node_field_array_size) + { + node_fields[i] = null; + } + if (parser_reduce_table[reduce_index].rule_set_node_field_index_map is null) + { + foreach (i; 0..parser_reduce_table[reduce_index].n_states) + { + node_fields[i] = statevalues[$ - parser_reduce_table[reduce_index].n_states + i].ast_node; + } + } + else + { + foreach (i; 0..parser_reduce_table[reduce_index].n_states) + { + node_fields[parser_reduce_table[reduce_index].rule_set_node_field_index_map[i]] = statevalues[$ - parser_reduce_table[reduce_index].n_states + i].ast_node; + } + } + reduced_parser_node = node_fields.ptr; + } + else + { + reduced_parser_node = null; + } +<% else %> <%= @grammar.prefix %>value_t reduced_parser_value2; if (parser_user_code(&reduced_parser_value2, parser_reduce_table[reduce_index].rule, statevalues, parser_reduce_table[reduce_index].n_states, context) == P_USER_TERMINATED) { return P_USER_TERMINATED; } reduced_parser_value = reduced_parser_value2; +<% end %> reduced_rule_set = 
parser_reduce_table[reduce_index].rule_set; statevalues.length -= parser_reduce_table[reduce_index].n_states; continue; @@ -953,9 +1069,17 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont * * @return Parse result value. */ +<% if @grammar.ast %> +public Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context) +<% else %> public <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context) +<% end %> { +<% if @grammar.ast %> + return context.parse_result; +<% else %> return context.parse_result.v_<%= start_rule_type[0] %>; +<% end %> } /** diff --git a/assets/parser.h.erb b/assets/parser.h.erb index ed7853e..963003c 100644 --- a/assets/parser.h.erb +++ b/assets/parser.h.erb @@ -41,13 +41,44 @@ typedef uint32_t <%= @grammar.prefix %>code_point_t; /** User header code blocks. */ <%= @grammar.code_blocks.fetch("header", "") %> +<% if @grammar.ast %> +/** Parser values type. */ +typedef <%= @grammar.ptype %> <%= @grammar.prefix %>value_t; +<% else %> /** Parser values type(s). */ typedef union { -<% @grammar.ptypes.each do |name, typestring| %> +<% @grammar.ptypes.each do |name, typestring| %> <%= typestring %> v_<%= name %>; -<% end %> +<% end %> } <%= @grammar.prefix %>value_t; +<% end %> + +<% if @grammar.ast %> +/** AST node types. @{ */ +typedef struct Token +{ + <%= @grammar.prefix %>token_t token; + <%= @grammar.prefix %>value_t pvalue; +} Token; + +<% @parser.rule_sets.each do |name, rule_set| %> +<% next if name.start_with?("$") %> +struct <%= name %>; +<% end %> + +<% @parser.rule_sets.each do |name, rule_set| %> +<% next if name.start_with?("$") %> +typedef struct <%= name %> +{ +<% rule_set.ast_fields.each do |name, type| %> + struct <%= type %> * <%= name %>; +<% end %> +} <%= name %>; + +<% end %> +/** @} */ +<% end %> /** * A structure to keep track of parser position. @@ -107,7 +138,11 @@ typedef struct /* Parser context data. */ /** Parse result value. 
*/ +<% if @grammar.ast %> + Start * parse_result; +<% else %> <%= @grammar.prefix %>value_t parse_result; +<% end %> /** Unexpected token received. */ <%= @grammar.prefix %>token_t token; @@ -132,7 +167,11 @@ size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context); +<% if @grammar.ast %> +Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context); +<% else %> <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context); +<% end %> <%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context); diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb index 548a79c..a690072 100644 --- a/lib/propane/generator.rb +++ b/lib/propane/generator.rb @@ -119,6 +119,9 @@ class Propane end end determine_possibly_empty_rulesets!(rule_sets) + rule_sets.each do |name, rule_set| + rule_set.finalize + end # Generate the lexer. @lexer = Lexer.new(@grammar) # Generate the parser. @@ -228,11 +231,20 @@ class Propane end else code = code.gsub(/\$\$/) do |match| - case @language - when "c" - "out_token_info->pvalue.v_#{pattern.ptypename}" - when "d" - "out_token_info.pvalue.v_#{pattern.ptypename}" + if @grammar.ast + case @language + when "c" + "out_token_info->pvalue" + when "d" + "out_token_info.pvalue" + end + else + case @language + when "c" + "out_token_info->pvalue.v_#{pattern.ptypename}" + when "d" + "out_token_info.pvalue.v_#{pattern.ptypename}" + end end end code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match| diff --git a/lib/propane/grammar.rb b/lib/propane/grammar.rb index c3b2f0f..181abc5 100644 --- a/lib/propane/grammar.rb +++ b/lib/propane/grammar.rb @@ -5,6 +5,7 @@ class Propane # Reserve identifiers beginning with a double-underscore for internal use. 
IDENTIFIER_REGEX = /(?:[a-zA-Z]|_[a-zA-Z0-9])[a-zA-Z_0-9]*/ + attr_reader :ast attr_reader :modulename attr_reader :patterns attr_reader :rules @@ -24,6 +25,7 @@ class Propane @input = input.gsub("\r\n", "\n") @ptypes = {"default" => "void *"} @prefix = "p_" + @ast = false parse_grammar! end @@ -51,6 +53,7 @@ class Propane if parse_white_space! elsif parse_comment_line! elsif @mode.nil? && parse_mode_label! + elsif parse_ast_statement! elsif parse_module_statement! elsif parse_ptype_statement! elsif parse_pattern_statement! @@ -82,6 +85,12 @@ class Propane consume!(/#.*\n/) end + def parse_ast_statement! + if consume!(/ast\s*;/) + @ast = true + end + end + def parse_module_statement! if consume!(/module\s+/) md = consume!(/([\w.]+)\s*/, "expected module name") @@ -96,6 +105,9 @@ class Propane if consume!(/ptype\s+/) name = "default" if md = consume!(/(#{IDENTIFIER_REGEX})\s*=\s*/) + if @ast + raise Error.new("Multiple ptypes are unsupported in AST mode") + end name = md[1] end md = consume!(/([^;]+);/, "expected parser result type expression") @@ -108,12 +120,15 @@ class Propane md = consume!(/(#{IDENTIFIER_REGEX})\s*/, "expected token name") name = md[1] if md = consume!(/\((#{IDENTIFIER_REGEX})\)\s*/) + if @ast + raise Error.new("Multiple ptypes are unsupported in AST mode") + end ptypename = md[1] end pattern = parse_pattern! || name consume!(/\s+/) unless code = parse_code_block! - consume!(/;/, "expected pattern or `;' or code block") + consume!(/;/, "expected `;' or code block") end token = Token.new(name, ptypename, @line_number) @tokens << token @@ -129,6 +144,9 @@ class Propane md = consume!(/(#{IDENTIFIER_REGEX})\s*/, "expected token name") name = md[1] if md = consume!(/\((#{IDENTIFIER_REGEX})\)\s*/) + if @ast + raise Error.new("Multiple ptypes are unsupported in AST mode") + end ptypename = md[1] end consume!(/;/, "expected `;'"); @@ -156,10 +174,17 @@ class Propane def parse_rule_statement! 
if md = consume!(/(#{IDENTIFIER_REGEX})\s*(?:\((#{IDENTIFIER_REGEX})\))?\s*->\s*/) rule_name, ptypename = *md[1, 2] + if @ast && ptypename + raise Error.new("Multiple ptypes are unsupported in AST mode") + end md = consume!(/((?:#{IDENTIFIER_REGEX}\s*)*)\s*/, "expected rule component list") components = md[1].strip.split(/\s+/) - unless code = parse_code_block! - consume!(/;/, "expected pattern or `;' or code block") + if @ast + consume!(/;/, "expected `;'") + else + unless code = parse_code_block! + consume!(/;/, "expected `;' or code block") + end end @rules << Rule.new(rule_name, components, code, ptypename, @line_number) @mode = nil @@ -171,6 +196,9 @@ class Propane if pattern = parse_pattern! consume!(/\s+/) if md = consume!(/\((#{IDENTIFIER_REGEX})\)\s*/) + if @ast + raise Error.new("Multiple ptypes are unsupported in AST mode") + end ptypename = md[1] end unless code = parse_code_block! diff --git a/lib/propane/parser.rb b/lib/propane/parser.rb index cb7d8ef..da5eec4 100644 --- a/lib/propane/parser.rb +++ b/lib/propane/parser.rb @@ -63,11 +63,11 @@ class Propane reduce_entries = case ra = item_set.reduce_actions when Rule - [{token_id: @grammar.invalid_token_id, rule_id: ra.id, + [{token_id: @grammar.invalid_token_id, rule_id: ra.id, rule: ra, rule_set_id: ra.rule_set.id, n_states: ra.components.size}] when Hash ra.map do |token, rule| - {token_id: token.id, rule_id: rule.id, + {token_id: token.id, rule_id: rule.id, rule: rule, rule_set_id: rule.rule_set.id, n_states: rule.components.size} end else diff --git a/lib/propane/rule.rb b/lib/propane/rule.rb index dd4d7eb..ba97ee3 100644 --- a/lib/propane/rule.rb +++ b/lib/propane/rule.rb @@ -30,6 +30,11 @@ class Propane # The RuleSet that this Rule is a part of. attr_accessor :rule_set + # @return [Array] + # Map this rule's components to their positions in the parent RuleSet's + # node field pointer array. This is used for AST construction. + attr_accessor :rule_set_node_field_index_map + # Construct a Rule. 
# # @param name [String] @@ -45,6 +50,7 @@ class Propane def initialize(name, components, code, ptypename, line_number) @name = name @components = components + @rule_set_node_field_index_map = components.map {0} @code = code @ptypename = ptypename @line_number = line_number @@ -68,6 +74,17 @@ class Propane "#{@name} -> #{@components.map(&:name).join(" ")}" end + # Check whether the rule set node field index map is just a 1:1 mapping. + # + # @return [Boolean] + # Boolean indicating whether the rule set node field index map is just a + # 1:1 mapping. + def flat_rule_set_node_field_index_map? + @rule_set_node_field_index_map.each_with_index.all? do |v, i| + v == i + end + end + end end diff --git a/lib/propane/rule_set.rb b/lib/propane/rule_set.rb index 3832a81..bb6a961 100644 --- a/lib/propane/rule_set.rb +++ b/lib/propane/rule_set.rb @@ -1,5 +1,6 @@ class Propane + # A RuleSet collects all grammar rules of the same name. class RuleSet # @return [Integer] @@ -75,6 +76,42 @@ class Propane @_start_token_set end + # Build the set of AST fields for this RuleSet. + # + # The keys are the field names and the values are the AST node structure + # names. + # + # @return [Hash] + # AST fields. + def ast_fields + @_ast_fields ||= + begin + field_indexes = {} + fields = {} + @rules.each do |rule| + rule.components.each_with_index do |component, i| + if component.is_a?(Token) + node_name = "Token" + else + node_name = component.name + end + field_name = "p#{node_name}#{i + 1}" + unless field_indexes[field_name] + field_indexes[field_name] = fields.size + fields[field_name] = node_name + end + rule.rule_set_node_field_index_map[i] = field_indexes[field_name] + end + end + fields + end + end + + # Finalize a RuleSet after adding all Rules to it. 
+  def finalize
+    ast_fields
+  end
+
 end
 end
diff --git a/spec/propane_spec.rb b/spec/propane_spec.rb
index ee8bade..b31e8d9 100644
--- a/spec/propane_spec.rb
+++ b/spec/propane_spec.rb
@@ -803,6 +803,48 @@ Opt -> ;
 EOF
         run_propane(language: language)
       end
+
+      it "generates an AST" do
+        write_grammar <<EOF
+ast;
+
+ptype size_t;
+
+token a << $$ = 11; >>
+token b << $$ = 22; >>
+token one /1/;
+token two /2/;
+token comma /,/ <<
+  $$ = 42;
+>>
+token lparen /\\(/;
+token rparen /\\)/;
+drop /\\s+/;
+
+Start -> Items;
+
+Items -> Item ItemsMore;
+Items -> ;
+
+ItemsMore -> comma Item ItemsMore;
+ItemsMore -> ;
+
+Item -> a;
+Item -> b;
+Item -> lparen Item rparen;
+Item -> Dual;
+
+Dual -> One Two;
+Dual -> Two One;
+One -> one;
+Two -> two;
+EOF
+        run_propane(language: language)
+        compile("spec/test_ast.#{language}", language: language)
+        results = run_test
+        expect(results.stderr).to eq ""
+        expect(results.status).to eq 0
+      end
     end
   end
 end
diff --git a/spec/test_ast.c b/spec/test_ast.c
new file mode 100644
index 0000000..45073e0
--- /dev/null
+++ b/spec/test_ast.c
@@ -0,0 +1,54 @@
+#include "testparser.h"
+#include <assert.h>
+#include <string.h>
+#include "testutils.h"
+
+int main()
+{
+    char const * input = "a, ((b)), b";
+    p_context_t context;
+    p_context_init(&context, (uint8_t const *)input, strlen(input));
+    assert_eq(P_SUCCESS, p_parse(&context));
+    Start * start = p_result(&context);
+    assert(start->pItems1 != NULL);
+    Items * items = start->pItems1;
+    assert(items->pItem1 != NULL);
+    assert(items->pItem1->pToken1 != NULL);
+    assert_eq(TOKEN_a, items->pItem1->pToken1->token);
+    assert_eq(11, items->pItem1->pToken1->pvalue);
+    assert(items->pItemsMore2 != NULL);
+    ItemsMore * itemsmore = items->pItemsMore2;
+    assert(itemsmore->pItem2 != NULL);
+    assert(itemsmore->pItem2->pItem2 != NULL);
+    assert(itemsmore->pItem2->pItem2->pItem2 != NULL);
+    assert(itemsmore->pItem2->pItem2->pItem2->pToken1 != NULL);
+    assert_eq(TOKEN_b, itemsmore->pItem2->pItem2->pItem2->pToken1->token);
+    assert_eq(22, 
itemsmore->pItem2->pItem2->pItem2->pToken1->pvalue); + assert(itemsmore->pItemsMore3 != NULL); + itemsmore = itemsmore->pItemsMore3; + assert(itemsmore->pItem2 != NULL); + assert(itemsmore->pItem2->pToken1 != NULL); + assert_eq(TOKEN_b, itemsmore->pItem2->pToken1->token); + assert_eq(22, itemsmore->pItem2->pToken1->pvalue); + assert(itemsmore->pItemsMore3 == NULL); + + input = ""; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert_eq(P_SUCCESS, p_parse(&context)); + start = p_result(&context); + assert(start->pItems1 == NULL); + + input = "2 1"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert_eq(P_SUCCESS, p_parse(&context)); + start = p_result(&context); + assert(start->pItems1 != NULL); + assert(start->pItems1->pItem1 != NULL); + assert(start->pItems1->pItem1->pDual1 != NULL); + assert(start->pItems1->pItem1->pDual1->pTwo1 != NULL); + assert(start->pItems1->pItem1->pDual1->pOne2 != NULL); + assert(start->pItems1->pItem1->pDual1->pTwo2 == NULL); + assert(start->pItems1->pItem1->pDual1->pOne1 == NULL); + + return 0; +} diff --git a/spec/test_ast.d b/spec/test_ast.d new file mode 100644 index 0000000..a554304 --- /dev/null +++ b/spec/test_ast.d @@ -0,0 +1,56 @@ +import testparser; +import std.stdio; +import testutils; + +int main() +{ + return 0; +} + +unittest +{ + string input = "a, ((b)), b"; + p_context_t context; + p_context_init(&context, input); + assert_eq(P_SUCCESS, p_parse(&context)); + Start * start = p_result(&context); + assert(start.pItems1 !is null); + Items * items = start.pItems1; + assert(items.pItem1 !is null); + assert(items.pItem1.pToken1 !is null); + assert_eq(TOKEN_a, items.pItem1.pToken1.token); + assert_eq(11, items.pItem1.pToken1.pvalue); + assert(items.pItemsMore2 !is null); + ItemsMore * itemsmore = items.pItemsMore2; + assert(itemsmore.pItem2 !is null); + assert(itemsmore.pItem2.pItem2 !is null); + assert(itemsmore.pItem2.pItem2.pItem2 !is null); + 
assert(itemsmore.pItem2.pItem2.pItem2.pToken1 !is null); + assert_eq(TOKEN_b, itemsmore.pItem2.pItem2.pItem2.pToken1.token); + assert_eq(22, itemsmore.pItem2.pItem2.pItem2.pToken1.pvalue); + assert(itemsmore.pItemsMore3 !is null); + itemsmore = itemsmore.pItemsMore3; + assert(itemsmore.pItem2 !is null); + assert(itemsmore.pItem2.pToken1 !is null); + assert_eq(TOKEN_b, itemsmore.pItem2.pToken1.token); + assert_eq(22, itemsmore.pItem2.pToken1.pvalue); + assert(itemsmore.pItemsMore3 is null); + + input = ""; + p_context_init(&context, input); + assert_eq(P_SUCCESS, p_parse(&context)); + start = p_result(&context); + assert(start.pItems1 is null); + + input = "2 1"; + p_context_init(&context, input); + assert_eq(P_SUCCESS, p_parse(&context)); + start = p_result(&context); + assert(start.pItems1 !is null); + assert(start.pItems1.pItem1 !is null); + assert(start.pItems1.pItem1.pDual1 !is null); + assert(start.pItems1.pItem1.pDual1.pTwo1 !is null); + assert(start.pItems1.pItem1.pDual1.pOne2 !is null); + assert(start.pItems1.pItem1.pDual1.pTwo2 is null); + assert(start.pItems1.pItem1.pDual1.pOne1 is null); +}