From 57a3e9d9f6ef5336ba55d14e9042a91e874d2779 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Sun, 5 Jun 2022 10:23:34 -0400 Subject: [PATCH] Move grammar parsing into new Grammar class --- lib/propane.rb | 50 ++++---------------------------- lib/propane/grammar.rb | 64 +++++++++++++++++++++++++++++++++++++++++ lib/propane/parser.rb | 2 +- lib/propane/rule.rb | 27 +++++++++++++++-- lib/propane/rule_set.rb | 4 +-- lib/propane/token.rb | 17 ++++++++++- 6 files changed, 113 insertions(+), 51 deletions(-) create mode 100644 lib/propane/grammar.rb diff --git a/lib/propane.rb b/lib/propane.rb index 1137814..00e7240 100644 --- a/lib/propane.rb +++ b/lib/propane.rb @@ -5,6 +5,7 @@ require_relative "propane/code_point_range" require_relative "propane/fa" require_relative "propane/fa/state" require_relative "propane/fa/state/transition" +require_relative "propane/grammar" require_relative "propane/lexer" require_relative "propane/lexer/dfa" require_relative "propane/parser" @@ -36,12 +37,11 @@ class Propane end def initialize(input) - @tokens = {} - @rule_sets = {} - input = input.gsub("\r\n", "\n") - while !input.empty? - parse_grammar(input) - end + grammar = Grammar.new(input) + @classname = grammar.classname + @modulename = grammar.modulename + @tokens = grammar.tokens + @rule_sets = grammar.rule_sets end def generate(output_file, log_file) @@ -58,44 +58,6 @@ class Propane private - def parse_grammar(input) - if input.slice!(/\A\s+/) - # Skip white space. - elsif input.slice!(/\A#.*\n/) - # Skip comment lines. - elsif input.slice!(/\Amodule\s+(\S+)\n/) - @modulename = $1 - elsif input.slice!(/\Aclass\s+(\S+)\n/) - @classname = $1 - elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/) - name, pattern = $1, $2 - if pattern.nil? - pattern = name - end - unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/ - raise Error.new("Invalid token name #{name}") - end - if @tokens[name] - raise Error.new("Duplicate token name #{name}") - else - @tokens[name] = Token.new(name, pattern, @tokens.size) - end - elsif input.slice!(/\Adrop\s+(\S+)\n/) - pattern = $1 - @tokens[name] = Token.new(nil, pattern, @tokens.size) - elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m) - rule_name, components, code = $1, $2, $3 - components = components.strip.split(/\s+/) - @rule_sets[rule_name] ||= RuleSet.new(rule_name, @rule_sets.size) - @rule_sets[rule_name].add_rule(components, code) - else - if input.size > 25 - input = input.slice(0..20) + "..." - end - raise Error.new("Unexpected grammar input: #{input}") - end - end - def expand_rules @rule_sets.each do |rule_name, rule_set| if @tokens.include?(rule_name) diff --git a/lib/propane/grammar.rb b/lib/propane/grammar.rb new file mode 100644 index 0000000..79ccf61 --- /dev/null +++ b/lib/propane/grammar.rb @@ -0,0 +1,64 @@ +class Propane + + class Grammar + + attr_reader :classname + attr_reader :modulename + attr_reader :rule_sets + attr_reader :tokens + + def initialize(input) + @tokens = {} + @rule_sets = {} + input = input.gsub("\r\n", "\n") + parse_grammar(input) + end + + private + + def parse_grammar(input) + line_number = 1 + while !input.empty? + if sliced = input.slice!(/\A\s+/) + # Skip white space. + elsif sliced = input.slice!(/\A#.*\n/) + # Skip comment lines. + elsif sliced = input.slice!(/\Amodule\s+(\S+)\n/) + @modulename = $1 + elsif sliced = input.slice!(/\Aclass\s+(\S+)\n/) + @classname = $1 + elsif sliced = input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/) + name, pattern = $1, $2 + if pattern.nil? + pattern = name + end + unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/ + raise Error.new("Invalid token name #{name}") + end + if @tokens[name] + raise Error.new("Duplicate token name #{name}") + else + @tokens[name] = Token.new(name, pattern, @tokens.size, line_number) + end + elsif sliced = input.slice!(/\Adrop\s+(\S+)\n/) + pattern = $1 + @tokens[@tokens.size] = Token.new(nil, pattern, @tokens.size, line_number) + elsif sliced = input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m) + rule_name, components, code = $1, $2, $3 + components = components.strip.split(/\s+/) + @rule_sets[rule_name] ||= RuleSet.new(rule_name, @rule_sets.size) + rule = Rule.new(rule_name, components, code, line_number) + @rule_sets[rule_name].add_rule(rule) + else + if input.size > 25 + input = input.slice(0..20) + "..." + end + raise Error.new("Unexpected grammar input at line #{line_number}: #{input.chomp}") + end + line_number += sliced.count("\n") + end + end + + end + +end diff --git a/lib/propane/parser.rb b/lib/propane/parser.rb index 4036b91..e2459f8 100644 --- a/lib/propane/parser.rb +++ b/lib/propane/parser.rb @@ -3,7 +3,7 @@ class Propane class Parser def initialize(tokens, rule_sets) - @token_eof = Token.new("$", nil, TOKEN_EOF) + @token_eof = Token.new("$", nil, TOKEN_EOF, nil) @item_sets = [] @item_sets_set = {} start_items = rule_sets["Start"].rules.map do |rule| diff --git a/lib/propane/rule.rb b/lib/propane/rule.rb index 5fa55dc..a06147e 100644 --- a/lib/propane/rule.rb +++ b/lib/propane/rule.rb @@ -2,16 +2,37 @@ class Propane class Rule - attr_reader :name - + # @return [Array] + # Rule components. attr_reader :components + # @return [String] + # User code associated with the rule. attr_reader :code - def initialize(name, components, code) + # @return [Integer] + # Line number where the rule was defined in the input grammar. + attr_reader :line_number + + # @return [String] + # Rule name. + attr_reader :name + + # Construct a Rule. + # + # @param name [String] + # Rule name. + # @param components [Array] + # Rule components. + # @param code [String] + # User code associated with the rule. + # @param line_number [Integer] + # Line number where the rule was defined in the input grammar. + def initialize(name, components, code, line_number) @name = name @components = components @code = code + @line_number = line_number end end diff --git a/lib/propane/rule_set.rb b/lib/propane/rule_set.rb index 9302f2e..3c4637c 100644 --- a/lib/propane/rule_set.rb +++ b/lib/propane/rule_set.rb @@ -14,8 +14,8 @@ class Propane @rules = [] end - def add_rule(components, code) - @rules << Rule.new(@name, components, code) + def add_rule(rule) + @rules << rule end end diff --git a/lib/propane/token.rb b/lib/propane/token.rb index 1fdb05e..dced646 100644 --- a/lib/propane/token.rb +++ b/lib/propane/token.rb @@ -14,14 +14,29 @@ class Propane # Token ID. attr_reader :id + # @return [Integer, nil] + # Line number where the token was defined in the input grammar. + attr_reader :line_number + # @return [Regex::NFA] # Regex NFA for matching the token. attr_reader :nfa - def initialize(name, pattern, id) + # Construct a Token. + # + # @param name [String] + # Token name. + # @param pattern [String] + # Token pattern. + # @param id [Integer] + # Token ID. + # @param line_number [Integer, nil] + # Line number where the token was defined in the input grammar. + def initialize(name, pattern, id, line_number) @name = name @pattern = pattern @id = id + @line_number = line_number unless pattern.nil? regex = Regex.new(pattern) regex.nfa.end_state.accepts = self