Move grammar parsing into new Grammar class

Josh Holtrop 2022-06-05 10:23:34 -04:00
parent 34eb1370ff
commit 57a3e9d9f6
6 changed files with 113 additions and 51 deletions


@@ -5,6 +5,7 @@ require_relative "propane/code_point_range"
 require_relative "propane/fa"
 require_relative "propane/fa/state"
 require_relative "propane/fa/state/transition"
+require_relative "propane/grammar"
 require_relative "propane/lexer"
 require_relative "propane/lexer/dfa"
 require_relative "propane/parser"
@@ -36,12 +37,11 @@ class Propane
   end
 
   def initialize(input)
-    @tokens = {}
-    @rule_sets = {}
-    input = input.gsub("\r\n", "\n")
-    while !input.empty?
-      parse_grammar(input)
-    end
+    grammar = Grammar.new(input)
+    @classname = grammar.classname
+    @modulename = grammar.modulename
+    @tokens = grammar.tokens
+    @rule_sets = grammar.rule_sets
   end
 
   def generate(output_file, log_file)
@@ -58,44 +58,6 @@ class Propane
 
   private
 
-  def parse_grammar(input)
-    if input.slice!(/\A\s+/)
-      # Skip white space.
-    elsif input.slice!(/\A#.*\n/)
-      # Skip comment lines.
-    elsif input.slice!(/\Amodule\s+(\S+)\n/)
-      @modulename = $1
-    elsif input.slice!(/\Aclass\s+(\S+)\n/)
-      @classname = $1
-    elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
-      name, pattern = $1, $2
-      if pattern.nil?
-        pattern = name
-      end
-      unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
-        raise Error.new("Invalid token name #{name}")
-      end
-      if @tokens[name]
-        raise Error.new("Duplicate token name #{name}")
-      else
-        @tokens[name] = Token.new(name, pattern, @tokens.size)
-      end
-    elsif input.slice!(/\Adrop\s+(\S+)\n/)
-      pattern = $1
-      @tokens[name] = Token.new(nil, pattern, @tokens.size)
-    elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
-      rule_name, components, code = $1, $2, $3
-      components = components.strip.split(/\s+/)
-      @rule_sets[rule_name] ||= RuleSet.new(rule_name, @rule_sets.size)
-      @rule_sets[rule_name].add_rule(components, code)
-    else
-      if input.size > 25
-        input = input.slice(0..20) + "..."
-      end
-      raise Error.new("Unexpected grammar input: #{input}")
-    end
-  end
-
   def expand_rules
     @rule_sets.each do |rule_name, rule_set|
       if @tokens.include?(rule_name)

lib/propane/grammar.rb (new file, 64 additions)

@@ -0,0 +1,64 @@
+class Propane
+
+  class Grammar
+
+    attr_reader :classname
+    attr_reader :modulename
+    attr_reader :rule_sets
+    attr_reader :tokens
+
+    def initialize(input)
+      @tokens = {}
+      @rule_sets = {}
+      input = input.gsub("\r\n", "\n")
+      parse_grammar(input)
+    end
+
+    private
+
+    def parse_grammar(input)
+      line_number = 1
+      while !input.empty?
+        if sliced = input.slice!(/\A\s+/)
+          # Skip white space.
+        elsif sliced = input.slice!(/\A#.*\n/)
+          # Skip comment lines.
+        elsif sliced = input.slice!(/\Amodule\s+(\S+)\n/)
+          @modulename = $1
+        elsif sliced = input.slice!(/\Aclass\s+(\S+)\n/)
+          @classname = $1
+        elsif sliced = input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
+          name, pattern = $1, $2
+          if pattern.nil?
+            pattern = name
+          end
+          unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
+            raise Error.new("Invalid token name #{name}")
+          end
+          if @tokens[name]
+            raise Error.new("Duplicate token name #{name}")
+          else
+            @tokens[name] = Token.new(name, pattern, @tokens.size, line_number)
+          end
+        elsif sliced = input.slice!(/\Adrop\s+(\S+)\n/)
+          pattern = $1
+          @tokens[@tokens.size] = Token.new(nil, pattern, @tokens.size, line_number)
+        elsif sliced = input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
+          rule_name, components, code = $1, $2, $3
+          components = components.strip.split(/\s+/)
+          @rule_sets[rule_name] ||= RuleSet.new(rule_name, @rule_sets.size)
+          rule = Rule.new(rule_name, components, code, line_number)
+          @rule_sets[rule_name].add_rule(rule)
+        else
+          if input.size > 25
+            input = input.slice(0..20) + "..."
+          end
+          raise Error.new("Unexpected grammar input at line #{line_number}: #{input.chomp}")
+        end
+        line_number += sliced.count("\n")
+      end
+    end
+
+  end
+
+end
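
The grammar syntax the new class accepts can be read off the regexes in parse_grammar above. As a rough sketch (not part of this commit), the snippet below feeds a small hypothetical grammar to Propane::Grammar; the module, class, token, and rule names are invented for illustration, it assumes the gem is loadable via require "propane", and the token patterns assume Propane's own Regex class accepts these common escapes.

require "propane"

# Hypothetical grammar text exercising the constructs parse_grammar matches:
# "module", "class", "token NAME [PATTERN]", "drop PATTERN", and a
# "Name: [components] <<" ... ">>" rule block.
input = <<~'GRAMMAR'
  module Calc
  class CalcParser
  token plus \+
  token int \d+
  drop \s+
  Start: [int plus int] <<
  >>
GRAMMAR

grammar = Propane::Grammar.new(input)
grammar.modulename      # => "Calc"
grammar.classname       # => "CalcParser"
grammar.tokens.keys     # => ["plus", "int", 2]  (the drop token is keyed by its index)
grammar.rule_sets.keys  # => ["Start"]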


@@ -3,7 +3,7 @@ class Propane
   class Parser
 
     def initialize(tokens, rule_sets)
-      @token_eof = Token.new("$", nil, TOKEN_EOF)
+      @token_eof = Token.new("$", nil, TOKEN_EOF, nil)
       @item_sets = []
       @item_sets_set = {}
       start_items = rule_sets["Start"].rules.map do |rule|


@@ -2,16 +2,37 @@ class Propane
   class Rule
 
-    attr_reader :name
+    # @return [Array<Token, RuleSet>]
+    #   Rule components.
     attr_reader :components
+
+    # @return [String]
+    #   User code associated with the rule.
     attr_reader :code
 
-    def initialize(name, components, code)
+    # @return [Integer]
+    #   Line number where the rule was defined in the input grammar.
+    attr_reader :line_number
+
+    # @return [String]
+    #   Rule name.
+    attr_reader :name
+
+    # Construct a Rule.
+    #
+    # @param name [String]
+    #   Rule name.
+    # @param components [Array<String>]
+    #   Rule components.
+    # @param code [String]
+    #   User code associated with the rule.
+    # @param line_number [Integer]
+    #   Line number where the rule was defined in the input grammar.
+    def initialize(name, components, code, line_number)
       @name = name
       @components = components
       @code = code
+      @line_number = line_number
     end
 
   end


@@ -14,8 +14,8 @@ class Propane
       @rules = []
     end
 
-    def add_rule(components, code)
-      @rules << Rule.new(@name, components, code)
+    def add_rule(rule)
+      @rules << rule
     end
 
   end


@@ -14,14 +14,29 @@ class Propane
     #   Token ID.
     attr_reader :id
 
+    # @return [Integer, nil]
+    #   Line number where the token was defined in the input grammar.
+    attr_reader :line_number
+
     # @return [Regex::NFA]
     #   Regex NFA for matching the token.
     attr_reader :nfa
 
-    def initialize(name, pattern, id)
+    # Construct a Token.
+    #
+    # @param name [String]
+    #   Token name.
+    # @param pattern [String]
+    #   Token pattern.
+    # @param id [Integer]
+    #   Token ID.
+    # @param line_number [Integer, nil]
+    #   Line number where the token was defined in the input grammar.
+    def initialize(name, pattern, id, line_number)
       @name = name
       @pattern = pattern
       @id = id
+      @line_number = line_number
       unless pattern.nil?
         regex = Regex.new(pattern)
         regex.nfa.end_state.accepts = self