Move grammar parsing into new Grammar class
This commit is contained in:
parent
34eb1370ff
commit
57a3e9d9f6
@ -5,6 +5,7 @@ require_relative "propane/code_point_range"
|
||||
require_relative "propane/fa"
|
||||
require_relative "propane/fa/state"
|
||||
require_relative "propane/fa/state/transition"
|
||||
require_relative "propane/grammar"
|
||||
require_relative "propane/lexer"
|
||||
require_relative "propane/lexer/dfa"
|
||||
require_relative "propane/parser"
|
||||
@ -36,12 +37,11 @@ class Propane
|
||||
end
|
||||
|
||||
def initialize(input)
|
||||
@tokens = {}
|
||||
@rule_sets = {}
|
||||
input = input.gsub("\r\n", "\n")
|
||||
while !input.empty?
|
||||
parse_grammar(input)
|
||||
end
|
||||
grammar = Grammar.new(input)
|
||||
@classname = grammar.classname
|
||||
@modulename = grammar.modulename
|
||||
@tokens = grammar.tokens
|
||||
@rule_sets = grammar.rule_sets
|
||||
end
|
||||
|
||||
def generate(output_file, log_file)
|
||||
@ -58,44 +58,6 @@ class Propane
|
||||
|
||||
private
|
||||
|
||||
def parse_grammar(input)
|
||||
if input.slice!(/\A\s+/)
|
||||
# Skip white space.
|
||||
elsif input.slice!(/\A#.*\n/)
|
||||
# Skip comment lines.
|
||||
elsif input.slice!(/\Amodule\s+(\S+)\n/)
|
||||
@modulename = $1
|
||||
elsif input.slice!(/\Aclass\s+(\S+)\n/)
|
||||
@classname = $1
|
||||
elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
|
||||
name, pattern = $1, $2
|
||||
if pattern.nil?
|
||||
pattern = name
|
||||
end
|
||||
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
|
||||
raise Error.new("Invalid token name #{name}")
|
||||
end
|
||||
if @tokens[name]
|
||||
raise Error.new("Duplicate token name #{name}")
|
||||
else
|
||||
@tokens[name] = Token.new(name, pattern, @tokens.size)
|
||||
end
|
||||
elsif input.slice!(/\Adrop\s+(\S+)\n/)
|
||||
pattern = $1
|
||||
@tokens[name] = Token.new(nil, pattern, @tokens.size)
|
||||
elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
|
||||
rule_name, components, code = $1, $2, $3
|
||||
components = components.strip.split(/\s+/)
|
||||
@rule_sets[rule_name] ||= RuleSet.new(rule_name, @rule_sets.size)
|
||||
@rule_sets[rule_name].add_rule(components, code)
|
||||
else
|
||||
if input.size > 25
|
||||
input = input.slice(0..20) + "..."
|
||||
end
|
||||
raise Error.new("Unexpected grammar input: #{input}")
|
||||
end
|
||||
end
|
||||
|
||||
def expand_rules
|
||||
@rule_sets.each do |rule_name, rule_set|
|
||||
if @tokens.include?(rule_name)
|
||||
|
64
lib/propane/grammar.rb
Normal file
64
lib/propane/grammar.rb
Normal file
@ -0,0 +1,64 @@
|
||||
class Propane

  # Parses a textual grammar definition into token and rule set tables.
  class Grammar

    # @return [String, nil]
    #   Class name declared via the "class" directive, if any.
    attr_reader :classname

    # @return [String, nil]
    #   Module name declared via the "module" directive, if any.
    attr_reader :modulename

    # @return [Hash]
    #   Rule sets, keyed by rule name.
    attr_reader :rule_sets

    # @return [Hash]
    #   Tokens: named tokens keyed by name, drop tokens keyed by their ID.
    attr_reader :tokens

    # Construct a Grammar by parsing the given grammar text.
    #
    # @param input [String]
    #   Grammar source text.
    def initialize(input)
      @tokens = {}
      @rule_sets = {}
      # Normalize Windows line endings up front so line counting is uniform.
      parse_grammar(input.gsub("\r\n", "\n"))
    end

    private

    # Consume the entire input one grammar construct at a time, tracking the
    # current line number for error reporting.
    #
    # @param input [String]
    #   Grammar source text; destructively consumed via String#slice!.
    #
    # @raise [Error]
    #   If an unrecognized construct is encountered.
    def parse_grammar(input)
      line_number = 1
      until input.empty?
        # Whitespace and comment lines carry no meaning; just advance.
        consumed = input.slice!(/\A\s+/) || input.slice!(/\A#.*\n/)
        if consumed
          # Nothing to record.
        elsif consumed = input.slice!(/\Amodule\s+(\S+)\n/)
          @modulename = $1
        elsif consumed = input.slice!(/\Aclass\s+(\S+)\n/)
          @classname = $1
        elsif consumed = input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
          # A token with no explicit pattern matches its own name literally.
          define_token($1, $2 || $1, line_number)
        elsif consumed = input.slice!(/\Adrop\s+(\S+)\n/)
          define_drop_token($1, line_number)
        elsif consumed = input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
          record_rule($1, $2.strip.split(/\s+/), $3, line_number)
        else
          # Show at most a short prefix of the unparseable remainder.
          snippet = input.size > 25 ? input.slice(0..20) + "..." : input
          raise Error.new("Unexpected grammar input at line #{line_number}: #{snippet.chomp}")
        end
        # Advance the line counter past whatever construct we consumed.
        line_number += consumed.count("\n")
      end
    end

    # Validate and register a named token definition.
    #
    # @param name [String]
    #   Token name; must be a valid identifier and not already defined.
    # @param pattern [String]
    #   Token pattern.
    # @param line_number [Integer]
    #   Line where the token was defined.
    #
    # @raise [Error]
    #   If the name is invalid or already taken.
    def define_token(name, pattern, line_number)
      unless name.match?(/^[a-zA-Z_][a-zA-Z_0-9]*$/)
        raise Error.new("Invalid token name #{name}")
      end
      if @tokens[name]
        raise Error.new("Duplicate token name #{name}")
      end
      @tokens[name] = Token.new(name, pattern, @tokens.size, line_number)
    end

    # Register an anonymous "drop" token, keyed by its ID rather than a name.
    #
    # @param pattern [String]
    #   Pattern whose matches are discarded.
    # @param line_number [Integer]
    #   Line where the drop directive appeared.
    def define_drop_token(pattern, line_number)
      id = @tokens.size
      @tokens[id] = Token.new(nil, pattern, id, line_number)
    end

    # Append a rule to its rule set, creating the set on first use.
    #
    # @param rule_name [String]
    #   Name of the rule set this rule belongs to.
    # @param components [Array<String>]
    #   Rule component names.
    # @param code [String]
    #   User code associated with the rule.
    # @param line_number [Integer]
    #   Line where the rule was defined.
    def record_rule(rule_name, components, code, line_number)
      rule_set = (@rule_sets[rule_name] ||= RuleSet.new(rule_name, @rule_sets.size))
      rule_set.add_rule(Rule.new(rule_name, components, code, line_number))
    end

  end

end
|
@ -3,7 +3,7 @@ class Propane
|
||||
class Parser
|
||||
|
||||
def initialize(tokens, rule_sets)
|
||||
@token_eof = Token.new("$", nil, TOKEN_EOF)
|
||||
@token_eof = Token.new("$", nil, TOKEN_EOF, nil)
|
||||
@item_sets = []
|
||||
@item_sets_set = {}
|
||||
start_items = rule_sets["Start"].rules.map do |rule|
|
||||
|
@ -2,16 +2,37 @@ class Propane
|
||||
|
||||
class Rule
|
||||
|
||||
attr_reader :name
|
||||
|
||||
# @return [Array<Token, RuleSet>]
|
||||
# Rule components.
|
||||
attr_reader :components
|
||||
|
||||
# @return [String]
|
||||
# User code associated with the rule.
|
||||
attr_reader :code
|
||||
|
||||
def initialize(name, components, code)
|
||||
# @return [Integer]
|
||||
# Line number where the rule was defined in the input grammar.
|
||||
attr_reader :line_number
|
||||
|
||||
# @return [String]
|
||||
# Rule name.
|
||||
attr_reader :name
|
||||
|
||||
# Construct a Rule.
|
||||
#
|
||||
# @param name [String]
|
||||
# Rule name.
|
||||
# @param components [Array<String>]
|
||||
# Rule components.
|
||||
# @param code [String]
|
||||
# User code associated with the rule.
|
||||
# @param line_number [Integer]
|
||||
# Line number where the rule was defined in the input grammar.
|
||||
def initialize(name, components, code, line_number)
|
||||
@name = name
|
||||
@components = components
|
||||
@code = code
|
||||
@line_number = line_number
|
||||
end
|
||||
|
||||
end
|
||||
|
@ -14,8 +14,8 @@ class Propane
|
||||
@rules = []
|
||||
end
|
||||
|
||||
def add_rule(components, code)
|
||||
@rules << Rule.new(@name, components, code)
|
||||
def add_rule(rule)
|
||||
@rules << rule
|
||||
end
|
||||
|
||||
end
|
||||
|
@ -14,14 +14,29 @@ class Propane
|
||||
# Token ID.
|
||||
attr_reader :id
|
||||
|
||||
# @return [Integer, nil]
|
||||
# Line number where the token was defined in the input grammar.
|
||||
attr_reader :line_number
|
||||
|
||||
# @return [Regex::NFA]
|
||||
# Regex NFA for matching the token.
|
||||
attr_reader :nfa
|
||||
|
||||
def initialize(name, pattern, id)
|
||||
# Construct a Token.
|
||||
#
|
||||
# @param name [String]
|
||||
# Token name.
|
||||
# @param pattern [String]
|
||||
# Token pattern.
|
||||
# @param id [Integer]
|
||||
# Token ID.
|
||||
# @param line_number [Integer, nil]
|
||||
# Line number where the token was defined in the input grammar.
|
||||
def initialize(name, pattern, id, line_number)
|
||||
@name = name
|
||||
@pattern = pattern
|
||||
@id = id
|
||||
@line_number = line_number
|
||||
unless pattern.nil?
|
||||
regex = Regex.new(pattern)
|
||||
regex.nfa.end_state.accepts = self
|
||||
|
Loading…
x
Reference in New Issue
Block a user