From b2d11321fe40789765e2730900f361d182e01e5f Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Tue, 4 Oct 2022 22:23:39 -0400 Subject: [PATCH] Add grammar syntax to specify lexer mode for tokens and patterns --- lib/propane/grammar.rb | 39 ++++++++++++++++++++++++++++---- lib/propane/pattern.rb | 7 ++++++ spec/propane/grammar_spec.rb | 44 ++++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 5 deletions(-) diff --git a/lib/propane/grammar.rb b/lib/propane/grammar.rb index 48e1876..bf45596 100644 --- a/lib/propane/grammar.rb +++ b/lib/propane/grammar.rb @@ -16,6 +16,7 @@ class Propane @code_blocks = [] @line_number = 1 @next_line_number = @line_number + @mode = nil @input = input.gsub("\r\n", "\n") parse_grammar! end @@ -31,6 +32,7 @@ class Propane def parse_statement! if parse_white_space! elsif parse_comment_line! + elsif @mode.nil? && parse_mode_label! elsif parse_module_statement! elsif parse_class_statement! elsif parse_pattern_statement! @@ -38,8 +40,7 @@ class Propane elsif parse_tokenid_statement! elsif parse_drop_statement! elsif parse_rule_statement! - elsif code = parse_code_block! - @code_blocks << code + elsif parse_code_block_statement! else if @input.size > 25 @input = @input.slice(0..20) + "..." @@ -48,6 +49,12 @@ class Propane end end + def parse_mode_label! + if md = consume!(/([a-zA-Z_][a-zA-Z_0-9]*)\s*:/) + @mode = md[1] + end + end + def parse_white_space! consume!(/\s+/) end @@ -61,6 +68,8 @@ class Propane md = consume!(/([\w.]+)\s*/, "expected module name") @modulename = md[1] consume!(/;/, "expected `;'") + @mode = nil + true end end @@ -69,6 +78,8 @@ class Propane md = consume!(/([\w.]+)\s*/, "expected class name") @classname = md[1] consume!(/;/, "expected `;'") + @mode = nil + true end end @@ -86,8 +97,10 @@ class Propane end token = Token.new(name, @line_number) @tokens << token - pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code) + pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, mode: @mode) @patterns << pattern + @mode = nil + true end end @@ -99,6 +112,8 @@ class Propane end token = Token.new(name, @line_number) @tokens << token + @mode = nil + true end end @@ -110,7 +125,9 @@ class Propane end consume!(/\s+/) consume!(/;/, "expected `;'") - @patterns << Pattern.new(pattern: pattern, line_number: @line_number, drop: true) + @patterns << Pattern.new(pattern: pattern, line_number: @line_number, drop: true, mode: @mode) + @mode = nil + true end end @@ -122,6 +139,8 @@ class Propane end components = components.strip.split(/\s+/) @rules << Rule.new(rule_name, components, code, @line_number) + @mode = nil + true end end @@ -131,7 +150,17 @@ class Propane unless code = parse_code_block! raise Error.new("Line #{@line_number}: expected code block to follow pattern") end - @patterns << Pattern.new(pattern: pattern, line_number: @line_number, code: code) + @patterns << Pattern.new(pattern: pattern, line_number: @line_number, code: code, mode: @mode) + @mode = nil + true + end + end + + def parse_code_block_statement! + if code = parse_code_block! + @code_blocks << code + @mode = nil + true end end diff --git a/lib/propane/pattern.rb b/lib/propane/pattern.rb index 40688b0..80b8048 100644 --- a/lib/propane/pattern.rb +++ b/lib/propane/pattern.rb @@ -26,6 +26,10 @@ class Propane # Regex NFA for matching the pattern. attr_reader :nfa + # @return [String, nil] + # Lexer mode for this pattern. + attr_reader :mode + # Construct a Pattern. # # @param options [Hash] @@ -40,12 +44,15 @@ class Propane # Token to be returned by this pattern. # @option options [Integer, nil] :line_number # Line number where the token was defined in the input grammar. + # @option options [String, nil] :mode + # Lexer mode for this pattern. def initialize(options) @code = options[:code] @drop = options[:drop] @pattern = options[:pattern] @token = options[:token] @line_number = options[:line_number] + @mode = options[:mode] regex = Regex.new(@pattern) regex.nfa.end_state.accepts = self @nfa = regex.nfa diff --git a/spec/propane/grammar_spec.rb b/spec/propane/grammar_spec.rb index 9b8a50c..ed5845d 100644 --- a/spec/propane/grammar_spec.rb +++ b/spec/propane/grammar_spec.rb @@ -127,5 +127,49 @@ EOF expect(o).to_not be_nil expect(o.code).to eq %[ writeln("Hello there");\n] end + + it "supports mode labels" do + input = <> +m2: /bar/ << +>> +drop /q/; +m3: drop /r/; +EOF + grammar = Grammar.new(input) + + o = grammar.tokens.find {|token| token.name == "a"} + expect(o).to_not be_nil + + o = grammar.patterns.find {|pattern| pattern.token == o} + expect(o).to_not be_nil + expect(o.mode).to be_nil + + o = grammar.tokens.find {|token| token.name == "b"} + expect(o).to_not be_nil + + o = grammar.patterns.find {|pattern| pattern.token == o} + expect(o).to_not be_nil + expect(o.mode).to eq "m1" + + o = grammar.patterns.find {|pattern| pattern.pattern == "foo"} + expect(o).to_not be_nil + expect(o.mode).to be_nil + + o = grammar.patterns.find {|pattern| pattern.pattern == "bar"} + expect(o).to_not be_nil + expect(o.mode).to eq "m2" + + o = grammar.patterns.find {|pattern| pattern.pattern == "q"} + expect(o).to_not be_nil + expect(o.mode).to be_nil + + o = grammar.patterns.find {|pattern| pattern.pattern == "r"} + expect(o).to_not be_nil + expect(o.mode).to eq "m3" + end end end