Add forward slashes around patterns and parse more robustly

This commit is contained in:
Josh Holtrop 2022-09-28 23:05:01 -04:00
parent 1547528ecf
commit 04367db0ac
4 changed files with 86 additions and 44 deletions

View File

@ -14,6 +14,7 @@ class Propane
@rules = [] @rules = []
@code_id = 0 @code_id = 0
@line_number = 1 @line_number = 1
@next_line_number = @line_number
@input = input.gsub("\r\n", "\n") @input = input.gsub("\r\n", "\n")
parse_grammar! parse_grammar!
end end
@ -27,7 +28,6 @@ class Propane
end end
def parse_statement! def parse_statement!
@next_line_number = @line_number
if parse_white_space! if parse_white_space!
elsif parse_comment_line! elsif parse_comment_line!
elsif parse_module_statement! elsif parse_module_statement!
@ -42,53 +42,56 @@ class Propane
end end
raise Error.new("Unexpected grammar input at line #{@line_number}: #{@input.chomp}") raise Error.new("Unexpected grammar input at line #{@line_number}: #{@input.chomp}")
end end
@line_number = @next_line_number
end end
def parse_white_space! def parse_white_space!
consume!(/\A\s+/) consume!(/\s+/)
end end
def parse_comment_line! def parse_comment_line!
consume!(/\A#.*\n/) consume!(/#.*\n/)
end end
def parse_module_statement! def parse_module_statement!
if md = consume!(/\Amodule\s+(\S+)\s*;/) if consume!(/module\s+/)
md = consume!(/([\w.]+)\s*/, "expected module name")
@modulename = md[1] @modulename = md[1]
consume!(/;/, "expected `;'")
end end
end end
def parse_class_statement! def parse_class_statement!
if md = consume!(/\Aclass\s+(\S+)\s*;/) if consume!(/class\s+/)
md = consume!(/([\w.]+)\s*/, "expected class name")
@classname = md[1] @classname = md[1]
consume!(/;/, "expected `;'")
end end
end end
def parse_token_statement! def parse_token_statement!
if md = consume!(/\Atoken\s+(\S+?)(?:\s+([^\n]+?))?\s*(?:;|<<\n(.*?)^>>\n)/m) if consume!(/token\s+/)
name, pattern, code = *md[1, 3] md = consume!(/([a-zA-Z_][a-zA-Z_0-9]*)/, "expected token name")
if pattern.nil? name = md[1]
pattern = name if consume!(/\s+/)
pattern = parse_pattern!
end end
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/ pattern ||= name
raise Error.new("Invalid token name #{name.inspect}") consume!(/\s+/)
end if code = parse_code_block!
token = Token.new(name: name, id: @tokens.size, line_number: @line_number)
@tokens << token
if code
code_id = @code_id code_id = @code_id
@code_id += 1 @code_id += 1
else else
code_id = nil consume!(/;/, "expected pattern or `;' or code block")
end end
token = Token.new(name: name, id: @tokens.size, line_number: @line_number)
@tokens << token
pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, code_id: code_id) pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, code_id: code_id)
@patterns << pattern @patterns << pattern
end end
end end
def parse_tokenid_statement! def parse_tokenid_statement!
if md = consume!(/\Atokenid\s+(\S+?)\s*;/m) if md = consume!(/tokenid\s+(\S+?)\s*;/m)
name = md[1] name = md[1]
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/ unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
raise Error.new("Invalid token name #{name.inspect}") raise Error.new("Invalid token name #{name.inspect}")
@ -99,14 +102,19 @@ class Propane
end end
def parse_drop_statement! def parse_drop_statement!
if md = consume!(/\Adrop\s+(\S+)\s*;/) if md = consume!(/drop\s+/)
pattern = md[1] pattern = parse_pattern!
unless pattern
raise Error.new("Line #{@line_number}: expected pattern to follow `drop'")
end
consume!(/\s+/)
consume!(/;/, "expected `;'")
@patterns << Pattern.new(pattern: pattern, line_number: @line_number, drop: true) @patterns << Pattern.new(pattern: pattern, line_number: @line_number, drop: true)
end end
end end
def parse_rule_statement! def parse_rule_statement!
if md = consume!(/\A(\S+)\s*->\s*([^\n]*?)(?:;|<<\n(.*?)^>>\n)/m) if md = consume!(/(\S+)\s*->\s*([^\n]*?)(?:;|<<\n(.*?)^>>\n)/m)
rule_name, components, code = *md[1, 3] rule_name, components, code = *md[1, 3]
unless rule_name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/ unless rule_name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
raise Error.new("Invalid rule name #{name.inspect}") raise Error.new("Invalid rule name #{name.inspect}")
@ -117,22 +125,56 @@ class Propane
end end
end end
# Parse a slash-delimited pattern from the head of the input.
#
# The opening and closing `/` are consumed but not included in the result.
# A backslash escape is copied through verbatim (the backslash and the
# character following it), so an escaped `/` does not terminate the
# pattern.
#
# @return [String, nil]
#   Pattern text found between the slashes, or nil if the input does not
#   begin with `/`.
#
# @raise [Error]
#   If a backslash is not followed by a character on the same line, or if
#   the line or the input ends before the closing `/` is found.
def parse_pattern!
  return unless consume!(%r{/})

  pattern = +""
  until consume!(%r{/})
    if consume!(%r{\\})
      escaped = consume!(%r{(.)})
      unless escaped
        raise Error.new("Line #{@line_number}: unterminated escape sequence")
      end
      pattern << "\\" << escaped[1]
    elsif (md = consume!(%r{(.)}))
      pattern << md[1]
    else
      # Nothing could be consumed (`.` does not match a newline, and the
      # input may be exhausted). Without this branch the loop would never
      # make progress and would spin forever.
      raise Error.new("Line #{@line_number}: unterminated pattern")
    end
  end
  pattern
end
# Parse a code block from the head of the input.
#
# A code block opens with `<<` at the end of a line and runs up to a line
# consisting of `>>`. Both delimiters are consumed.
#
# @return [String, nil]
#   Text found between the delimiter lines, or nil if the input does not
#   begin with a code block.
def parse_code_block!
  block = consume!(/<<\n(.*?)^>>\n/m)
  return unless block

  block[1]
end
# Check if the input string matches the given regex. # Check if the input string matches the given regex.
# #
# If so, remove the match from the input string, and update the line # If so, remove the match from the input string, and update the line
# number. # number. If the regex is not matched and an error message is provided,
# the error is raised.
# #
# @param regex [Regexp] # @param regex [Regexp]
# Regex to attempt to match. # Regex to attempt to match.
# @param error_message [String, nil]
# Error message to display if the regex is not matched. If nil and the
# regex is not matched, an error is not raised.
# #
# @return [MatchData, nil] # @return [MatchData, nil]
# MatchData for the given regex if it was matched and removed from the # MatchData for the given regex if it was matched and removed from the
# input. # input.
def consume!(regex) def consume!(regex, error_message = nil)
if md = @input.match(regex) @line_number = @next_line_number
if md = @input.match(/\A#{regex}/)
@input.slice!(0, md[0].size) @input.slice!(0, md[0].size)
@next_line_number += md[0].count("\n") @next_line_number += md[0].count("\n")
md md
elsif error_message
raise Error.new("Line #{@line_number}: Error: #{error_message}")
else
false
end end
end end

View File

@ -10,7 +10,7 @@ class Foobar;
token while; token while;
token id token id
[a-zA-Z_][a-zA-Z_0-9]*; /[a-zA-Z_][a-zA-Z_0-9]*/;
token token_with_code << token token_with_code <<
Code for the token Code for the token
@ -18,7 +18,7 @@ Code for the token
tokenid token_with_no_pattern; tokenid token_with_no_pattern;
drop \\s+; drop /\\s+/;
A -> B << A -> B <<
a = 42; a = 42;
@ -46,13 +46,13 @@ EOF
o = grammar.tokens.find {|token| token.name == "id"} o = grammar.tokens.find {|token| token.name == "id"}
expect(o).to_not be_nil expect(o).to_not be_nil
expect(o.line_number).to eq 8 expect(o.line_number).to eq 9
expect(o.id).to eq 1 expect(o.id).to eq 1
o = grammar.patterns.find {|pattern| pattern.token == o} o = grammar.patterns.find {|pattern| pattern.token == o}
expect(o).to_not be_nil expect(o).to_not be_nil
expect(o.pattern).to eq "[a-zA-Z_][a-zA-Z_0-9]*" expect(o.pattern).to eq "[a-zA-Z_][a-zA-Z_0-9]*"
expect(o.line_number).to eq 8 expect(o.line_number).to eq 9
expect(o.code_id).to be_nil expect(o.code_id).to be_nil
expect(o.code).to be_nil expect(o.code).to be_nil

View File

@ -82,15 +82,15 @@ EOF
expect(run(<<EOF, "foobar")).to eq expected expect(run(<<EOF, "foobar")).to eq expected
token foo; token foo;
token bar; token bar;
token identifier [a-z]+; token identifier /[a-z]+/;
EOF EOF
expected = [ expected = [
["plusplus", "++"], ["plusplus", "++"],
["plus", "+"], ["plus", "+"],
] ]
expect(run(<<EOF, "+++")).to eq expected expect(run(<<EOF, "+++")).to eq expected
token plus \\+; token plus /\\+/;
token plusplus \\+\\+; token plusplus /\\+\\+/;
EOF EOF
end end
@ -103,7 +103,7 @@ EOF
expect(run(<<EOF, "foo \tbar")).to eq expected expect(run(<<EOF, "foo \tbar")).to eq expected
token foo; token foo;
token bar; token bar;
token WS \\s+; token WS /\\s+/;
EOF EOF
end end
@ -116,7 +116,7 @@ EOF
expect(run(<<EOF, "foo \tbar")).to eq expected expect(run(<<EOF, "foo \tbar")).to eq expected
token foo; token foo;
token bar; token bar;
drop \\s+; drop /\\s+/;
EOF EOF
end end
@ -125,7 +125,7 @@ EOF
["semicolon", ";"], ["semicolon", ";"],
] ]
expect(run(<<EOF, ";")).to eq expected expect(run(<<EOF, ";")).to eq expected
token semicolon \;; token semicolon /;/;
EOF EOF
end end
end end

View File

@ -27,10 +27,10 @@ describe Propane do
it "generates a D lexer" do it "generates a D lexer" do
write_grammar <<EOF write_grammar <<EOF
token int \\d+; token int /\\d+/;
token plus \\+; token plus /\\+/;
token times \\*; token times /\\*/;
drop \\s+; drop /\\s+/;
Start -> Foo; Start -> Foo;
Foo -> int << Foo -> int <<
>> >>
@ -44,10 +44,10 @@ EOF
it "generates a parser" do it "generates a parser" do
write_grammar <<EOF write_grammar <<EOF
token plus \\+; token plus /\\+/;
token times \\*; token times /\\*/;
token zero 0; token zero /0/;
token one 1; token one /1/;
Start -> E; Start -> E;
E -> E times B; E -> E times B;
E -> E plus B; E -> E plus B;
@ -60,7 +60,7 @@ EOF
it "generates an SLR parser" do it "generates an SLR parser" do
write_grammar <<EOF write_grammar <<EOF
token one 1; token one /1/;
Start -> E; Start -> E;
E -> one E; E -> one E;
E -> one; E -> one;
@ -86,7 +86,7 @@ EOF
write_grammar <<EOF write_grammar <<EOF
token a; token a;
token b; token b;
drop \\s+; drop /\\s+/;
Start -> a R1; Start -> a R1;
Start -> b R1; Start -> b R1;
R1 -> b; R1 -> b;