Add forward slashes around patterns and parse more robustly
This commit is contained in:
parent
1547528ecf
commit
04367db0ac
@ -14,6 +14,7 @@ class Propane
|
|||||||
@rules = []
|
@rules = []
|
||||||
@code_id = 0
|
@code_id = 0
|
||||||
@line_number = 1
|
@line_number = 1
|
||||||
|
@next_line_number = @line_number
|
||||||
@input = input.gsub("\r\n", "\n")
|
@input = input.gsub("\r\n", "\n")
|
||||||
parse_grammar!
|
parse_grammar!
|
||||||
end
|
end
|
||||||
@ -27,7 +28,6 @@ class Propane
|
|||||||
end
|
end
|
||||||
|
|
||||||
def parse_statement!
|
def parse_statement!
|
||||||
@next_line_number = @line_number
|
|
||||||
if parse_white_space!
|
if parse_white_space!
|
||||||
elsif parse_comment_line!
|
elsif parse_comment_line!
|
||||||
elsif parse_module_statement!
|
elsif parse_module_statement!
|
||||||
@ -42,53 +42,56 @@ class Propane
|
|||||||
end
|
end
|
||||||
raise Error.new("Unexpected grammar input at line #{@line_number}: #{@input.chomp}")
|
raise Error.new("Unexpected grammar input at line #{@line_number}: #{@input.chomp}")
|
||||||
end
|
end
|
||||||
@line_number = @next_line_number
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse_white_space!
|
def parse_white_space!
|
||||||
consume!(/\A\s+/)
|
consume!(/\s+/)
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse_comment_line!
|
def parse_comment_line!
|
||||||
consume!(/\A#.*\n/)
|
consume!(/#.*\n/)
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse_module_statement!
|
def parse_module_statement!
|
||||||
if md = consume!(/\Amodule\s+(\S+)\s*;/)
|
if consume!(/module\s+/)
|
||||||
|
md = consume!(/([\w.]+)\s*/, "expected module name")
|
||||||
@modulename = md[1]
|
@modulename = md[1]
|
||||||
|
consume!(/;/, "expected `;'")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse_class_statement!
|
def parse_class_statement!
|
||||||
if md = consume!(/\Aclass\s+(\S+)\s*;/)
|
if consume!(/class\s+/)
|
||||||
|
md = consume!(/([\w.]+)\s*/, "expected class name")
|
||||||
@classname = md[1]
|
@classname = md[1]
|
||||||
|
consume!(/;/, "expected `;'")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse_token_statement!
|
def parse_token_statement!
|
||||||
if md = consume!(/\Atoken\s+(\S+?)(?:\s+([^\n]+?))?\s*(?:;|<<\n(.*?)^>>\n)/m)
|
if consume!(/token\s+/)
|
||||||
name, pattern, code = *md[1, 3]
|
md = consume!(/([a-zA-Z_][a-zA-Z_0-9]*)/, "expected token name")
|
||||||
if pattern.nil?
|
name = md[1]
|
||||||
pattern = name
|
if consume!(/\s+/)
|
||||||
|
pattern = parse_pattern!
|
||||||
end
|
end
|
||||||
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
|
pattern ||= name
|
||||||
raise Error.new("Invalid token name #{name.inspect}")
|
consume!(/\s+/)
|
||||||
end
|
if code = parse_code_block!
|
||||||
token = Token.new(name: name, id: @tokens.size, line_number: @line_number)
|
|
||||||
@tokens << token
|
|
||||||
if code
|
|
||||||
code_id = @code_id
|
code_id = @code_id
|
||||||
@code_id += 1
|
@code_id += 1
|
||||||
else
|
else
|
||||||
code_id = nil
|
consume!(/;/, "expected pattern or `;' or code block")
|
||||||
end
|
end
|
||||||
|
token = Token.new(name: name, id: @tokens.size, line_number: @line_number)
|
||||||
|
@tokens << token
|
||||||
pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, code_id: code_id)
|
pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, code_id: code_id)
|
||||||
@patterns << pattern
|
@patterns << pattern
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse_tokenid_statement!
|
def parse_tokenid_statement!
|
||||||
if md = consume!(/\Atokenid\s+(\S+?)\s*;/m)
|
if md = consume!(/tokenid\s+(\S+?)\s*;/m)
|
||||||
name = md[1]
|
name = md[1]
|
||||||
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
|
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
|
||||||
raise Error.new("Invalid token name #{name.inspect}")
|
raise Error.new("Invalid token name #{name.inspect}")
|
||||||
@ -99,14 +102,19 @@ class Propane
|
|||||||
end
|
end
|
||||||
|
|
||||||
def parse_drop_statement!
|
def parse_drop_statement!
|
||||||
if md = consume!(/\Adrop\s+(\S+)\s*;/)
|
if md = consume!(/drop\s+/)
|
||||||
pattern = md[1]
|
pattern = parse_pattern!
|
||||||
|
unless pattern
|
||||||
|
raise Error.new("Line #{@line_number}: expected pattern to follow `drop'")
|
||||||
|
end
|
||||||
|
consume!(/\s+/)
|
||||||
|
consume!(/;/, "expected `;'")
|
||||||
@patterns << Pattern.new(pattern: pattern, line_number: @line_number, drop: true)
|
@patterns << Pattern.new(pattern: pattern, line_number: @line_number, drop: true)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse_rule_statement!
|
def parse_rule_statement!
|
||||||
if md = consume!(/\A(\S+)\s*->\s*([^\n]*?)(?:;|<<\n(.*?)^>>\n)/m)
|
if md = consume!(/(\S+)\s*->\s*([^\n]*?)(?:;|<<\n(.*?)^>>\n)/m)
|
||||||
rule_name, components, code = *md[1, 3]
|
rule_name, components, code = *md[1, 3]
|
||||||
unless rule_name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
|
unless rule_name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
|
||||||
raise Error.new("Invalid rule name #{name.inspect}")
|
raise Error.new("Invalid rule name #{name.inspect}")
|
||||||
@ -117,22 +125,56 @@ class Propane
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Parse a pattern delimited by forward slashes from the head of the input.
#
# A backslash escapes the character that follows it; both the backslash and
# the escaped character are kept in the returned pattern text, so an escaped
# forward slash does not terminate the pattern.
#
# @return [String, nil]
#   Pattern text between the delimiting slashes, or nil if the input does
#   not begin with a forward slash.
#
# @raise [Error]
#   If a backslash is not followed by a character on the same line, or if
#   the line or the input ends before the closing forward slash.
def parse_pattern!
  return unless consume!(%r{/})

  pattern = "".dup
  until consume!(%r{/})
    if consume!(%r{\\})
      escaped = consume!(%r{(.)})
      raise Error.new("Line #{@line_number}: unterminated escape sequence") unless escaped
      pattern << "\\" << escaped[1]
    elsif (char = consume!(%r{(.)}))
      pattern << char[1]
    else
      # `.` does not match a newline and nothing matches at end of input, so
      # without this branch the loop would spin forever without consuming.
      raise Error.new("Line #{@line_number}: unterminated pattern")
    end
  end
  pattern
end
|
||||||
|
|
||||||
|
# Consume a code block from the head of the input, if one is present.
#
# A code block opens with `<<` immediately followed by a newline and is
# closed by `>>` at the start of a line, itself followed by a newline.
#
# @return [String, nil]
#   Text between the delimiters, or nil if the input does not start with a
#   code block.
def parse_code_block!
  block_match = consume!(/<<\n(.*?)^>>\n/m)
  block_match[1] if block_match
end
|
||||||
|
|
||||||
# Check if the input string matches the given regex.
|
# Check if the input string matches the given regex.
|
||||||
#
|
#
|
||||||
# If so, remove the match from the input string, and update the line
|
# If so, remove the match from the input string, and update the line
|
||||||
# number.
|
# number. If the regex is not matched and an error message is provided,
|
||||||
|
# the error is raised.
|
||||||
#
|
#
|
||||||
# @param regex [Regexp]
|
# @param regex [Regexp]
|
||||||
# Regex to attempt to match.
|
# Regex to attempt to match.
|
||||||
|
# @param error_message [String, nil]
|
||||||
|
# Error message to display if the regex is not matched. If nil and the
|
||||||
|
# regex is not matched, an error is not raised.
|
||||||
#
|
#
|
||||||
# @return [MatchData, nil]
|
# @return [MatchData, nil]
|
||||||
# MatchData for the given regex if it was matched and removed from the
|
# MatchData for the given regex if it was matched and removed from the
|
||||||
# input.
|
# input.
|
||||||
def consume!(regex)
|
def consume!(regex, error_message = nil)
|
||||||
if md = @input.match(regex)
|
@line_number = @next_line_number
|
||||||
|
if md = @input.match(/\A#{regex}/)
|
||||||
@input.slice!(0, md[0].size)
|
@input.slice!(0, md[0].size)
|
||||||
@next_line_number += md[0].count("\n")
|
@next_line_number += md[0].count("\n")
|
||||||
md
|
md
|
||||||
|
elsif error_message
|
||||||
|
raise Error.new("Line #{@line_number}: Error: #{error_message}")
|
||||||
|
else
|
||||||
|
false
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -10,7 +10,7 @@ class Foobar;
|
|||||||
token while;
|
token while;
|
||||||
|
|
||||||
token id
|
token id
|
||||||
[a-zA-Z_][a-zA-Z_0-9]*;
|
/[a-zA-Z_][a-zA-Z_0-9]*/;
|
||||||
|
|
||||||
token token_with_code <<
|
token token_with_code <<
|
||||||
Code for the token
|
Code for the token
|
||||||
@ -18,7 +18,7 @@ Code for the token
|
|||||||
|
|
||||||
tokenid token_with_no_pattern;
|
tokenid token_with_no_pattern;
|
||||||
|
|
||||||
drop \\s+;
|
drop /\\s+/;
|
||||||
|
|
||||||
A -> B <<
|
A -> B <<
|
||||||
a = 42;
|
a = 42;
|
||||||
@ -46,13 +46,13 @@ EOF
|
|||||||
|
|
||||||
o = grammar.tokens.find {|token| token.name == "id"}
|
o = grammar.tokens.find {|token| token.name == "id"}
|
||||||
expect(o).to_not be_nil
|
expect(o).to_not be_nil
|
||||||
expect(o.line_number).to eq 8
|
expect(o.line_number).to eq 9
|
||||||
expect(o.id).to eq 1
|
expect(o.id).to eq 1
|
||||||
|
|
||||||
o = grammar.patterns.find {|pattern| pattern.token == o}
|
o = grammar.patterns.find {|pattern| pattern.token == o}
|
||||||
expect(o).to_not be_nil
|
expect(o).to_not be_nil
|
||||||
expect(o.pattern).to eq "[a-zA-Z_][a-zA-Z_0-9]*"
|
expect(o.pattern).to eq "[a-zA-Z_][a-zA-Z_0-9]*"
|
||||||
expect(o.line_number).to eq 8
|
expect(o.line_number).to eq 9
|
||||||
expect(o.code_id).to be_nil
|
expect(o.code_id).to be_nil
|
||||||
expect(o.code).to be_nil
|
expect(o.code).to be_nil
|
||||||
|
|
||||||
|
@ -82,15 +82,15 @@ EOF
|
|||||||
expect(run(<<EOF, "foobar")).to eq expected
|
expect(run(<<EOF, "foobar")).to eq expected
|
||||||
token foo;
|
token foo;
|
||||||
token bar;
|
token bar;
|
||||||
token identifier [a-z]+;
|
token identifier /[a-z]+/;
|
||||||
EOF
|
EOF
|
||||||
expected = [
|
expected = [
|
||||||
["plusplus", "++"],
|
["plusplus", "++"],
|
||||||
["plus", "+"],
|
["plus", "+"],
|
||||||
]
|
]
|
||||||
expect(run(<<EOF, "+++")).to eq expected
|
expect(run(<<EOF, "+++")).to eq expected
|
||||||
token plus \\+;
|
token plus /\\+/;
|
||||||
token plusplus \\+\\+;
|
token plusplus /\\+\\+/;
|
||||||
EOF
|
EOF
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -103,7 +103,7 @@ EOF
|
|||||||
expect(run(<<EOF, "foo \tbar")).to eq expected
|
expect(run(<<EOF, "foo \tbar")).to eq expected
|
||||||
token foo;
|
token foo;
|
||||||
token bar;
|
token bar;
|
||||||
token WS \\s+;
|
token WS /\\s+/;
|
||||||
EOF
|
EOF
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -116,7 +116,7 @@ EOF
|
|||||||
expect(run(<<EOF, "foo \tbar")).to eq expected
|
expect(run(<<EOF, "foo \tbar")).to eq expected
|
||||||
token foo;
|
token foo;
|
||||||
token bar;
|
token bar;
|
||||||
drop \\s+;
|
drop /\\s+/;
|
||||||
EOF
|
EOF
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -125,7 +125,7 @@ EOF
|
|||||||
["semicolon", ";"],
|
["semicolon", ";"],
|
||||||
]
|
]
|
||||||
expect(run(<<EOF, ";")).to eq expected
|
expect(run(<<EOF, ";")).to eq expected
|
||||||
token semicolon \;;
|
token semicolon /;/;
|
||||||
EOF
|
EOF
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -27,10 +27,10 @@ describe Propane do
|
|||||||
|
|
||||||
it "generates a D lexer" do
|
it "generates a D lexer" do
|
||||||
write_grammar <<EOF
|
write_grammar <<EOF
|
||||||
token int \\d+;
|
token int /\\d+/;
|
||||||
token plus \\+;
|
token plus /\\+/;
|
||||||
token times \\*;
|
token times /\\*/;
|
||||||
drop \\s+;
|
drop /\\s+/;
|
||||||
Start -> Foo;
|
Start -> Foo;
|
||||||
Foo -> int <<
|
Foo -> int <<
|
||||||
>>
|
>>
|
||||||
@ -44,10 +44,10 @@ EOF
|
|||||||
|
|
||||||
it "generates a parser" do
|
it "generates a parser" do
|
||||||
write_grammar <<EOF
|
write_grammar <<EOF
|
||||||
token plus \\+;
|
token plus /\\+/;
|
||||||
token times \\*;
|
token times /\\*/;
|
||||||
token zero 0;
|
token zero /0/;
|
||||||
token one 1;
|
token one /1/;
|
||||||
Start -> E;
|
Start -> E;
|
||||||
E -> E times B;
|
E -> E times B;
|
||||||
E -> E plus B;
|
E -> E plus B;
|
||||||
@ -60,7 +60,7 @@ EOF
|
|||||||
|
|
||||||
it "generates an SLR parser" do
|
it "generates an SLR parser" do
|
||||||
write_grammar <<EOF
|
write_grammar <<EOF
|
||||||
token one 1;
|
token one /1/;
|
||||||
Start -> E;
|
Start -> E;
|
||||||
E -> one E;
|
E -> one E;
|
||||||
E -> one;
|
E -> one;
|
||||||
@ -86,7 +86,7 @@ EOF
|
|||||||
write_grammar <<EOF
|
write_grammar <<EOF
|
||||||
token a;
|
token a;
|
||||||
token b;
|
token b;
|
||||||
drop \\s+;
|
drop /\\s+/;
|
||||||
Start -> a R1;
|
Start -> a R1;
|
||||||
Start -> b R1;
|
Start -> b R1;
|
||||||
R1 -> b;
|
R1 -> b;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user