Allow multiple lexer modes to be specified for a lexer pattern - close #35
This commit is contained in:
parent
54bb3307cd
commit
eb9d9026fc
1
Gemfile
1
Gemfile
@ -1,5 +1,6 @@
|
|||||||
source "https://rubygems.org"
|
source "https://rubygems.org"
|
||||||
|
|
||||||
|
gem "base64"
|
||||||
gem "rake"
|
gem "rake"
|
||||||
gem "rspec"
|
gem "rspec"
|
||||||
gem "rdoc"
|
gem "rdoc"
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
GEM
|
GEM
|
||||||
remote: https://rubygems.org/
|
remote: https://rubygems.org/
|
||||||
specs:
|
specs:
|
||||||
|
base64 (0.2.0)
|
||||||
date (3.4.1)
|
date (3.4.1)
|
||||||
diff-lcs (1.5.1)
|
diff-lcs (1.5.1)
|
||||||
docile (1.4.1)
|
docile (1.4.1)
|
||||||
@ -37,6 +38,7 @@ PLATFORMS
|
|||||||
ruby
|
ruby
|
||||||
|
|
||||||
DEPENDENCIES
|
DEPENDENCIES
|
||||||
|
base64
|
||||||
rake
|
rake
|
||||||
rdoc
|
rdoc
|
||||||
redcarpet
|
redcarpet
|
||||||
|
|||||||
@ -536,6 +536,28 @@ It also returns the `str` token now that the token is complete.
|
|||||||
Note that the token name `str` above could have been `string` instead - the
|
Note that the token name `str` above could have been `string` instead - the
|
||||||
namespace for token names is distinct from the namespace for lexer modes.
|
namespace for token names is distinct from the namespace for lexer modes.
|
||||||
|
|
||||||
|
Multiple modes can be specified for a token, pattern, or drop statement by listing the mode names, separated by commas, before the `:` of the mode label.
|
||||||
|
For example, if the grammar wanted to only recognize an identifier following
|
||||||
|
a `.` token and not other keywords, it could switch to an `identonly` mode
|
||||||
|
when matching a `.` token.
|
||||||
|
The `ident` token pattern will be matched in either the `default` or
|
||||||
|
`identonly` mode.
|
||||||
|
|
||||||
|
```
|
||||||
|
ptype char;
|
||||||
|
token abc;
|
||||||
|
token def;
|
||||||
|
default, identonly: token ident /[a-z]+/ <<
|
||||||
|
$$ = match[0];
|
||||||
|
$mode(default);
|
||||||
|
return $token(ident);
|
||||||
|
>>
|
||||||
|
token dot /\./ <<
|
||||||
|
$mode(identonly);
|
||||||
|
>>
|
||||||
|
default, identonly: drop /\s+/;
|
||||||
|
```
|
||||||
|
|
||||||
##> Specifying parser value types - the `ptype` statement
|
##> Specifying parser value types - the `ptype` statement
|
||||||
|
|
||||||
The `ptype` statement is used to define parser value type(s).
|
The `ptype` statement is used to define parser value type(s).
|
||||||
|
|||||||
@ -43,8 +43,8 @@ class Propane
|
|||||||
# Assign default pattern mode to patterns without a mode assigned.
|
# Assign default pattern mode to patterns without a mode assigned.
|
||||||
found_default = false
|
found_default = false
|
||||||
@grammar.patterns.each do |pattern|
|
@grammar.patterns.each do |pattern|
|
||||||
if pattern.mode.nil?
|
if pattern.modes.empty?
|
||||||
pattern.mode = "default"
|
pattern.modes << "default"
|
||||||
found_default = true
|
found_default = true
|
||||||
end
|
end
|
||||||
pattern.ptypename ||= "default"
|
pattern.ptypename ||= "default"
|
||||||
|
|||||||
@ -25,7 +25,7 @@ class Propane
|
|||||||
@code_blocks = {}
|
@code_blocks = {}
|
||||||
@line_number = 1
|
@line_number = 1
|
||||||
@next_line_number = @line_number
|
@next_line_number = @line_number
|
||||||
@mode = nil
|
@modeline = nil
|
||||||
@input = input.gsub("\r\n", "\n")
|
@input = input.gsub("\r\n", "\n")
|
||||||
@ptypes = {"default" => "void *"}
|
@ptypes = {"default" => "void *"}
|
||||||
@prefix = "p_"
|
@prefix = "p_"
|
||||||
@ -58,7 +58,7 @@ class Propane
|
|||||||
def parse_statement!
|
def parse_statement!
|
||||||
if parse_white_space!
|
if parse_white_space!
|
||||||
elsif parse_comment_line!
|
elsif parse_comment_line!
|
||||||
elsif @mode.nil? && parse_mode_label!
|
elsif @modeline.nil? && parse_mode_label!
|
||||||
elsif parse_ast_statement!
|
elsif parse_ast_statement!
|
||||||
elsif parse_ast_prefix_statement!
|
elsif parse_ast_prefix_statement!
|
||||||
elsif parse_ast_suffix_statement!
|
elsif parse_ast_suffix_statement!
|
||||||
@ -81,8 +81,8 @@ class Propane
|
|||||||
end
|
end
|
||||||
|
|
||||||
def parse_mode_label!
|
def parse_mode_label!
|
||||||
if md = consume!(/(#{IDENTIFIER_REGEX})\s*:/)
|
if md = consume!(/(#{IDENTIFIER_REGEX}(?:\s*,\s*#{IDENTIFIER_REGEX})*)\s*:/)
|
||||||
@mode = md[1]
|
@modeline = md[1]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -117,7 +117,7 @@ class Propane
|
|||||||
md = consume!(/([\w.]+)\s*/, "expected module name")
|
md = consume!(/([\w.]+)\s*/, "expected module name")
|
||||||
@modulename = md[1]
|
@modulename = md[1]
|
||||||
consume!(/;/, "expected `;'")
|
consume!(/;/, "expected `;'")
|
||||||
@mode = nil
|
@modeline = nil
|
||||||
true
|
true
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -153,9 +153,9 @@ class Propane
|
|||||||
end
|
end
|
||||||
token = Token.new(name, ptypename, @line_number)
|
token = Token.new(name, ptypename, @line_number)
|
||||||
@tokens << token
|
@tokens << token
|
||||||
pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, mode: @mode, ptypename: ptypename)
|
pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, modes: get_modes_from_modeline, ptypename: ptypename)
|
||||||
@patterns << pattern
|
@patterns << pattern
|
||||||
@mode = nil
|
@modeline = nil
|
||||||
true
|
true
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -173,7 +173,7 @@ class Propane
|
|||||||
consume!(/;/, "expected `;'");
|
consume!(/;/, "expected `;'");
|
||||||
token = Token.new(name, ptypename, @line_number)
|
token = Token.new(name, ptypename, @line_number)
|
||||||
@tokens << token
|
@tokens << token
|
||||||
@mode = nil
|
@modeline = nil
|
||||||
true
|
true
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -186,8 +186,8 @@ class Propane
|
|||||||
end
|
end
|
||||||
consume!(/\s+/)
|
consume!(/\s+/)
|
||||||
consume!(/;/, "expected `;'")
|
consume!(/;/, "expected `;'")
|
||||||
@patterns << Pattern.new(pattern: pattern, line_number: @line_number, mode: @mode)
|
@patterns << Pattern.new(pattern: pattern, line_number: @line_number, modes: get_modes_from_modeline)
|
||||||
@mode = nil
|
@modeline = nil
|
||||||
true
|
true
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -208,7 +208,7 @@ class Propane
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
@rules << Rule.new(rule_name, components, code, ptypename, @line_number)
|
@rules << Rule.new(rule_name, components, code, ptypename, @line_number)
|
||||||
@mode = nil
|
@modeline = nil
|
||||||
true
|
true
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -225,8 +225,8 @@ class Propane
|
|||||||
unless code = parse_code_block!
|
unless code = parse_code_block!
|
||||||
raise Error.new("Line #{@line_number}: expected code block to follow pattern")
|
raise Error.new("Line #{@line_number}: expected code block to follow pattern")
|
||||||
end
|
end
|
||||||
@patterns << Pattern.new(pattern: pattern, line_number: @line_number, code: code, mode: @mode, ptypename: ptypename)
|
@patterns << Pattern.new(pattern: pattern, line_number: @line_number, code: code, modes: get_modes_from_modeline, ptypename: ptypename)
|
||||||
@mode = nil
|
@modeline = nil
|
||||||
true
|
true
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -247,7 +247,7 @@ class Propane
|
|||||||
else
|
else
|
||||||
@code_blocks[name] = code
|
@code_blocks[name] = code
|
||||||
end
|
end
|
||||||
@mode = nil
|
@modeline = nil
|
||||||
true
|
true
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -315,6 +315,14 @@ class Propane
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def get_modes_from_modeline
|
||||||
|
if @modeline
|
||||||
|
Set[*@modeline.split(",").map(&:strip)]
|
||||||
|
else
|
||||||
|
Set.new
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|||||||
@ -26,8 +26,14 @@ class Propane
|
|||||||
private
|
private
|
||||||
|
|
||||||
def build_tables!
|
def build_tables!
|
||||||
@modes = @grammar.patterns.group_by do |pattern|
|
modenames = @grammar.patterns.reduce(Set.new) do |result, pattern|
|
||||||
pattern.mode
|
result + pattern.modes
|
||||||
|
end
|
||||||
|
@modes = modenames.reduce({}) do |result, modename|
|
||||||
|
result[modename] = @grammar.patterns.select do |pattern|
|
||||||
|
pattern.modes.include?(modename)
|
||||||
|
end
|
||||||
|
result
|
||||||
end.transform_values do |patterns|
|
end.transform_values do |patterns|
|
||||||
{dfa: DFA.new(patterns)}
|
{dfa: DFA.new(patterns)}
|
||||||
end
|
end
|
||||||
|
|||||||
@ -26,9 +26,9 @@ class Propane
|
|||||||
# Regex NFA for matching the pattern.
|
# Regex NFA for matching the pattern.
|
||||||
attr_reader :nfa
|
attr_reader :nfa
|
||||||
|
|
||||||
# @return [String, nil]
|
# @return [Set]
|
||||||
# Lexer mode for this pattern.
|
# Lexer modes for this pattern.
|
||||||
attr_accessor :mode
|
attr_accessor :modes
|
||||||
|
|
||||||
# @return [String, nil]
|
# @return [String, nil]
|
||||||
# Parser value type name.
|
# Parser value type name.
|
||||||
@ -46,14 +46,14 @@ class Propane
|
|||||||
# Token to be returned by this pattern.
|
# Token to be returned by this pattern.
|
||||||
# @option options [Integer, nil] :line_number
|
# @option options [Integer, nil] :line_number
|
||||||
# Line number where the token was defined in the input grammar.
|
# Line number where the token was defined in the input grammar.
|
||||||
# @option options [String, nil] :mode
|
# @option options [Set] :modes
|
||||||
# Lexer mode for this pattern.
|
# Lexer modes for this pattern.
|
||||||
def initialize(options)
|
def initialize(options)
|
||||||
@code = options[:code]
|
@code = options[:code]
|
||||||
@pattern = options[:pattern]
|
@pattern = options[:pattern]
|
||||||
@token = options[:token]
|
@token = options[:token]
|
||||||
@line_number = options[:line_number]
|
@line_number = options[:line_number]
|
||||||
@mode = options[:mode]
|
@modes = options[:modes]
|
||||||
@ptypename = options[:ptypename]
|
@ptypename = options[:ptypename]
|
||||||
regex = Regex.new(@pattern)
|
regex = Regex.new(@pattern)
|
||||||
regex.nfa.end_state.accepts = self
|
regex.nfa.end_state.accepts = self
|
||||||
|
|||||||
@ -151,30 +151,30 @@ EOF
|
|||||||
|
|
||||||
o = grammar.patterns.find {|pattern| pattern.token == o}
|
o = grammar.patterns.find {|pattern| pattern.token == o}
|
||||||
expect(o).to_not be_nil
|
expect(o).to_not be_nil
|
||||||
expect(o.mode).to be_nil
|
expect(o.modes).to be_empty
|
||||||
|
|
||||||
o = grammar.tokens.find {|token| token.name == "b"}
|
o = grammar.tokens.find {|token| token.name == "b"}
|
||||||
expect(o).to_not be_nil
|
expect(o).to_not be_nil
|
||||||
|
|
||||||
o = grammar.patterns.find {|pattern| pattern.token == o}
|
o = grammar.patterns.find {|pattern| pattern.token == o}
|
||||||
expect(o).to_not be_nil
|
expect(o).to_not be_nil
|
||||||
expect(o.mode).to eq "m1"
|
expect(o.modes).to eq Set["m1"]
|
||||||
|
|
||||||
o = grammar.patterns.find {|pattern| pattern.pattern == "foo"}
|
o = grammar.patterns.find {|pattern| pattern.pattern == "foo"}
|
||||||
expect(o).to_not be_nil
|
expect(o).to_not be_nil
|
||||||
expect(o.mode).to be_nil
|
expect(o.modes).to be_empty
|
||||||
|
|
||||||
o = grammar.patterns.find {|pattern| pattern.pattern == "bar"}
|
o = grammar.patterns.find {|pattern| pattern.pattern == "bar"}
|
||||||
expect(o).to_not be_nil
|
expect(o).to_not be_nil
|
||||||
expect(o.mode).to eq "m2"
|
expect(o.modes).to eq Set["m2"]
|
||||||
|
|
||||||
o = grammar.patterns.find {|pattern| pattern.pattern == "q"}
|
o = grammar.patterns.find {|pattern| pattern.pattern == "q"}
|
||||||
expect(o).to_not be_nil
|
expect(o).to_not be_nil
|
||||||
expect(o.mode).to be_nil
|
expect(o.modes).to be_empty
|
||||||
|
|
||||||
o = grammar.patterns.find {|pattern| pattern.pattern == "r"}
|
o = grammar.patterns.find {|pattern| pattern.pattern == "r"}
|
||||||
expect(o).to_not be_nil
|
expect(o).to_not be_nil
|
||||||
expect(o.mode).to eq "m3"
|
expect(o.modes).to eq Set["m3"]
|
||||||
end
|
end
|
||||||
|
|
||||||
it "allows assigning ptypes to tokens and rules" do
|
it "allows assigning ptypes to tokens and rules" do
|
||||||
|
|||||||
@ -621,6 +621,62 @@ EOF
|
|||||||
])
|
])
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "multiple lexer modes may apply to a pattern" do
|
||||||
|
case language
|
||||||
|
when "c"
|
||||||
|
write_grammar <<EOF
|
||||||
|
<<
|
||||||
|
#include <stdio.h>
|
||||||
|
>>
|
||||||
|
ptype char;
|
||||||
|
token abc;
|
||||||
|
token def;
|
||||||
|
default, identonly: token ident /[a-z]+/ <<
|
||||||
|
$$ = match[0];
|
||||||
|
$mode(default);
|
||||||
|
return $token(ident);
|
||||||
|
>>
|
||||||
|
token dot /\\./ <<
|
||||||
|
$mode(identonly);
|
||||||
|
>>
|
||||||
|
default, identonly: drop /\\s+/;
|
||||||
|
Start -> abc dot ident <<
|
||||||
|
printf("ident: %c\\n", $3);
|
||||||
|
>>
|
||||||
|
EOF
|
||||||
|
when "d"
|
||||||
|
write_grammar <<EOF
|
||||||
|
<<
|
||||||
|
import std.stdio;
|
||||||
|
>>
|
||||||
|
ptype char;
|
||||||
|
token abc;
|
||||||
|
token def;
|
||||||
|
default, identonly: token ident /[a-z]+/ <<
|
||||||
|
$$ = match[0];
|
||||||
|
$mode(default);
|
||||||
|
>>
|
||||||
|
token dot /\\./ <<
|
||||||
|
$mode(identonly);
|
||||||
|
>>
|
||||||
|
default, identonly: drop /\\s+/;
|
||||||
|
Start -> abc dot ident <<
|
||||||
|
writeln("ident: ", $3);
|
||||||
|
>>
|
||||||
|
EOF
|
||||||
|
end
|
||||||
|
run_propane(language: language)
|
||||||
|
compile("spec/test_lexer_multiple_modes.#{language}", language: language)
|
||||||
|
results = run_test
|
||||||
|
expect(results.status).to eq 0
|
||||||
|
verify_lines(results.stdout, [
|
||||||
|
"ident: d",
|
||||||
|
"pass1",
|
||||||
|
"ident: a",
|
||||||
|
"pass2",
|
||||||
|
])
|
||||||
|
end
|
||||||
|
|
||||||
it "executes user code associated with a parser rule" do
|
it "executes user code associated with a parser rule" do
|
||||||
case language
|
case language
|
||||||
when "c"
|
when "c"
|
||||||
|
|||||||
20
spec/test_lexer_multiple_modes.c
Normal file
20
spec/test_lexer_multiple_modes.c
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
#include "testparser.h"
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
char const * input = "abc.def";
|
||||||
|
p_context_t context;
|
||||||
|
p_context_init(&context, (uint8_t const *)input, strlen(input));
|
||||||
|
assert(p_parse(&context) == P_SUCCESS);
|
||||||
|
printf("pass1\n");
|
||||||
|
|
||||||
|
input = "abc . abc";
|
||||||
|
p_context_init(&context, (uint8_t const *)input, strlen(input));
|
||||||
|
assert(p_parse(&context) == P_SUCCESS);
|
||||||
|
printf("pass2\n");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
21
spec/test_lexer_multiple_modes.d
Normal file
21
spec/test_lexer_multiple_modes.d
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
import testparser;
|
||||||
|
import std.stdio;
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
unittest
|
||||||
|
{
|
||||||
|
string input = `abc.def`;
|
||||||
|
p_context_t context;
|
||||||
|
p_context_init(&context, input);
|
||||||
|
assert(p_parse(&context) == P_SUCCESS);
|
||||||
|
writeln("pass1");
|
||||||
|
|
||||||
|
input = `abc . abc`;
|
||||||
|
p_context_init(&context, input);
|
||||||
|
assert(p_parse(&context) == P_SUCCESS);
|
||||||
|
writeln("pass2");
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user