Allow multiple lexer modes to be specified for a lexer pattern - close #35

This commit is contained in:
Josh Holtrop 2025-03-11 20:40:10 -04:00
parent 54bb3307cd
commit eb9d9026fc
11 changed files with 166 additions and 30 deletions

View File

@ -1,5 +1,6 @@
source "https://rubygems.org"
gem "base64"
gem "rake"
gem "rspec"
gem "rdoc"

View File

@ -1,6 +1,7 @@
GEM
remote: https://rubygems.org/
specs:
base64 (0.2.0)
date (3.4.1)
diff-lcs (1.5.1)
docile (1.4.1)
@ -37,6 +38,7 @@ PLATFORMS
ruby
DEPENDENCIES
base64
rake
rdoc
redcarpet

View File

@ -536,6 +536,28 @@ It also returns the `str` token now that the token is complete.
Note that the token name `str` above could have been `string` instead - the
namespace for token names is distinct from the namespace for lexer modes.
Multiple modes can be specified for a token or pattern or drop statement.
For example, if the grammar wanted to recognize only an identifier (and not
other keywords) following a `.` token, it could switch to an `identonly` mode
when matching a `.`.
The `ident` token pattern will be matched in either the `default` or
`identonly` mode.
```
ptype char;
token abc;
token def;
default, identonly: token ident /[a-z]+/ <<
$$ = match[0];
$mode(default);
return $token(ident);
>>
token dot /\./ <<
$mode(identonly);
>>
default, identonly: drop /\s+/;
```
##> Specifying parser value types - the `ptype` statement
The `ptype` statement is used to define parser value type(s).

View File

@ -43,8 +43,8 @@ class Propane
# Assign default pattern mode to patterns without a mode assigned.
found_default = false
@grammar.patterns.each do |pattern|
if pattern.mode.nil?
pattern.mode = "default"
if pattern.modes.empty?
pattern.modes << "default"
found_default = true
end
pattern.ptypename ||= "default"

View File

@ -25,7 +25,7 @@ class Propane
@code_blocks = {}
@line_number = 1
@next_line_number = @line_number
@mode = nil
@modeline = nil
@input = input.gsub("\r\n", "\n")
@ptypes = {"default" => "void *"}
@prefix = "p_"
@ -58,7 +58,7 @@ class Propane
def parse_statement!
if parse_white_space!
elsif parse_comment_line!
elsif @mode.nil? && parse_mode_label!
elsif @modeline.nil? && parse_mode_label!
elsif parse_ast_statement!
elsif parse_ast_prefix_statement!
elsif parse_ast_suffix_statement!
@ -81,8 +81,8 @@ class Propane
end
def parse_mode_label!
if md = consume!(/(#{IDENTIFIER_REGEX})\s*:/)
@mode = md[1]
if md = consume!(/(#{IDENTIFIER_REGEX}(?:\s*,\s*#{IDENTIFIER_REGEX})*)\s*:/)
@modeline = md[1]
end
end
@ -117,7 +117,7 @@ class Propane
md = consume!(/([\w.]+)\s*/, "expected module name")
@modulename = md[1]
consume!(/;/, "expected `;'")
@mode = nil
@modeline = nil
true
end
end
@ -153,9 +153,9 @@ class Propane
end
token = Token.new(name, ptypename, @line_number)
@tokens << token
pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, mode: @mode, ptypename: ptypename)
pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, modes: get_modes_from_modeline, ptypename: ptypename)
@patterns << pattern
@mode = nil
@modeline = nil
true
end
end
@ -173,7 +173,7 @@ class Propane
consume!(/;/, "expected `;'");
token = Token.new(name, ptypename, @line_number)
@tokens << token
@mode = nil
@modeline = nil
true
end
end
@ -186,8 +186,8 @@ class Propane
end
consume!(/\s+/)
consume!(/;/, "expected `;'")
@patterns << Pattern.new(pattern: pattern, line_number: @line_number, mode: @mode)
@mode = nil
@patterns << Pattern.new(pattern: pattern, line_number: @line_number, modes: get_modes_from_modeline)
@modeline = nil
true
end
end
@ -208,7 +208,7 @@ class Propane
end
end
@rules << Rule.new(rule_name, components, code, ptypename, @line_number)
@mode = nil
@modeline = nil
true
end
end
@ -225,8 +225,8 @@ class Propane
unless code = parse_code_block!
raise Error.new("Line #{@line_number}: expected code block to follow pattern")
end
@patterns << Pattern.new(pattern: pattern, line_number: @line_number, code: code, mode: @mode, ptypename: ptypename)
@mode = nil
@patterns << Pattern.new(pattern: pattern, line_number: @line_number, code: code, modes: get_modes_from_modeline, ptypename: ptypename)
@modeline = nil
true
end
end
@ -247,7 +247,7 @@ class Propane
else
@code_blocks[name] = code
end
@mode = nil
@modeline = nil
true
end
end
@ -315,6 +315,14 @@ class Propane
end
end
# Convert the pending mode label text into a set of lexer mode names.
#
# The mode label text (@modeline) is a comma-separated list of names such as
# "default, identonly"; whitespace around each name is removed.
#
# @return [Set]
#   Mode names from @modeline, or an empty Set when no mode label is pending.
def get_modes_from_modeline
  return Set.new unless @modeline

  mode_names = @modeline.split(",").map { |mode_name| mode_name.strip }
  Set.new(mode_names)
end
end
end

View File

@ -26,8 +26,14 @@ class Propane
private
def build_tables!
@modes = @grammar.patterns.group_by do |pattern|
pattern.mode
modenames = @grammar.patterns.reduce(Set.new) do |result, pattern|
result + pattern.modes
end
@modes = modenames.reduce({}) do |result, modename|
result[modename] = @grammar.patterns.select do |pattern|
pattern.modes.include?(modename)
end
result
end.transform_values do |patterns|
{dfa: DFA.new(patterns)}
end

View File

@ -26,9 +26,9 @@ class Propane
# Regex NFA for matching the pattern.
attr_reader :nfa
# @return [String, nil]
# Lexer mode for this pattern.
attr_accessor :mode
# @return [Set]
# Lexer modes for this pattern.
attr_accessor :modes
# @return [String, nil]
# Parser value type name.
@ -46,14 +46,14 @@ class Propane
# Token to be returned by this pattern.
# @option options [Integer, nil] :line_number
# Line number where the token was defined in the input grammar.
# @option options [String, nil] :mode
# Lexer mode for this pattern.
# @option options [Set] :modes
# Lexer modes for this pattern.
def initialize(options)
@code = options[:code]
@pattern = options[:pattern]
@token = options[:token]
@line_number = options[:line_number]
@mode = options[:mode]
@modes = options[:modes]
@ptypename = options[:ptypename]
regex = Regex.new(@pattern)
regex.nfa.end_state.accepts = self

View File

@ -151,30 +151,30 @@ EOF
o = grammar.patterns.find {|pattern| pattern.token == o}
expect(o).to_not be_nil
expect(o.mode).to be_nil
expect(o.modes).to be_empty
o = grammar.tokens.find {|token| token.name == "b"}
expect(o).to_not be_nil
o = grammar.patterns.find {|pattern| pattern.token == o}
expect(o).to_not be_nil
expect(o.mode).to eq "m1"
expect(o.modes).to eq Set["m1"]
o = grammar.patterns.find {|pattern| pattern.pattern == "foo"}
expect(o).to_not be_nil
expect(o.mode).to be_nil
expect(o.modes).to be_empty
o = grammar.patterns.find {|pattern| pattern.pattern == "bar"}
expect(o).to_not be_nil
expect(o.mode).to eq "m2"
expect(o.modes).to eq Set["m2"]
o = grammar.patterns.find {|pattern| pattern.pattern == "q"}
expect(o).to_not be_nil
expect(o.mode).to be_nil
expect(o.modes).to be_empty
o = grammar.patterns.find {|pattern| pattern.pattern == "r"}
expect(o).to_not be_nil
expect(o.mode).to eq "m3"
expect(o.modes).to eq Set["m3"]
end
it "allows assigning ptypes to tokens and rules" do

View File

@ -621,6 +621,62 @@ EOF
])
end
# Checks that a comma-separated mode label (`default, identonly:`) attaches a
# single token/drop pattern to more than one lexer mode.
it "multiple lexer modes may apply to a pattern" do
# The grammar is written once per target language. Both variants declare the
# same tokens, modes and Start rule; they differ only in the embedded user
# code (the C variant also returns the token explicitly from the `ident`
# code block).
case language
when "c"
write_grammar <<EOF
<<
#include <stdio.h>
>>
ptype char;
token abc;
token def;
default, identonly: token ident /[a-z]+/ <<
$$ = match[0];
$mode(default);
return $token(ident);
>>
token dot /\\./ <<
$mode(identonly);
>>
default, identonly: drop /\\s+/;
Start -> abc dot ident <<
printf("ident: %c\\n", $3);
>>
EOF
when "d"
write_grammar <<EOF
<<
import std.stdio;
>>
ptype char;
token abc;
token def;
default, identonly: token ident /[a-z]+/ <<
$$ = match[0];
$mode(default);
>>
token dot /\\./ <<
$mode(identonly);
>>
default, identonly: drop /\\s+/;
Start -> abc dot ident <<
writeln("ident: ", $3);
>>
EOF
end
# NOTE(review): these helpers are defined outside this section; judging by
# their names they generate the parser, build it with the per-language
# driver source under spec/, and run the resulting program.
run_propane(language: language)
compile("spec/test_lexer_multiple_modes.#{language}", language: language)
results = run_test
expect(results.status).to eq 0
# Each "ident: X" line is printed by the Start rule's user code, where X is
# the ident token's value (`match[0]`, the first character of the matched
# text). The "passN" lines are presumably printed by the driver program
# after each successful parse.
verify_lines(results.stdout, [
"ident: d",
"pass1",
"ident: a",
"pass2",
])
end
it "executes user code associated with a parser rule" do
case language
when "c"

View File

@ -0,0 +1,20 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include <stdio.h>
/* (Re)initialize the parser context over the given NUL-terminated input and
 * require that parsing it succeeds. */
static void expect_parse_success(p_context_t * context, char const * input)
{
    p_context_init(context, (uint8_t const *)input, strlen(input));
    assert(p_parse(context) == P_SUCCESS);
}

/* Parse two inputs with the same context object, printing a "passN" line
 * after each successful parse: first an identifier directly after the dot,
 * then one separated from the dot by whitespace. */
int main()
{
    p_context_t context;

    expect_parse_success(&context, "abc.def");
    printf("pass1\n");

    expect_parse_success(&context, "abc . abc");
    printf("pass2\n");

    return 0;
}

View File

@ -0,0 +1,21 @@
import testparser;
import std.stdio;
/* Program entry point: does nothing except return 0. The parser checks in
 * this file are written in the `unittest` block rather than in main. */
int main()
{
return 0;
}
/* Parse two inputs with the same context object, printing a "passN" line
 * after each successful parse: first an identifier directly after the dot,
 * then one separated from the dot by whitespace. */
unittest
{
    p_context_t context;

    /* (Re)initialize the shared context over text and require success. */
    void expect_parse_success(string text)
    {
        p_context_init(&context, text);
        assert(p_parse(&context) == P_SUCCESS);
    }

    expect_parse_success(`abc.def`);
    writeln("pass1");

    expect_parse_success(`abc . abc`);
    writeln("pass2");
}