Compare commits: 064bb94108...164a4854fb (148 commits)
Commit SHA1s (the author and date columns were empty in this view):

164a4854fb
ddadc2008b
fbd215098b
bfe2916165
c9bc4832f4
6dfef8573f
f3ed678fe1
280b749e38
d6779aef00
746ec89be8
997f34a1e4
a2795bb531
850e639e3a
5f7e548fe3
bdb10e7afc
7bdaf7cdbc
08e3516ad9
2c8f3c6e9a
9dffa3c41a
ceb7e9ee32
6026bf1514
9cc1890ddc
e4f2fffe50
d931bcb513
2e16b0bd6e
6ce94e15af
3f92ae46c4
00016f16b3
9273bfccf6
f295acb593
51a31317a6
9459883e74
28591907c1
37d6917b49
2685c05360
c0c3353fd7
3158e51059
d9e4f64d2e
ec2dcf9a72
578e165e2d
e8df4296cc
230c324209
1271e19b50
12e11399af
24fab8515d
1dcdd87a28
8aec7ec0de
c96d55b031
ca7d4862f9
3c874ae4c1
748c219625
71ee7de9f9
2121acc87e
f2563cf255
24d12be3b9
91d6ee25ea
2f1cb47bea
651461c570
3ce54bd303
15454f926a
4beb3d2016
aae7bc188c
a716dedeb6
93cb25df62
61dd5bc5a0
10a8ef5eb4
98584ce07a
2122ca02fe
5881f13380
ebc1d8f001
5fecd5c6a2
5b688b090d
f77218801f
70118dd019
d552f2a540
d2fac07249
a34272dfd6
9d05861819
03035a25a5
db70f8b94d
f67dd62b20
c6bac6d3a1
aa92970c31
b8282e748e
930ac56148
7f54778ba8
701903def2
afea886ecb
03b2e87186
e4370cac62
ed3f599e25
1228a76c55
538e360cb3
e7f8c3726c
b6e3a5c151
35ef94dbd3
37e1252ded
214ece7d90
8473df421a
3987f08cd7
3a1650906e
952bffc33c
f64f3683c6
43f5caf449
f38a7456e9
c77c81bf25
7196a0605a
24054461a2
89a5976064
d3df67be1e
791340b292
cf8718b69c
39f164a7db
70b3e56de2
2e8e72a1e8
ea27baa630
d8dd64d860
54cefda186
201a38fb51
33f9d01883
9b09625c8a
6119d860bc
611ebeeddd
449eec4982
8cd648fc8f
885ef6c151
60adffbbab
b8c01ca1d1
b04ff56308
ca1d2d1e5c
13403405b0
07dd68e367
c1666a1e74
768a0ef17f
9e865d1982
9884047090
04393dcc51
7f27b3fd6f
37ad87d602
23b7782a5d
0cc4516c0e
75a1049040
a9ff93dda4
d879a93d09
ee27c5e9b1
989e5f47de
04e17cde30
bc217e7ddb
.gitignore (vendored, 19 changed lines)
@@ -1,9 +1,10 @@
-imbecile
-tags
-*.o
-.*.swp
-*.dep
-tmpl.*
-tests/*/itest.cc
-tests/*/itest.h
-tests/*/test
+/.bundle/
+/.yardoc
+/_yardoc/
+/coverage/
+/doc/
+/pkg/
+/spec/reports/
+/tmp/
+/.rspec_status
+/spec/run/
.gitmodules (vendored, deleted, 3 lines)
@@ -1,3 +0,0 @@
[submodule "refptr"]
	path = refptr
	url = http://github.com/holtrop/refptr.git
Gemfile.lock (new file, 28 lines)
@@ -0,0 +1,28 @@
GEM
  remote: https://rubygems.org/
  specs:
    diff-lcs (1.5.0)
    rake (13.0.6)
    rspec (3.11.0)
      rspec-core (~> 3.11.0)
      rspec-expectations (~> 3.11.0)
      rspec-mocks (~> 3.11.0)
    rspec-core (3.11.0)
      rspec-support (~> 3.11.0)
    rspec-expectations (3.11.0)
      diff-lcs (>= 1.2.0, < 2.0)
      rspec-support (~> 3.11.0)
    rspec-mocks (3.11.1)
      diff-lcs (>= 1.2.0, < 2.0)
      rspec-support (~> 3.11.0)
    rspec-support (3.11.0)

PLATFORMS
  ruby

DEPENDENCIES
  rake
  rspec

BUNDLED WITH
   2.4.0.dev
LICENSE.txt (new file, 21 lines)
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2010-2022 Josh Holtrop

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
Makefile (deleted, 61 lines)
@@ -1,61 +0,0 @@

TARGET := imbecile
CXXOBJS := $(patsubst %.cc,%.o,$(wildcard *.cc)) tmpl.o
CXXDEPS := $(patsubst %.o,.%.dep,$(CXXOBJS))
CXXFLAGS := -O2
DEPS := $(CXXDEPS)
OBJS := $(CXXOBJS)
LDFLAGS := -lpcre
CPPFLAGS := -I$(shell pwd)/refptr

all: submodule_check tmpl.h $(TARGET)

.PHONY: submodule_check
submodule_check:
	@if [ ! -e refptr/refptr.h ]; then \
		echo Error: \"refptr\" folder is not populated.; \
		echo Perhaps you forgot to do \"git checkout --recursive\"?; \
		echo You can remedy the situation with \"git submodule update --init\".; \
		exit 1; \
	fi

$(TARGET): $(OBJS)
	$(CXX) -o $@ $^ $(LDFLAGS)

# Object file rules
%.o: %.cc
	$(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $<

# Make dependency files
.%.dep: %.c
	@set -e; rm -f $@; \
	$(CC) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@

.%.dep: %.cc tmpl.h
	@set -e; rm -f $@; \
	$(CXX) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@

tmpl.cc: $(wildcard tmpl/*)
	echo -n > $@
	for f in $*/*; \
	do xxd -i $$f >> $@; \
	done

tmpl.h: tmpl.cc
	echo '#ifndef $*_h' > $@
	echo '#define $*_h' >> $@
	grep '$*_' $^ | sed -e 's/^/extern /' -e 's/ =.*/;/' >> $@
	echo '#endif' >> $@

.PHONY: tests
tests: PATH := $(shell pwd):$(PATH)
tests: all
	$(MAKE) -C $@

tests-clean:
	$(MAKE) -C tests clean

clean: tests-clean
	-rm -f $(TARGET) *.o .*.dep tmpl.cc tmpl.h

-include $(CXXDEPS)
Parser.cc (deleted, 423 lines)
@@ -1,423 +0,0 @@

#include <stdio.h>
#include <string.h>
#include <pcre.h>
#include <ctype.h> /* toupper() */

#include <iostream>
#include <fstream>
#include <string>
#include <map>

#include "Parser.h"
#include "TokenDefinition.h"
#include "RuleDefinition.h"
#include "tmpl.h"

using namespace std;

#define DEBUG

Parser::Parser()
    : m_classname("Parser"), m_namespace(""), m_extension("cc"),
      m_token_data(new string()), m_token_code(new string()),
      m_defines(new string())
{
}

void Parser::makeDefine(const string & defname, const string & definition)
{
    *m_defines += string("#define ") + defname + " " + definition + "\n";
}

bool Parser::write(const string & fname)
{
    if (m_tokens.size() < 1 || m_rules.size() < 1)
        return false;

    string header_fname = fname + ".h";
    string body_fname = fname + "." + m_extension;

    ofstream header(header_fname.c_str());
    ofstream body(body_fname.c_str());

    /* process data */
    refptr<string> token_classes = new string();
    refptr<string> token_classes_code = new string();
    int i = 0;
    for (list<TokenDefinitionRef>::const_iterator it = m_tokens.begin();
         it != m_tokens.end();
         it++)
    {
        char buff[20];
        sprintf(buff, "%d", i++);
        makeDefine((*it)->getIdentifier(), buff);
        *token_classes += (*it)->getClassDefinition();
        *token_classes_code += (*it)->getProcessMethod();
    }
    if (m_namespace != "")
    {
        makeDefine("I_NAMESPACE", m_namespace);
    }
    makeDefine("I_CLASSNAME", m_classname);

    /* set up replacements */
    setReplacement("token_list", buildTokenList());
    setReplacement("buildToken", buildBuildToken());
    setReplacement("header_name",
            new string(string("\"") + header_fname + "\""));
    setReplacement("token_code", m_token_code);
    setReplacement("token_data", m_token_data);
    setReplacement("defines", m_defines);
    setReplacement("token_classes", token_classes);
    setReplacement("token_classes_code", token_classes_code);

    /* write the header */
    writeTmpl(header, (char *) tmpl_parser_h, tmpl_parser_h_len);

    /* write the body */
    writeTmpl(body, (char *) tmpl_parser_cc, tmpl_parser_cc_len);

    header.close();
    body.close();

    return true;
}

bool Parser::writeTmpl(std::ostream & out, char * dat, int len)
{
    char * newline;
    char * data = dat;
    const char * errptr;
    int erroffset;
    data[len-1] = '\n';
    const int ovec_size = 6;
    int ovector[ovec_size];
    pcre * replace = pcre_compile("{%(\\w+)%}", 0, &errptr, &erroffset, NULL);
    while (data < (dat + len) && (newline = strstr(data, "\n")) != NULL)
    {
        if (pcre_exec(replace, NULL, data, newline - data,
                    0, 0, ovector, ovec_size) >= 0)
        {
            if (ovector[0] > 0)
            {
                out.write(data, ovector[0]);
            }
            out << *getReplacement(string(data, ovector[2],
                        ovector[3] - ovector[2]));
            if (ovector[1] < newline - data)
            {
                out.write(data + ovector[1], newline - data - ovector[1]);
            }
        }
        else
        {
            out.write(data, newline - data);
        }
        out << '\n';
        data = newline + 1;
    }
}

refptr<std::string> Parser::getReplacement(const std::string & name)
{
    if (m_replacements.find(name) != m_replacements.end())
    {
        return m_replacements[name];
    }
#ifdef DEBUG
    cerr << "No replacement found for \"" << name << "\"" << endl;
#endif
    return new string("");
}

refptr<string> Parser::buildTokenList()
{
    refptr<string> tokenlist = new string();
    for (list<TokenDefinitionRef>::const_iterator t = m_tokens.begin();
         t != m_tokens.end();
         t++)
    {
        if (t != m_tokens.begin())
            *tokenlist += " ";
        *tokenlist += "{ \"" + (*t)->getName() + "\", \""
            + (*t)->getCString() + "\", "
            + ((*t)->getProcessFlag() ? "true" : "false") + " }";
        if (({typeof(t) tmp = t; ++tmp;}) != m_tokens.end())
            *tokenlist += ",\n";
    }
    return tokenlist;
}

refptr<string> Parser::buildBuildToken()
{
    refptr<string> buildToken = new string();
    for (list<TokenDefinitionRef>::const_iterator t = m_tokens.begin();
         t != m_tokens.end();
         t++)
    {
        *buildToken += "case " + (*t)->getIdentifier() + ":\n";
        *buildToken += " token = new " + (*t)->getClassName() + "();\n";
        *buildToken += " break;\n";
    }
    return buildToken;
}

bool Parser::parseInputFile(char * buff, int size)
{
    typedef pcre * pcre_ptr;
    enum { none, tokens, rules };
    pcre_ptr empty, comment, section_name, token, rule,
             data_begin, data_end, code_begin, code_end;
    struct { pcre_ptr * re; const char * pattern; } exprs[] = {
        {&empty,        "^\\s*$"},
        {&comment,      "^\\s*#"},
        {&section_name, "^\\s*\\[([^\\]]+?)\\]\\s*$"},
        {&token,        "^\\s*"                     /* possible leading ws */
                        "([a-zA-Z_][a-zA-Z_0-9]*)"  /* 1: token name */
                        "\\s+"                      /* required whitespace */
                        "((?:[^\\\\\\s]|\\\\.)+)"}, /* 2: token RE */
        {&rule,         "^\\s*(\\S+)\\s*:=(.*)$"},
        {&data_begin,   "^\\s*\\${"},
        {&data_end,     "\\$}"},
        {&code_begin,   "^\\s*%{"},
        {&code_end,     "%}"}
    };
    const int ovec_size = 3 * 10;
    int ovector[ovec_size];
    int lineno = 0;
    char * newline;
    char * input = buff;
    string current_section_name;
    map<string, int> sections;
    sections["none"] = none;
    sections["tokens"] = tokens;
    sections["rules"] = rules;
    int section = none;
    string line;
    bool append_line = false;
    bool gathering_data = false;
    bool gathering_code = false;
    string gather;
    bool continue_line = false;
    TokenDefinitionRef current_token;

    for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
    {
        const char * errptr;
        int erroffset;
        *exprs[i].re = pcre_compile(exprs[i].pattern, 0,
                &errptr, &erroffset, NULL);
        if (*exprs[i].re == NULL)
        {
            cerr << "Error compiling regex '" << exprs[i].pattern <<
                "': " << errptr << " at position " << erroffset << endl;
            return false;
        }
    }

    for (;;)
    {
        if (continue_line)
        {
            continue_line = false;
        }
        else
        {
            if ((newline = strstr(input, "\n")) == NULL)
                break;
            int line_length = newline - input;
            if (line_length >= 1 && newline[-1] == '\r')
            {
                newline[-1] = '\n';
                line_length--;
            }
            lineno++;

            if (append_line)
            {
                line += string(input, line_length);
            }
            else
            {
                line = string(input, line_length);
            }
            input = newline + 1; /* set up for next loop iteration */
        }

        if ( (pcre_exec(empty, NULL, line.c_str(), line.size(),
                    0, 0, ovector, ovec_size) >= 0)
            || (pcre_exec(comment, NULL, line.c_str(), line.size(),
                    0, 0, ovector, ovec_size) >= 0)
           )
        {
            /* skip empty or comment lines */;
            continue;
        }

        if (! (gathering_code || gathering_data) )
        {
            if (line.size() > 0 && line[line.size()-1] == '\\')
            {
                line[line.size()-1] = ' ';
                append_line = true;
                continue;
            }
            else
            {
                append_line = false;
            }

            if (pcre_exec(section_name, NULL, line.c_str(), line.size(),
                        0, 0, ovector, ovec_size) >= 0)
            {
                current_section_name
                    = string(line, ovector[2], ovector[3] - ovector[2]);
                if (sections.find(current_section_name) != sections.end())
                {
                    section = sections[current_section_name];
                }
                else
                {
                    cerr << "Unknown section name '" << current_section_name
                        << "'!" << endl;
                    return false;
                }
                continue;
            }
        }

        switch (section)
        {
            case none:
                cerr << "Unrecognized input on line " << lineno << endl;
                return false;
            case tokens:
                if (gathering_data)
                {
                    if (pcre_exec(data_end, NULL, line.c_str(), line.size(),
                                0, 0, ovector, ovec_size) >= 0)
                    {
                        gather += string(line, 0, ovector[0]) + "\n";
                        gathering_data = false;
                        line = string(line, ovector[1]);
                        continue_line = true;
                        if (current_token.isNull())
                        {
                            *m_token_data += gather;
                        }
                        else
                        {
                            current_token->addData(gather);
                        }
                    }
                    else
                    {
                        gather += line + "\n";
                    }
                    continue;
                }
                else if (gathering_code)
                {
                    if (pcre_exec(code_end, NULL, line.c_str(), line.size(),
                                0, 0, ovector, ovec_size) >= 0)
                    {
                        gather += string(line, 0, ovector[0]) + "\n";
                        gathering_code = false;
                        line = string(line, ovector[1]);
                        continue_line = true;
                        if (current_token.isNull())
                        {
                            *m_token_code += gather;
                        }
                        else
                        {
                            current_token->addCode(gather);
                        }
                    }
                    else
                    {
                        gather += line + "\n";
                    }
                    continue;
                }
                else if (pcre_exec(data_begin, NULL, line.c_str(), line.size(),
                            0, 0, ovector, ovec_size) >= 0)
                {
                    gathering_data = true;
                    gather = "";
                    line = string(line, ovector[1]);
                    continue_line = true;
                    continue;
                }
                else if (pcre_exec(code_begin, NULL, line.c_str(), line.size(),
                            0, 0, ovector, ovec_size) >= 0)
                {
                    gathering_code = true;
                    gather = "";
                    line = string(line, ovector[1]);
                    continue_line = true;
                    continue;
                }
                else if (pcre_exec(token, NULL, line.c_str(), line.size(),
                            0, 0, ovector, ovec_size) >= 0)
                {
                    string name(line, ovector[2], ovector[3] - ovector[2]);
                    string definition(line,
                            ovector[4], ovector[5] - ovector[4]);
                    current_token = new TokenDefinition();
                    if (current_token->create(name, definition))
                    {
                        addTokenDefinition(current_token);
                    }
                    else
                    {
                        cerr << "Error in token definition ending on line "
                            << lineno << endl;
                        return false;
                    }
                    line = string(line, ovector[1]);
                    continue_line = true;
                    continue;
                }
                else
                {
                    cerr << "Unrecognized input on line " << lineno << endl;
                    return false;
                }
                break;
            case rules:
                if (pcre_exec(rule, NULL, line.c_str(), line.size(),
                            0, 0, ovector, ovec_size) >= 0)
                {
                    string name(line, ovector[2], ovector[3] - ovector[2]);
                    string definition(line,
                            ovector[4], ovector[5] - ovector[4]);
                    refptr<RuleDefinition> rd = new RuleDefinition();
                    if (rd->create(name, definition))
                    {
                        addRuleDefinition(rd);
                    }
                    else
                    {
                        cerr << "Error in rule definition ending on line "
                            << lineno << endl;
                        return false;
                    }
                }
                else
                {
                    cerr << "Unrecognized input on line " << lineno << endl;
                    return false;
                }
                break;
        }
    }

    for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
    {
        pcre_free(*exprs[i].re);
    }

    return true;
}
Parser.h (deleted, 61 lines)
@@ -1,61 +0,0 @@

#ifndef PARSER_H
#define PARSER_H

#include <vector>
#include <string>
#include <list>
#include <map>

#include "refptr.h"
#include "TokenDefinition.h"
#include "RuleDefinition.h"

class Parser
{
    public:
        Parser();
        void addTokenDefinition(refptr<TokenDefinition> td)
        {
            m_tokens.push_back(td);
        }
        void addRuleDefinition(refptr<RuleDefinition> rd)
        {
            m_rules.push_back(rd);
        }
        bool write(const std::string & fname);
        bool parseInputFile(char * buff, int size);

        void setClassName(const std::string & cn) { m_classname = cn; }
        std::string getClassName() { return m_classname; }

        void setNamespace(const std::string & ns) { m_namespace = ns; }
        std::string getNamespace() { return m_namespace; }

        void setExtension(const std::string & e) { m_extension = e; }
        std::string getExtension() { return m_extension; }

    protected:
        refptr<std::string> buildTokenList();
        refptr<std::string> buildBuildToken();
        bool writeTmpl(std::ostream & out, char * dat, int len);
        refptr<std::string> getReplacement(const std::string & name);
        void setReplacement(const std::string & name, refptr<std::string> val)
        {
            m_replacements[name] = val;
        }
        void makeDefine(const std::string & defname,
                const std::string & definition);

        std::list<TokenDefinitionRef> m_tokens;
        std::vector< refptr< RuleDefinition > > m_rules;
        std::string m_classname;
        std::string m_namespace;
        std::string m_extension;
        std::map< std::string, refptr<std::string> > m_replacements;
        refptr<std::string> m_token_data;
        refptr<std::string> m_token_code;
        refptr<std::string> m_defines;
};

#endif
README (deleted, 5 lines)
@@ -1,5 +0,0 @@
Imbecile is a bottom-up parser generator. It targets C++ and automatically
generates a class hierarchy for interacting with the parser.

Imbecile generates both a lexer and a parser based on the rules given to
it in the input file.
README.md (new file, 31 lines)
@@ -0,0 +1,31 @@
# The Propane Parser Generator

Propane is an LR Parser Generator (LPG) which:

  * accepts LR(0), SLR, and LALR grammars
  * generates a built-in lexer to tokenize input
  * supports UTF-8 lexer inputs
  * generates a table-driven parser to parse input in linear time
  * is MIT-licensed
  * is distributable as a standalone Ruby script

## Installation

TODO

## Usage

TODO: Write usage instructions here

## Development

After checking out the repository, run `bundle install` to install dependencies.
Run `rake spec` to execute tests.

## Contributing

Bug reports and pull requests are welcome on GitHub at https://github.com/holtrop/propane.

## License

Propane is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
Rakefile (new file, 9 lines)
@@ -0,0 +1,9 @@
require "rspec/core/rake_task"

RSpec::Core::RakeTask.new(:spec, :example_pattern) do |task, args|
  if args.example_pattern
    task.rspec_opts = %W[-e "#{args.example_pattern}" -f documentation]
  end
end

task :default => :spec
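The spec task above takes an optional `example_pattern` argument, so besides a bare `rake spec`, an invocation like `rake "spec[some example name]"` (standard Rake task-argument syntax; the example name here is hypothetical) should run only the matching examples and switch RSpec to documentation format.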
RuleDefinition.cc (deleted, 9 lines; filename inferred from the content)
@@ -1,9 +0,0 @@

#include "RuleDefinition.h"

using namespace std;

bool RuleDefinition::create(const string & name, const string & definition)
{
    m_name = name;
}
RuleDefinition.h (deleted, 16 lines; filename inferred from the content)
@@ -1,16 +0,0 @@

#ifndef RULEDEFINITION_H
#define RULEDEFINITION_H

#include <string>

class RuleDefinition
{
    public:
        bool create(const std::string & name, const std::string & definition);

    protected:
        std::string m_name;
};

#endif
TokenDefinition.cc (deleted, 125 lines; filename inferred from the content)
@@ -1,125 +0,0 @@

#include <pcre.h>

#include <iostream>
#include <string>
#include <vector>

#include "TokenDefinition.h"
#include "refptr.h"

using namespace std;

#define WHITESPACE " \n\r\t\v"

static string trim(string s)
{
    size_t lastpos = s.find_last_not_of(WHITESPACE);
    if (lastpos == string::npos)
        return "";
    s.erase(lastpos + 1);
    s.erase(0, s.find_first_not_of(WHITESPACE));
    return s;
}

static refptr< vector<string> > split(const string & delim, string str)
{
    refptr< vector<string> > ret = new vector<string>();
    size_t pos;
    while ( (pos = str.find(delim)) != string::npos )
    {
        string t = str.substr(0, pos);
        ret->push_back(t);
        str.erase(0, pos + 1);
    }
    if (str != "")
        ret->push_back(str);
    return ret;
}

static string c_escape(const string & orig)
{
    string result;
    for (string::const_iterator it = orig.begin(); it != orig.end(); it++)
    {
        if (*it == '\\' || *it == '"')
            result += '\\';
        result += *it;
    }
    return result;
}


TokenDefinition::TokenDefinition()
    : m_process(false)
{
}

bool TokenDefinition::create(const string & name,
        const string & definition)
{
    const char * errptr;
    int erroffset;
    pcre * re = pcre_compile(definition.c_str(), 0, &errptr, &erroffset, NULL);
    if (re == NULL)
    {
        cerr << "Error compiling regular expression '" << definition
            << "' at position " << erroffset << ": " << errptr << endl;
        return false;
    }
    m_name = name;
    m_definition = definition;
    pcre_free(re);

#if 0
    refptr< vector< string > > parts = split(",", flags);
    for (int i = 0, sz = parts->size(); i < sz; i++)
    {
        (*parts)[i] = trim((*parts)[i]);
        string & s = (*parts)[i];
        if (s == "p")
        {
            m_process = true;
        }
        else
        {
            cerr << "Unknown token flag \"" << s << "\"" << endl;
            return false;
        }
    }
#endif

    return true;
}

string TokenDefinition::getCString() const
{
    return c_escape(m_definition);
}

string TokenDefinition::getClassDefinition() const
{
    string ret = "class "+ getClassName() + " : public Token {\n";
    ret += "public:\n";
    if (m_process)
    {
        ret += " virtual void process(const Matches & matches);\n";
    }
    ret += "\n";
    ret += "protected:\n";
    ret += m_data + "\n";
    ret += "};\n";
    return ret;
}

string TokenDefinition::getProcessMethod() const
{
    string ret;
    if (m_code != "")
    {
        ret += "void " + getClassName() + "::process(const Matches & matches) {\n";
        ret += m_code + "\n";
        ret += "}\n";
    }
    return ret;
}
TokenDefinition.h (deleted, 37 lines; filename inferred from the content)
@@ -1,37 +0,0 @@

#ifndef TOKENDEFINITION_H
#define TOKENDEFINITION_H

#include <string>
#include "refptr.h"

class TokenDefinition
{
    public:
        TokenDefinition();
        bool create(const std::string & name,
                const std::string & definition);
        std::string getCString() const;
        std::string getName() const { return m_name; }
        bool getProcessFlag() const { return m_process; }
        void setProcessFlag(bool p) { m_process = p; }
        void addData(const std::string & d) { m_data += d; }
        std::string getData() const { return m_data; }
        void addCode(const std::string & c) { m_code += c; m_process = true; }
        std::string getCode() const { return m_code; }
        std::string getClassDefinition() const;
        std::string getProcessMethod() const;
        std::string getIdentifier() const { return "TK_" + m_name; }
        std::string getClassName() const { return "Tk" + m_name; }

    protected:
        std::string m_name;
        std::string m_definition;
        bool m_process;
        std::string m_data;
        std::string m_code;
};

typedef refptr<TokenDefinition> TokenDefinitionRef;

#endif
assets/parser.d.erb (new file, 252 lines)
@@ -0,0 +1,252 @@
<% if @modulename %>
module <%= @modulename %>;

<% end %>
class <%= classname %>
{
    enum
    {
<% @tokens.each_with_index do |(name, token), index| %>
<% if token.name %>
        TOKEN_<%= token.c_name %> = <%= index %>,
<% end %>
<% end %>
        TOKEN_EOF = <%= TOKEN_EOF %>,
        TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>,
        TOKEN_DROP = <%= TOKEN_DROP %>,
        TOKEN_NONE = <%= TOKEN_NONE %>,
    }

    static immutable string TokenNames[] = [
<% @tokens.each_with_index do |(name, token), index| %>
<% if token.name %>
        "<%= token.name %>",
<% else %>
        null,
<% end %>
<% end %>
    ];

    static class Decoder
    {
        enum
        {
            CODE_POINT_INVALID = 0xFFFFFFFE,
            CODE_POINT_EOF = 0xFFFFFFFF,
        }

        struct DecodedCodePoint
        {
            uint code_point;
            uint code_point_length;
        }

        static DecodedCodePoint decode_code_point(const(ubyte) * input, size_t input_length)
        {
            if (input_length == 0u)
            {
                return DecodedCodePoint(CODE_POINT_EOF, 0u);
            }
            ubyte c = *input;
            uint code_point;
            uint code_point_length;
            if ((c & 0x80u) == 0u)
            {
                code_point = c;
                code_point_length = 1u;
            }
            else
            {
                ubyte following_bytes;
                if ((c & 0xE0u) == 0xC0u)
                {
                    code_point = c & 0x1Fu;
                    following_bytes = 1u;
                }
                else if ((c & 0xF0u) == 0xE0u)
                {
                    code_point = c & 0x0Fu;
                    following_bytes = 2u;
                }
                else if ((c & 0xF8u) == 0xF0u)
                {
                    code_point = c & 0x07u;
                    following_bytes = 3u;
                }
                else if ((c & 0xFCu) == 0xF8u)
                {
                    code_point = c & 0x03u;
                    following_bytes = 4u;
                }
                else if ((c & 0xFEu) == 0xFCu)
                {
                    code_point = c & 0x01u;
                    following_bytes = 5u;
                }
                if (input_length <= following_bytes)
                {
                    return DecodedCodePoint(CODE_POINT_INVALID, 0u);
                }
                code_point_length = following_bytes + 1u;
                while (following_bytes-- > 0u)
                {
                    input++;
                    code_point <<= 6u;
                    code_point |= *input & 0x3Fu;
                }
            }
            return DecodedCodePoint(code_point, code_point_length);
        }
    }

    static class Lexer
    {
        private struct Transition
        {
            uint first;
            uint last;
            uint destination;
        }

        private struct State
        {
            uint transition_table_index;
            uint n_transitions;
            uint accepts;
        }

<% transition_table, state_table = lexer.dfa.build_tables %>
        private static const Transition transitions[] = [
<% transition_table.each do |transition_table_entry| %>
            Transition(<%= transition_table_entry[:first] %>u, <%= transition_table_entry[:last] %>u, <%= transition_table_entry[:destination] %>u),
<% end %>
        ];

        private static const State states[] = [
<% state_table.each do |state_table_entry| %>
            State(<%= state_table_entry[:transition_table_index] %>u, <%= state_table_entry[:n_transitions] %>u, <%= state_table_entry[:accepts] %>u),
<% end %>
        ];

        struct LexedToken
        {
            size_t row;
            size_t col;
            size_t length;
            uint token;
        }

        private const(ubyte) * m_input;
        private size_t m_input_length;
        private size_t m_input_position;
        private size_t m_input_row;
        private size_t m_input_col;

        this(const(ubyte) * input, size_t input_length)
        {
            m_input = input;
            m_input_length = input_length;
        }

        LexedToken lex_token()
        {
            for (;;)
            {
                LexedToken lt = attempt_lex_token();
                if (lt.token != TOKEN_DROP)
                {
                    return lt;
                }
            }
        }

        private LexedToken attempt_lex_token()
        {
            LexedToken lt = LexedToken(m_input_row, m_input_col, 0, TOKEN_NONE);
            struct LexedTokenState
            {
                size_t length;
                size_t delta_row;
                size_t delta_col;
                uint token;
            }
            LexedTokenState last_accepts_info;
            last_accepts_info.token = TOKEN_NONE;
            LexedTokenState attempt_info;
            uint current_state;
            for (;;)
            {
                auto decoded = Decoder.decode_code_point(&m_input[m_input_position + attempt_info.length], m_input_length - m_input_position - attempt_info.length);
                if (decoded.code_point == Decoder.CODE_POINT_INVALID)
                {
                    lt.token = TOKEN_DECODE_ERROR;
                    return lt;
                }
                bool lex_continue = false;
                if (decoded.code_point != Decoder.CODE_POINT_EOF)
                {
                    uint dest = transition(current_state, decoded.code_point);
                    if (dest != cast(uint)-1)
                    {
                        lex_continue = true;
                        attempt_info.length += decoded.code_point_length;
                        if (decoded.code_point == '\n')
                        {
                            attempt_info.delta_row++;
                            attempt_info.delta_col = 0u;
                        }
                        else
                        {
                            attempt_info.delta_col++;
                        }
                        current_state = dest;
                        if (states[current_state].accepts != TOKEN_NONE)
                        {
                            attempt_info.token = states[current_state].accepts;
                            last_accepts_info = attempt_info;
                        }
                    }
                }
                else if (attempt_info.length == 0u)
                {
                    lt.token = TOKEN_EOF;
                    break;
                }
                if (!lex_continue)
                {
                    if (last_accepts_info.token != TOKEN_NONE)
                    {
                        lt.token = last_accepts_info.token;
                        lt.length = last_accepts_info.length;
                        m_input_position += last_accepts_info.length;
                        m_input_row += last_accepts_info.delta_row;
                        if (last_accepts_info.delta_row != 0u)
                        {
                            m_input_col = last_accepts_info.delta_col;
                        }
                        else
                        {
                            m_input_col += last_accepts_info.delta_col;
                        }
                    }
                    break;
                }
            }
            return lt;
        }

        private uint transition(uint current_state, uint code_point)
        {
            uint transition_table_index = states[current_state].transition_table_index;
            for (uint i = 0u; i < states[current_state].n_transitions; i++)
            {
                if ((transitions[transition_table_index + i].first <= code_point) &&
                    (code_point <= transitions[transition_table_index + i].last))
                {
                    return transitions[transition_table_index + i].destination;
                }
            }
            return cast(uint)-1;
        }
    }
}
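The Decoder class above reconstructs a Unicode code point from a UTF-8 leading byte and its continuation bytes. As a sanity check on that bit arithmetic, here is a minimal Ruby sketch of the same masking logic for 1- to 4-byte sequences; `decode_utf8` is a hypothetical helper, not part of this repository, and it assumes well-formed input:

```ruby
# Mirrors the masking cascade in Decoder.decode_code_point above.
def decode_utf8(bytes)
  c = bytes[0]
  return [c, 1] if (c & 0x80) == 0                # ASCII: 0xxxxxxx
  code_point, following =
    if    (c & 0xE0) == 0xC0 then [c & 0x1F, 1]   # 110xxxxx
    elsif (c & 0xF0) == 0xE0 then [c & 0x0F, 2]   # 1110xxxx
    elsif (c & 0xF8) == 0xF0 then [c & 0x07, 3]   # 11110xxx
    end
  bytes[1, following].each do |b|
    code_point = (code_point << 6) | (b & 0x3F)   # continuation bytes: 10xxxxxx
  end
  [code_point, following + 1]
end

p decode_utf8([0xC3, 0xA9])  # "é" => [0xE9, 2], i.e. U+00E9 consumed 2 bytes
```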
bin/propane (new executable file, 5 lines)
@@ -0,0 +1,5 @@
#!/usr/bin/env ruby

require "propane"

exit Propane::CLI.run(ARGV.dup)
imbecile.cc (deleted, 101 lines)
@@ -1,101 +0,0 @@

#include <getopt.h>

#include <iostream>
#include <fstream>

#include "refptr.h"
#include "Parser.h"

using namespace std;

string buildOutputFilename(string & input_fname);

int main(int argc, char * argv[])
{
    int longind = 1;
    int opt;
    Parser p;
    string outfile;

    static struct option longopts[] = {
        /* name, has_arg, flag, val */
        { "classname", required_argument, NULL, 'c' },
        { "extension", required_argument, NULL, 'e' },
        { "namespace", required_argument, NULL, 'n' },
        { "outfile",   required_argument, NULL, 'o' },
        { NULL, 0, NULL, 0 }
    };

    while ((opt = getopt_long(argc, argv, "", longopts, &longind)) != -1)
    {
        switch (opt)
        {
            case 'c': /* classname */
                p.setClassName(optarg);
                break;
            case 'e': /* extension */
                p.setExtension(optarg);
                break;
            case 'n': /* namespace */
                p.setNamespace(optarg);
                break;
            case 'o': /* outfile */
                outfile = optarg;
                break;
        }
    }

    if (optind >= argc)
    {
        cerr << "Usage: imbecile [options] <input-file>" << endl;
        return 1;
    }

    string input_fname = argv[optind];
    ifstream ifs;
    ifs.open(input_fname.c_str(), ios::binary);
    if (!ifs.is_open())
    {
        cerr << "Error opening input file: '" << input_fname << "'";
        return 2;
    }
    ifs.seekg(0, ios_base::end);
    int size = ifs.tellg();
    ifs.seekg(0, ios_base::beg);
    char * buff = new char[size];
    ifs.read(buff, size);
    ifs.close();

    if (outfile == "")
        outfile = buildOutputFilename(input_fname);

    if (!p.parseInputFile(buff, size))
    {
        cerr << "Error parsing " << input_fname << endl;
        return 3;
    }
    if (!p.write(outfile))
    {
        cerr << "Error processing " << input_fname << endl;
        return 4;
    }

    delete[] buff;
    return 0;
}

string buildOutputFilename(string & input_fname)
{
    string outfile;
    size_t len = input_fname.length();
    if (len > 2 && input_fname.substr(len - 2) == ".I")
    {
        outfile = input_fname.substr(0, len - 2);
    }
    else
    {
        outfile = input_fname;
    }
    return outfile;
}
lib/propane.rb (new file, 137 lines)
@@ -0,0 +1,137 @@
require "erb"
require "set"
require_relative "propane/cli"
require_relative "propane/code_point_range"
require_relative "propane/fa"
require_relative "propane/fa/state"
require_relative "propane/fa/state/transition"
require_relative "propane/lexer"
require_relative "propane/lexer/dfa"
require_relative "propane/parser"
require_relative "propane/parser/item"
require_relative "propane/parser/item_set"
require_relative "propane/regex"
require_relative "propane/regex/nfa"
require_relative "propane/regex/unit"
require_relative "propane/rule"
require_relative "propane/token"
require_relative "propane/version"

class Propane

  # EOF.
  TOKEN_EOF = 0xFFFFFFFC

  # Decoding error.
  TOKEN_DECODE_ERROR = 0xFFFFFFFD

  # Token ID for a "dropped" token.
  TOKEN_DROP = 0xFFFFFFFE

  # Invalid token ID.
  TOKEN_NONE = 0xFFFFFFFF

  class Error < RuntimeError
  end

  def initialize(input)
    @tokens = {}
    @rules = {}
    input = input.gsub("\r\n", "\n")
    while !input.empty?
      parse_grammar(input)
    end
  end

  def generate(output_file, log_file)
    expand_rules
    lexer = Lexer.new(@tokens)
    parser = Parser.new(@tokens, @rules)
    classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
    erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../assets/parser.d.erb")), trim_mode: "<>")
    result = erb.result(binding.clone)
    File.open(output_file, "wb") do |fh|
      fh.write(result)
    end
  end

  private

  def parse_grammar(input)
    if input.slice!(/\A\s+/)
      # Skip white space.
    elsif input.slice!(/\A#.*\n/)
      # Skip comment lines.
    elsif input.slice!(/\Amodule\s+(\S+)\n/)
      @modulename = $1
    elsif input.slice!(/\Aclass\s+(\S+)\n/)
      @classname = $1
    elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
      name, pattern = $1, $2
      if pattern.nil?
        pattern = name
      end
      unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
        raise Error.new("Invalid token name #{name}")
      end
      if @tokens[name]
        raise Error.new("Duplicate token name #{name}")
      else
        @tokens[name] = Token.new(name, pattern, @tokens.size)
      end
    elsif input.slice!(/\Adrop\s+(\S+)\n/)
      pattern = $1
      @tokens[name] = Token.new(nil, pattern, @tokens.size)
    elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
      rule_name, components, code = $1, $2, $3
      components = components.strip.split(/\s+/)
      @rules[rule_name] ||= Rule.new(rule_name, @rules.size)
      @rules[rule_name].add_pattern(components, code)
    else
      if input.size > 25
        input = input.slice(0..20) + "..."
      end
      raise Error.new("Unexpected grammar input: #{input}")
    end
  end

  def expand_rules
    @rules.each do |rule_name, rule|
      if @tokens.include?(rule_name)
        raise Error.new("Rule name collides with token name #{rule_name}")
      end
    end
    unless @rules["Start"]
      raise Error.new("Start rule not found")
    end
    @rules.each do |rule_name, rule|
      rule.patterns.each do |rule|
        rule.components.map! do |component|
          if @tokens[component]
            @tokens[component]
          elsif @rules[component]
            @rules[component]
          else
            raise Error.new("Symbol #{component} not found")
          end
        end
      end
    end
  end

  class << self

    def run(input_file, output_file, log_file)
      begin
        propane = Propane.new(File.read(input_file))
        propane.generate(output_file, log_file)
      rescue Error => e
        $stderr.puts e.message
        return 2
      end
      return 0
    end

  end

end
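To make the parse_grammar branches above concrete: a grammar input may carry module and class lines, token and drop declarations, and must define at least a Start rule (expand_rules raises otherwise), with rule bodies sitting between `<<` and `>>`. The grammar below is a hypothetical example inferred from those regexes, not a file from this repository:

```ruby
require "propane"

# Hypothetical grammar matching the regexes in parse_grammar above;
# all names here are illustrative. The Start rule body is empty.
grammar = <<~'GRAMMAR'
  module calc
  class CalcParser
  token plus \+
  token integer [0-9]+
  drop \s+
  Start: [integer plus integer] <<
  >>
GRAMMAR

propane = Propane.new(grammar)
# Writes a D parser from assets/parser.d.erb; whether generation succeeds
# end to end also depends on classes (Rule, Token, Regex units) whose diffs
# are not shown in full here.
propane.generate("calc_parser.d", nil)
```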
lib/propane/cli.rb (new file, 54 lines)
@@ -0,0 +1,54 @@
class Propane
  module CLI

    USAGE = <<EOF
Usage: #{$0} [options] <input-file> <output-file>
Options:
  --log LOG     Write log file
  --version     Show program version and exit
  -h, --help    Show this usage and exit
EOF

    class << self

      def run(args)
        params = []
        log_file = nil
        i = 0
        while i < args.size
          arg = args[i]
          case arg
          when "--log"
            if i + 1 < args.size
              i += 1
              log_file = args[i]
            end
          when "--version"
            puts "propane v#{VERSION}"
            return 0
          when "-h", "--help"
            puts USAGE
            return 0
          when /^-/
            $stderr.puts "Error: unknown option #{arg}"
            return 1
          else
            params << arg
          end
          i += 1
        end
        if params.size != 2
          $stderr.puts "Error: specify input and output files"
          return 1
        end
        unless File.readable?(params[0])
          $stderr.puts "Error: cannot read #{params[0]}"
          return 2
        end
        Propane.run(*params, log_file)
      end

    end

  end
end
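Given the argument handling above, a programmatic invocation mirrors the command line `propane --log propane.log grammar.propane parser.d`; the file names here are hypothetical:

```ruby
require "propane"

# Returns the status code that bin/propane would pass to exit:
# 0 on success, 1 for usage errors, 2 if the input is unreadable or invalid.
status = Propane::CLI.run(["--log", "propane.log", "grammar.propane", "parser.d"])
```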
lib/propane/code_point_range.rb (new file, 84 lines)
@@ -0,0 +1,84 @@
class Propane
  class CodePointRange

    MAX_CODE_POINT = 0xFFFFFFFF

    attr_reader :first
    attr_reader :last

    include Comparable

    # Build a CodePointRange
    def initialize(first, last = nil)
      @first = first.ord
      if last
        @last = last.ord
        if @last < @first
          raise "Invalid CodePointRange: last code point must be > first code point"
        end
      else
        @last = @first
      end
    end

    def <=>(other)
      if self.first != other.first
        @first <=> other.first
      else
        @last <=> other.last
      end
    end

    def include?(v)
      if v.is_a?(CodePointRange)
        @first <= v.first && v.last <= @last
      else
        @first <= v && v <= @last
      end
    end

    def size
      @last - @first + 1
    end

    class << self

      def invert_ranges(code_point_ranges)
        new_ranges = []
        last_cp = -1
        code_point_ranges.sort.each do |code_point_range|
          if code_point_range.first > (last_cp + 1)
            new_ranges << CodePointRange.new(last_cp + 1, code_point_range.first - 1)
            last_cp = code_point_range.last
          else
            last_cp = [last_cp, code_point_range.last].max
          end
        end
        if last_cp < MAX_CODE_POINT
          new_ranges << CodePointRange.new(last_cp + 1, MAX_CODE_POINT)
        end
        new_ranges
      end

      def first_subrange(code_point_ranges)
        code_point_ranges.sort.reduce do |result, code_point_range|
          if code_point_range.include?(result.first)
            if code_point_range.last < result.last
              code_point_range
            else
              result
            end
          else
            if code_point_range.first <= result.last
              CodePointRange.new(result.first, code_point_range.first - 1)
            else
              result
            end
          end
        end
      end

    end

  end
end
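A short illustration of the two class methods above, with arbitrary example values: invert_ranges returns the gaps a list of ranges leaves uncovered (up to MAX_CODE_POINT), and first_subrange returns the leading slice of the earliest range that no later-starting range cuts into:

```ruby
require "propane"

letters = Propane::CodePointRange.new("a", "z")
digits  = Propane::CodePointRange.new("0", "9")

gaps = Propane::CodePointRange.invert_ranges([letters, digits])
# => three ranges covering 0..0x2F, 0x3A..0x60, and 0x7B..0xFFFFFFFF

sub = Propane::CodePointRange.first_subrange([letters,
                                              Propane::CodePointRange.new("m")])
# => the range "a".."l": the letters range is cut where the "m" range begins
```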
lib/propane/fa.rb (new file, 61 lines)
@@ -0,0 +1,61 @@
class Propane

  class FA

    attr_reader :start_state

    def initialize
      @start_state = State.new
    end

    def to_s
      chr = lambda do |value|
        if value < 32 || value > 127
          "{#{value}}"
        else
          value.chr
        end
      end
      rv = ""
      states = enumerate
      states.each do |state, id|
        accepts_s = state.accepts ? " #{state.accepts}" : ""
        rv += "#{id}#{accepts_s}:\n"
        state.transitions.each do |transition|
          if transition.nil?
            range_s = "nil"
          else
            range_s = chr[transition.code_point_range.first]
            if transition.code_point_range.size > 1
              range_s += "-" + chr[transition.code_point_range.last]
            end
          end
          accepts_s = transition.destination.accepts ? " #{transition.destination.accepts}" : ""
          rv += " #{range_s} => #{states[transition.destination]}#{accepts_s}\n"
        end
      end
      rv
    end

    def enumerate
      @_enumerated ||=
        begin
          id = 0
          states = {}
          visit = lambda do |state|
            unless states.include?(state)
              states[state] = id
              id += 1
              state.transitions.each do |transition|
                visit[transition.destination]
              end
            end
          end
          visit[@start_state]
          states
        end
    end

  end

end
lib/propane/fa/state.rb (new file, 51 lines)
@@ -0,0 +1,51 @@
class Propane
  class FA

    class State

      attr_accessor :accepts
      attr_reader :transitions

      def initialize
        @transitions = []
      end

      def add_transition(code_point_range, destination)
        @transitions << Transition.new(code_point_range, destination)
      end

      # Determine the set of states that can be reached by nil transitions
      # from this state.
      #
      # @return [Set<NFA::State>]
      #   Set of states.
      def nil_transition_states
        states = Set[self]
        analyze_state = lambda do |state|
          state.nil_transitions.each do |transition|
            unless states.include?(transition.destination)
              states << transition.destination
              analyze_state[transition.destination]
            end
          end
        end
        analyze_state[self]
        states
      end

      def nil_transitions
        @transitions.select do |transition|
          transition.nil?
        end
      end

      def cp_transitions
        @transitions.reject do |transition|
          transition.nil?
        end
      end

    end

  end
end
lib/propane/fa/state/transition.rb (new file, 23 lines)
@@ -0,0 +1,23 @@
class Propane
  class FA
    class State

      class Transition

        attr_reader :code_point_range
        attr_reader :destination

        def initialize(code_point_range, destination)
          @code_point_range = code_point_range
          @destination = destination
        end

        def nil?
          @code_point_range.nil?
        end

      end

    end
  end
end
lib/propane/lexer.rb (new file, 13 lines)
@@ -0,0 +1,13 @@
class Propane
  class Lexer

    # @return [DFA]
    #   Lexer DFA.
    attr_accessor :dfa

    def initialize(tokens)
      @dfa = DFA.new(tokens)
    end

  end
end
lib/propane/lexer/dfa.rb (new file, 118 lines)
@@ -0,0 +1,118 @@
class Propane
  class Lexer

    class DFA < FA

      def initialize(tokens)
        super()
        start_nfa = Regex::NFA.new
        tokens.each do |name, token|
          start_nfa.start_state.add_transition(nil, token.nfa.start_state)
        end
        @nfa_state_sets = {}
        @states = []
        @to_process = Set.new
        nil_transition_states = start_nfa.start_state.nil_transition_states
        register_nfa_state_set(nil_transition_states)
        while @to_process.size > 0
          state_set = @to_process.first
          @to_process.delete(state_set)
          process_nfa_state_set(state_set)
        end
        @start_state = @states[0]
      end

      def build_tables
        transition_table = []
        state_table = []
        states = enumerate
        states.each do |state, id|
          accepts =
            if state.accepts.nil?
              TOKEN_NONE
            elsif state.accepts.name
              state.accepts.id
            else
              TOKEN_DROP
            end
          state_table << {
            transition_table_index: transition_table.size,
            n_transitions: state.transitions.size,
            accepts: accepts,
          }
          state.transitions.each do |transition|
            transition_table << {
              first: transition.code_point_range.first,
              last: transition.code_point_range.last,
              destination: states[transition.destination],
            }
          end
        end
        [transition_table, state_table]
      end

      private

      def register_nfa_state_set(nfa_state_set)
        unless @nfa_state_sets.include?(nfa_state_set)
          state_id = @states.size
          @nfa_state_sets[nfa_state_set] = state_id
          @states << State.new
          @to_process << nfa_state_set
        end
      end

      def process_nfa_state_set(nfa_state_set)
        state_id = @nfa_state_sets[nfa_state_set]
        state = @states[state_id]
        if state_id > 0
          nfa_state_set.each do |nfa_state|
            if nfa_state.accepts
              if state.accepts
                if nfa_state.accepts.id < state.accepts.id
                  state.accepts = nfa_state.accepts
                end
              else
                state.accepts = nfa_state.accepts
              end
            end
          end
        end
        transitions = transitions_for(nfa_state_set)
        while transitions.size > 0
          subrange = CodePointRange.first_subrange(transitions.map(&:code_point_range))
          dest_nfa_states = transitions.reduce(Set.new) do |result, transition|
            if transition.code_point_range.include?(subrange)
              result << transition.destination
            end
            result
          end
          dest_nfa_states = dest_nfa_states.reduce(Set.new) do |result, dest_nfa_state|
            result + dest_nfa_state.nil_transition_states
          end
          register_nfa_state_set(dest_nfa_states)
          dest_state = @states[@nfa_state_sets[dest_nfa_states]]
          state.add_transition(subrange, dest_state)
          transitions.delete_if do |transition|
            transition.code_point_range.last <= subrange.last
          end
          transitions.map! do |transition|
            if transition.code_point_range.first <= subrange.last
              Regex::NFA::State::Transition.new(CodePointRange.new(subrange.last + 1, transition.code_point_range.last), transition.destination)
            else
              transition
            end
          end
        end
      end

      def transitions_for(nfa_state_set)
        nfa_state_set.reduce([]) do |result, state|
          result + state.cp_transitions
        end
      end

    end

  end
end
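The constructor above is the classic subset construction: each DFA state corresponds to a set of NFA states (epsilon closures via nil_transition_states), and process_nfa_state_set carves overlapping code-point ranges into disjoint subranges. The flattened output of build_tables is then read the way the generated D transition() function reads it. Here is a small Ruby sketch of that lookup with hypothetical table contents:

```ruby
# Hypothetical tables in the shape produced by build_tables above.
state_table = [
  { transition_table_index: 0, n_transitions: 2, accepts: 0xFFFFFFFF },  # TOKEN_NONE
  { transition_table_index: 2, n_transitions: 0, accepts: 0 },           # accepts token 0
]
transition_table = [
  { first: "a".ord, last: "z".ord, destination: 1 },
  { first: "0".ord, last: "9".ord, destination: 1 },
]

# Same scan as the generated D transition() function: each state owns a
# contiguous slice of the transition table, indexed by transition_table_index.
def transition(state_table, transition_table, state, code_point)
  row = state_table[state]
  row[:n_transitions].times do |i|
    t = transition_table[row[:transition_table_index] + i]
    return t[:destination] if t[:first] <= code_point && code_point <= t[:last]
  end
  nil  # the D code returns cast(uint)-1 here
end

p transition(state_table, transition_table, 0, "q".ord)  # => 1
```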
lib/propane/parser.rb (new file, 84 lines)
@@ -0,0 +1,84 @@
class Propane

  class Parser

    def initialize(tokens, rules)
      @token_eof = Token.new("$", nil, TOKEN_EOF)
      @item_sets = []
      @item_sets_set = {}
      start_items = rules["Start"].patterns.map do |pattern|
        pattern.components << @token_eof
        Item.new(pattern, 0)
      end
      eval_item_sets = Set.new
      eval_item_sets << ItemSet.new(start_items)

      while eval_item_sets.size > 0
        this_eval_item_sets = eval_item_sets
        eval_item_sets = Set.new
        this_eval_item_sets.each do |item_set|
          unless @item_sets_set.include?(item_set)
            item_set.id = @item_sets.size
            @item_sets << item_set
            @item_sets_set[item_set] = item_set
            item_set.follow_symbols.each do |follow_symbol|
              unless follow_symbol == @token_eof
                follow_set = item_set.build_follow_set(follow_symbol)
                eval_item_sets << follow_set
              end
            end
          end
        end
      end

      @item_sets.each do |item_set|
        process_item_set(item_set)
        puts "Item set #{item_set.id}:"
        ids = item_set.in_sets.map(&:id)
        if ids.size > 0
          puts " (in from #{ids.join(", ")})"
        end
        puts item_set
        item_set.follow_item_set.each do |follow_symbol, follow_item_set|
          puts " #{follow_symbol.name} => #{follow_item_set.id}"
        end
        puts
      end
    end

    def build_tables
      shift_table = []
      state_table = []
      @item_sets.each do |item_set|
        shift_entries = item_set.follow_symbols.select do |follow_symbol|
          follow_symbol.is_a?(Token)
        end.map do |follow_symbol|
          {
            token_id: follow_symbol.id,
            state_id: item_set.follow_item_set[follow_symbol].id,
          }
        end
        state_table << {
          shift_index: shift_table.size,
          n_shifts: shift_entries.size,
        }
        shift_table += shift_entries
      end
      [state_table, shift_table]
    end

    private

    def process_item_set(item_set)
      item_set.follow_symbols.each do |follow_symbol|
        unless follow_symbol == @token_eof
          follow_set = @item_sets_set[item_set.build_follow_set(follow_symbol)]
          item_set.follow_item_set[follow_symbol] = follow_set
          follow_set.in_sets << item_set
        end
      end
    end

  end

end
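The loop above computes the LR(0) canonical collection: each ItemSet closes itself on construction (see ItemSet#close! below), and build_follow_set advances the dot past one grammar symbol. A hypothetical trace for a single-pattern Start rule:

```ruby
# Hypothetical grammar: Start -> integer plus integer, with "$" (@token_eof)
# appended by the constructor above. The dotted items evolve like this:
#
#   I0: Start -> . integer plus integer $    follow on integer => I1
#   I1: Start -> integer . plus integer $    follow on plus    => I2
#   I2: Start -> integer plus . integer $    follow on integer => I3
#   I3: Start -> integer plus integer . $    $ is @token_eof, so the walk stops
#
# build_tables then records one shift entry per (item set, token) edge.
```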
lib/propane/parser/item.rb (new file, 69 lines)
@@ -0,0 +1,69 @@
class Propane
  class Parser

    class Item

      attr_reader :pattern
      attr_reader :position

      def initialize(pattern, position)
        @pattern = pattern
        @position = position
      end

      def next_component
        @pattern.components[@position]
      end

      def hash
        [@pattern, @position].hash
      end

      def ==(other)
        @pattern == other.pattern && @position == other.position
      end

      def eql?(other)
        self == other
      end

      def closed_items
        if @pattern.components[@position].is_a?(Rule)
          @pattern.components[@position].patterns.map do |pattern|
            Item.new(pattern, 0)
          end
        else
          []
        end
      end

      def follow_symbol
        @pattern.components[@position]
      end

      def followed_by?(symbol)
        follow_symbol == symbol
      end

      def next_position
        Item.new(@pattern, @position + 1)
      end

      def to_s
        parts = []
        @pattern.components.each_with_index do |symbol, index|
          if @position == index
            parts << "."
          end
          parts << symbol.name
        end
        if @position == @pattern.components.size
          parts << "."
        end
        "#{@pattern.rule.name} -> #{parts.join(" ")}"
      end

    end

  end
end
76
lib/propane/parser/item_set.rb
Normal file
76
lib/propane/parser/item_set.rb
Normal file
@ -0,0 +1,76 @@
class Propane
  class Parser

    class ItemSet

      attr_reader :items

      attr_accessor :id

      # @return [Hash]
      #   Maps a follow symbol to its item set.
      attr_reader :follow_item_set

      # @return [Set]
      #   Item sets leading to this item set.
      attr_reader :in_sets

      def initialize(items)
        @items = Set.new(items)
        @follow_item_set = {}
        @in_sets = Set.new
        close!
      end

      def follow_symbols
        Set.new(@items.map(&:follow_symbol).compact)
      end

      def build_follow_set(symbol)
        ItemSet.new(items_followed_by(symbol).map(&:next_position))
      end

      def hash
        @items.hash
      end

      def ==(other)
        @items.eql?(other.items)
      end

      def eql?(other)
        self == other
      end

      def to_s
        @items.map(&:to_s).join("\n")
      end

      private

      def close!
        eval_items = @items
        while eval_items.size > 0
          this_eval_items = eval_items
          eval_items = Set.new
          this_eval_items.each do |item|
            item.closed_items.each do |new_item|
              unless @items.include?(new_item)
                eval_items << new_item
              end
            end
          end
          @items += eval_items
        end
      end

      def items_followed_by(symbol)
        @items.select do |item|
          item.followed_by?(symbol)
        end
      end

    end

  end
end
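`build_follow_set` is the LR GOTO operation: it advances the dot past every item that can consume the given symbol, and the `ItemSet` constructor closes the result. A sketch under the same loading assumption:

require "propane"
require "set"

a = Propane::Token.new("a", nil, 0)  # token with no pattern, so no NFA is built
s = Propane::Rule.new("S", 0)
s.add_pattern([a, a], nil)           # S -> a a

set0 = Propane::Parser::ItemSet.new([Propane::Parser::Item.new(s.patterns[0], 0)])
set0.follow_symbols.map(&:name)      # => ["a"]
puts set0.build_follow_set(a)        # prints "S -> a . a"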
162 lib/propane/regex.rb Normal file
@@ -0,0 +1,162 @@
class Propane
  class Regex

    attr_reader :unit
    attr_reader :nfa

    def initialize(pattern)
      @pattern = pattern.dup
      @unit = parse_alternates
      @nfa = @unit.to_nfa
      if @pattern != ""
        raise Error.new(%[Unexpected "#{@pattern}" in pattern])
      end
    end

    private

    def parse_alternates
      au = AlternatesUnit.new
      while @pattern != ""
        c = @pattern[0]
        return au if c == ")"
        @pattern.slice!(0)
        case c
        when "["
          au << parse_character_class
        when "("
          au << parse_group
        when "*", "+", "?", "{"
          if last_unit = au.last_unit
            case c
            when "*"
              min_count, max_count = 0, nil
            when "+"
              min_count, max_count = 1, nil
            when "?"
              min_count, max_count = 0, 1
            when "{"
              min_count, max_count = parse_curly_count
            end
            mu = MultiplicityUnit.new(last_unit, min_count, max_count)
            au.replace_last!(mu)
          else
            raise Error.new("#{c} follows nothing")
          end
        when "|"
          au.new_alternate!
        when "\\"
          au << parse_backslash
        when "."
          au << period_character_class
        else
          au << CharacterRangeUnit.new(c)
        end
      end
      au
    end

    def parse_group
      au = parse_alternates
      if @pattern[0] != ")"
        raise Error.new("Unterminated group in pattern")
      end
      @pattern.slice!(0)
      au
    end

    def parse_character_class
      ccu = CharacterClassUnit.new
      index = 0
      loop do
        if @pattern == ""
          raise Error.new("Unterminated character class")
        end
        c = @pattern.slice!(0)
        if c == "]"
          break
        elsif c == "^" && index == 0
          ccu.negate = true
        elsif c == "-" && (ccu.size == 0 || @pattern[0] == "]")
          ccu << CharacterRangeUnit.new(c)
        elsif c == "\\"
          ccu << parse_backslash
        elsif c == "-" && @pattern[0] != "]"
          begin_cu = ccu.last_unit
          unless begin_cu.is_a?(CharacterRangeUnit) && begin_cu.code_point_range.size == 1
            raise Error.new("Character range must be between single characters")
          end
          if @pattern[0] == "\\"
            @pattern.slice!(0)
            end_cu = parse_backslash
            unless end_cu.is_a?(CharacterRangeUnit) && end_cu.code_point_range.size == 1
              raise Error.new("Character range must be between single characters")
            end
            max_code_point = end_cu.first
          else
            max_code_point = @pattern[0].ord
            @pattern.slice!(0)
          end
          cru = CharacterRangeUnit.new(begin_cu.first, max_code_point)
          ccu.replace_last!(cru)
        else
          ccu << CharacterRangeUnit.new(c)
        end
        index += 1
      end
      ccu
    end

    def parse_curly_count
      if @pattern =~ /^(\d+)(?:(,)(\d*))?\}(.*)$/
        min_count, comma, max_count, pattern = $1, $2, $3, $4
        min_count = min_count.to_i
        if comma.to_s == ""
          max_count = min_count
        elsif max_count.to_s != ""
          max_count = max_count.to_i
          if max_count < min_count
            raise Error.new("Maximum repetition count cannot be less than minimum repetition count")
          end
        else
          max_count = nil
        end
        @pattern = pattern
        [min_count, max_count]
      else
        raise Error.new("Unexpected match count at #{@pattern}")
      end
    end

    def parse_backslash
      if @pattern == ""
        raise Error.new("Unfollowed \\ at end of pattern")
      else
        c = @pattern.slice!(0)
        case c
        when "d"
          CharacterRangeUnit.new("0", "9")
        when "s"
          ccu = CharacterClassUnit.new
          ccu << CharacterRangeUnit.new(" ")
          ccu << CharacterRangeUnit.new("\t")
          ccu << CharacterRangeUnit.new("\r")
          ccu << CharacterRangeUnit.new("\n")
          ccu << CharacterRangeUnit.new("\f")
          ccu << CharacterRangeUnit.new("\v")
          ccu
        else
          CharacterRangeUnit.new(c)
        end
      end
    end

    def period_character_class
      ccu = CharacterClassUnit.new
      ccu << CharacterRangeUnit.new(0, "\n".ord - 1)
      ccu << CharacterRangeUnit.new("\n".ord + 1, 0xFFFFFFFF)
      ccu
    end

  end
end
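Usage sketch, assuming the classes above (plus the `Error` class and the NFA/unit code) are loaded: a `Regex` parses its pattern into a unit tree and compiles it to an NFA at construction time, rejecting malformed patterns eagerly.

require "propane"

regex = Propane::Regex.new("[a-z_][a-z0-9_]*")
regex.unit             # => AlternatesUnit holding one SequenceUnit
regex.nfa.start_state  # entry state for NFA simulation

# Invalid counts and unterminated groups raise at construction:
begin
  Propane::Regex.new("a{3,1}")
rescue Propane::Error => e
  puts e.message  # maximum repetition count less than minimum
end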
26 lib/propane/regex/nfa.rb Normal file
@@ -0,0 +1,26 @@
class Propane
  class Regex

    class NFA < FA

      attr_reader :end_state

      def initialize
        super()
        @end_state = State.new
      end

      class << self

        def empty
          nfa = NFA.new
          nfa.start_state.add_transition(nil, nfa.end_state)
          nfa
        end

      end

    end

  end
end
172 lib/propane/regex/unit.rb Normal file
@@ -0,0 +1,172 @@
class Propane
  class Regex

    class Unit
    end

    class SequenceUnit < Unit
      attr_accessor :units
      def initialize
        @units = []
      end
      def method_missing(*args)
        @units.__send__(*args)
      end
      def to_nfa
        if @units.empty?
          NFA.empty
        else
          nfa = NFA.new
          unit_nfas = @units.map do |unit|
            unit.to_nfa
          end
          nfa.start_state.add_transition(nil, unit_nfas[0].start_state)
          unit_nfas.reduce do |prev_nfa, next_nfa|
            prev_nfa.end_state.add_transition(nil, next_nfa.start_state)
            next_nfa
          end.end_state.add_transition(nil, nfa.end_state)
          nfa
        end
      end
    end

    class AlternatesUnit < Unit
      attr_accessor :alternates
      def initialize
        @alternates = []
        new_alternate!
      end
      def new_alternate!
        @alternates << SequenceUnit.new
      end
      def <<(unit)
        @alternates[-1] << unit
      end
      def last_unit
        @alternates[-1][-1]
      end
      def replace_last!(new_unit)
        @alternates[-1][-1] = new_unit
      end
      def to_nfa
        if @alternates.size == 0
          NFA.empty
        elsif @alternates.size == 1
          @alternates[0].to_nfa
        else
          nfa = NFA.new
          alternate_nfas = @alternates.map do |alternate|
            alternate.to_nfa
          end
          alternate_nfas.each do |alternate_nfa|
            nfa.start_state.add_transition(nil, alternate_nfa.start_state)
            alternate_nfa.end_state.add_transition(nil, nfa.end_state)
          end
          nfa
        end
      end
    end

    class CharacterRangeUnit < Unit
      attr_reader :code_point_range
      def initialize(c1, c2 = nil)
        @code_point_range = CodePointRange.new(c1, c2)
      end
      def first
        @code_point_range.first
      end
      def last
        @code_point_range.last
      end
      def to_nfa
        nfa = NFA.new
        nfa.start_state.add_transition(@code_point_range, nfa.end_state)
        nfa
      end
    end

    class CharacterClassUnit < Unit
      attr_accessor :units
      attr_accessor :negate
      def initialize
        @units = []
        @negate = false
      end
      def method_missing(*args)
        @units.__send__(*args)
      end
      def <<(thing)
        if thing.is_a?(CharacterClassUnit)
          thing.each do |ccu_unit|
            @units << ccu_unit
          end
        else
          @units << thing
        end
      end
      def last_unit
        @units[-1]
      end
      def replace_last!(new_unit)
        @units[-1] = new_unit
      end
      def to_nfa
        nfa = NFA.new
        if @units.empty?
          nfa.start_state.add_transition(nil, nfa.end_state)
        else
          code_point_ranges = @units.map(&:code_point_range)
          if @negate
            code_point_ranges = CodePointRange.invert_ranges(code_point_ranges)
          end
          code_point_ranges.each do |code_point_range|
            nfa.start_state.add_transition(code_point_range, nfa.end_state)
          end
        end
        nfa
      end
    end

    class MultiplicityUnit < Unit
      attr_accessor :unit
      attr_accessor :min_count
      attr_accessor :max_count
      def initialize(unit, min_count, max_count)
        @unit = unit
        @min_count = min_count
        @max_count = max_count
      end
      def to_nfa
        nfa = NFA.new
        last_state = nfa.start_state
        unit_nfa = nil
        @min_count.times do
          unit_nfa = @unit.to_nfa
          last_state.add_transition(nil, unit_nfa.start_state)
          last_state = unit_nfa.end_state
        end
        last_state.add_transition(nil, nfa.end_state)
        if @max_count.nil?
          if @min_count == 0
            unit_nfa = @unit.to_nfa
            last_state.add_transition(nil, unit_nfa.start_state)
          end
          unit_nfa.end_state.add_transition(nil, unit_nfa.start_state)
          unit_nfa.end_state.add_transition(nil, nfa.end_state)
        else
          (@max_count - @min_count).times do
            unit_nfa = @unit.to_nfa
            last_state.add_transition(nil, unit_nfa.start_state)
            unit_nfa.end_state.add_transition(nil, nfa.end_state)
            last_state = unit_nfa.end_state
          end
        end
        nfa
      end
    end

  end
end
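Each unit type implements `to_nfa` as a Thompson-style construction: sequences chain sub-NFAs with epsilon edges, alternates fan out from a shared start state, and `MultiplicityUnit` unrolls bounded counts and adds a loop edge for unbounded ones. A sketch of the `a?` case (min 0, max 1), assuming the classes above are loaded and that `CodePointRange` accepts a single-character string:

require "propane"

char = Propane::Regex::CharacterRangeUnit.new("a")
opt = Propane::Regex::MultiplicityUnit.new(char, 0, 1)
nfa = opt.to_nfa
# Two edges leave the start state: an epsilon skip straight to the end
# state, plus an epsilon edge into the single unrolled copy of the unit.
nfa.start_state.transitions.size  # => 2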
39 lib/propane/rule.rb Normal file
@@ -0,0 +1,39 @@
class Propane

  class Rule

    class Pattern

      attr_reader :rule

      attr_reader :components

      attr_reader :code

      def initialize(rule, components, code)
        @rule = rule
        @components = components
        @code = code
      end

    end

    attr_reader :id

    attr_reader :name

    attr_reader :patterns

    def initialize(name, id)
      @name = name
      @id = id
      @patterns = []
    end

    def add_pattern(components, code)
      @patterns << Pattern.new(self, components, code)
    end

  end

end
42 lib/propane/token.rb Normal file
@@ -0,0 +1,42 @@
class Propane

  class Token

    # @return [String]
    #   Token name.
    attr_reader :name

    # @return [String]
    #   Token pattern.
    attr_reader :pattern

    # @return [Integer]
    #   Token ID.
    attr_reader :id

    # @return [Regex::NFA]
    #   Regex NFA for matching the token.
    attr_reader :nfa

    def initialize(name, pattern, id)
      @name = name
      @pattern = pattern
      @id = id
      unless pattern.nil?
        regex = Regex.new(pattern)
        regex.nfa.end_state.accepts = self
        @nfa = regex.nfa
      end
    end

    def c_name
      @name.upcase
    end

    def to_s
      @name
    end

  end

end
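A token with a pattern owns the compiled NFA for that pattern, and the NFA's end state points back at the token via `accepts` so the lexer can report what it matched. Sketch, under the same loading assumption (and assuming the `State#accepts=` accessor from the FA code not shown here):

require "propane"

int_tok = Propane::Token.new("int", "\\d+", 0)
int_tok.nfa.end_state.accepts          # => int_tok
int_tok.c_name                         # => "INT"
Propane::Token.new("eof", nil, 1).nfa  # => nil (no pattern, no NFA)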
3 lib/propane/version.rb Normal file
@@ -0,0 +1,3 @@
class Propane
  VERSION = "0.1.0"
end
2 propane.sh Executable file
@@ -0,0 +1,2 @@
#!/bin/sh
exec bundle exec ruby -Ilib bin/propane "$@"
1 refptr
@@ -1 +0,0 @@
Subproject commit e2c7e88824c18eb3b218f6308db0194edb422eef
87 spec/propane/code_point_range_spec.rb Normal file
@@ -0,0 +1,87 @@
class Propane
  describe CodePointRange do

    describe "#<=>" do
      it "sorts ranges" do
        arr = [
          CodePointRange.new(100, 102),
          CodePointRange.new(65, 68),
          CodePointRange.new(65, 65),
          CodePointRange.new(100, 100),
          CodePointRange.new(68, 70),
        ]
        arr.sort!
        expect(arr[0]).to eq CodePointRange.new(65, 65)
        expect(arr[1]).to eq CodePointRange.new(65, 68)
        expect(arr[2]).to eq CodePointRange.new(68, 70)
        expect(arr[3]).to eq CodePointRange.new(100, 100)
        expect(arr[4]).to eq CodePointRange.new(100, 102)
      end
    end

    describe "#include?" do
      it "returns whether the code point is included in the range" do
        expect(CodePointRange.new(100).include?(100)).to be_truthy
        expect(CodePointRange.new(100, 100).include?(99)).to be_falsey
        expect(CodePointRange.new(100, 100).include?(101)).to be_falsey
        expect(CodePointRange.new(100, 120).include?(99)).to be_falsey
        expect(CodePointRange.new(100, 120).include?(100)).to be_truthy
        expect(CodePointRange.new(100, 120).include?(110)).to be_truthy
        expect(CodePointRange.new(100, 120).include?(120)).to be_truthy
        expect(CodePointRange.new(100, 120).include?(121)).to be_falsey
      end

      it "returns whether the range is included in the range" do
        expect(CodePointRange.new(100).include?(CodePointRange.new(100))).to be_truthy
        expect(CodePointRange.new(100, 100).include?(CodePointRange.new(99))).to be_falsey
        expect(CodePointRange.new(100, 100).include?(CodePointRange.new(99, 100))).to be_falsey
        expect(CodePointRange.new(100, 120).include?(CodePointRange.new(90, 110))).to be_falsey
        expect(CodePointRange.new(100, 120).include?(CodePointRange.new(110, 130))).to be_falsey
        expect(CodePointRange.new(100, 120).include?(CodePointRange.new(100, 120))).to be_truthy
        expect(CodePointRange.new(100, 120).include?(CodePointRange.new(100, 110))).to be_truthy
        expect(CodePointRange.new(100, 120).include?(CodePointRange.new(110, 120))).to be_truthy
        expect(CodePointRange.new(100, 120).include?(CodePointRange.new(102, 118))).to be_truthy
      end
    end

    describe ".invert_ranges" do
      it "inverts ranges" do
        expect(CodePointRange.invert_ranges(
          [CodePointRange.new(60, 90),
           CodePointRange.new(80, 85),
           CodePointRange.new(80, 100),
           CodePointRange.new(101),
           CodePointRange.new(200, 300)])).to eq [
          CodePointRange.new(0, 59),
          CodePointRange.new(102, 199),
          CodePointRange.new(301, 0xFFFFFFFF)]
        expect(CodePointRange.invert_ranges(
          [CodePointRange.new(0, 500),
           CodePointRange.new(7000, 0xFFFFFFFF)])).to eq [
          CodePointRange.new(501, 6999)]
      end
    end

    describe ".first_subrange" do
      it "returns the first subrange to split" do
        expect(CodePointRange.first_subrange(
          [CodePointRange.new(65, 90),
           CodePointRange.new(66, 66),
           CodePointRange.new(80, 90)])).to eq CodePointRange.new(65)
        expect(CodePointRange.first_subrange(
          [CodePointRange.new(65, 90)])).to eq CodePointRange.new(65, 90)
        expect(CodePointRange.first_subrange(
          [CodePointRange.new(65, 90),
           CodePointRange.new(80, 90)])).to eq CodePointRange.new(65, 79)
        expect(CodePointRange.first_subrange(
          [CodePointRange.new(65, 90),
           CodePointRange.new(65, 100),
           CodePointRange.new(65, 95)])).to eq CodePointRange.new(65, 90)
        expect(CodePointRange.first_subrange(
          [CodePointRange.new(100, 120),
           CodePointRange.new(70, 90)])).to eq CodePointRange.new(70, 90)
      end
    end

  end
end
121 spec/propane/lexer/dfa_spec.rb Normal file
@@ -0,0 +1,121 @@
class TestLexer
  def initialize(token_dfa)
    @token_dfa = token_dfa
  end

  def lex(input)
    input_chars = input.chars
    output = []
    while lexed_token = lex_token(input_chars)
      output << lexed_token
      input_chars.slice!(0, lexed_token[1].size)
    end
    unless input_chars.empty?
      raise "Unmatched input #{input_chars.join}"
    end
    output
  end

  def lex_token(input_chars)
    return nil if input_chars.empty?
    s = ""
    current_state = @token_dfa.start_state
    last_accepts = nil
    last_s = nil
    input_chars.each do |input_char|
      if next_state = transition(current_state, input_char)
        s += input_char
        current_state = next_state
        if current_state.accepts
          last_accepts = current_state.accepts
          last_s = s
        end
      else
        break
      end
    end
    if last_accepts
      [last_accepts.name, last_s]
    end
  end

  def transition(state, input_char)
    state.transitions.each do |transition|
      if transition.code_point_range.include?(input_char.ord)
        return transition.destination
      end
    end
    nil
  end
end

def run(grammar, input)
  propane = Propane.new(grammar)
  token_dfa = Propane::Lexer::DFA.new(propane.instance_variable_get(:@tokens))
  test_lexer = TestLexer.new(token_dfa)
  test_lexer.lex(input)
end

describe Propane::Lexer::DFA do
  it "lexes a simple token" do
    expect(run(<<EOF, "foo")).to eq [["foo", "foo"]]
token foo
EOF
  end

  it "lexes two tokens" do
    expected = [
      ["foo", "foo"],
      ["bar", "bar"],
    ]
    expect(run(<<EOF, "foobar")).to eq expected
token foo
token bar
EOF
  end

  it "lexes the longer of multiple options" do
    expected = [
      ["identifier", "foobar"],
    ]
    expect(run(<<EOF, "foobar")).to eq expected
token foo
token bar
token identifier [a-z]+
EOF
    expected = [
      ["plusplus", "++"],
      ["plus", "+"],
    ]
    expect(run(<<EOF, "+++")).to eq expected
token plus \\+
token plusplus \\+\\+
EOF
  end

  it "lexes whitespace" do
    expected = [
      ["foo", "foo"],
      ["WS", " \t"],
      ["bar", "bar"],
    ]
    expect(run(<<EOF, "foo \tbar")).to eq expected
token foo
token bar
token WS \\s+
EOF
  end

  it "allows dropping a matched pattern" do
    expected = [
      ["foo", "foo"],
      [nil, " \t"],
      ["bar", "bar"],
    ]
    expect(run(<<EOF, "foo \tbar")).to eq expected
token foo
token bar
drop \\s+
EOF
  end
end
19 spec/propane/parser/item_spec.rb Normal file
@@ -0,0 +1,19 @@
class Propane
  class Parser

    describe Item do

      it "operates properly with a set" do
        rule = Object.new
        item1 = Item.new(rule, 2)
        item2 = Item.new(rule, 2)
        expect(item1).to eq item2
        expect(item1.eql?(item2)).to be_truthy
        set = Set.new([item1, item2])
        expect(set.size).to eq 1
      end

    end

  end
end
333 spec/propane/regex_spec.rb Normal file
@@ -0,0 +1,333 @@
class Propane
  RSpec.describe Regex do

    it "parses an empty expression" do
      regex = Regex.new("")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0].size).to eq 0
    end

    it "parses a single character unit expression" do
      regex = Regex.new("a")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 1
      expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
    end

    it "parses a group with a single character unit expression" do
      regex = Regex.new("(a)")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 1
      expect(seq_unit[0]).to be_a Regex::AlternatesUnit
      alt_unit = seq_unit[0]
      expect(alt_unit.alternates.size).to eq 1
      expect(alt_unit.alternates[0]).to be_a Regex::SequenceUnit
      expect(alt_unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit
    end

    it "parses a *" do
      regex = Regex.new("a*")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 1
      expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
      m_unit = seq_unit[0]
      expect(m_unit.min_count).to eq 0
      expect(m_unit.max_count).to be_nil
      expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
    end

    it "parses a +" do
      regex = Regex.new("a+")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 1
      expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
      m_unit = seq_unit[0]
      expect(m_unit.min_count).to eq 1
      expect(m_unit.max_count).to be_nil
      expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
    end

    it "parses a ?" do
      regex = Regex.new("a?")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 1
      expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
      m_unit = seq_unit[0]
      expect(m_unit.min_count).to eq 0
      expect(m_unit.max_count).to eq 1
      expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
    end

    it "parses a multiplicity count" do
      regex = Regex.new("a{5}")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 1
      expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
      m_unit = seq_unit[0]
      expect(m_unit.min_count).to eq 5
      expect(m_unit.max_count).to eq 5
      expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
    end

    it "parses a minimum-only multiplicity count" do
      regex = Regex.new("a{5,}")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 1
      expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
      m_unit = seq_unit[0]
      expect(m_unit.min_count).to eq 5
      expect(m_unit.max_count).to be_nil
      expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
    end

    it "parses a minimum and maximum multiplicity count" do
      regex = Regex.new("a{5,8}")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 1
      expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
      m_unit = seq_unit[0]
      expect(m_unit.min_count).to eq 5
      expect(m_unit.max_count).to eq 8
      expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
      expect(m_unit.unit.first).to eq "a".ord
    end

    it "parses an escaped *" do
      regex = Regex.new("a\\*")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 2
      expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
      expect(seq_unit[0].first).to eq "a".ord
      expect(seq_unit[1]).to be_a Regex::CharacterRangeUnit
      expect(seq_unit[1].first).to eq "*".ord
    end

    it "parses an escaped +" do
      regex = Regex.new("a\\+")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 2
      expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
      expect(seq_unit[0].first).to eq "a".ord
      expect(seq_unit[1]).to be_a Regex::CharacterRangeUnit
      expect(seq_unit[1].first).to eq "+".ord
    end

    it "parses an escaped \\" do
      regex = Regex.new("\\\\d")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 2
      expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
      expect(seq_unit[0].first).to eq "\\".ord
      expect(seq_unit[1]).to be_a Regex::CharacterRangeUnit
      expect(seq_unit[1].first).to eq "d".ord
    end

    it "parses a character class" do
      regex = Regex.new("[a-z_]")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 1
      expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
      ccu = seq_unit[0]
      expect(ccu.negate).to be_falsey
      expect(ccu.size).to eq 2
      expect(ccu[0]).to be_a Regex::CharacterRangeUnit
      expect(ccu[0].first).to eq "a".ord
      expect(ccu[0].last).to eq "z".ord
      expect(ccu[1]).to be_a Regex::CharacterRangeUnit
      expect(ccu[1].first).to eq "_".ord
    end

    it "parses a negated character class" do
      regex = Regex.new("[^xyz]")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 1
      expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
      ccu = seq_unit[0]
      expect(ccu.negate).to be_truthy
      expect(ccu.size).to eq 3
      expect(ccu[0]).to be_a Regex::CharacterRangeUnit
      expect(ccu[0].first).to eq "x".ord
    end

    it "parses - as a plain character at beginning of a character class" do
      regex = Regex.new("[-9]")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 1
      expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
      ccu = seq_unit[0]
      expect(ccu.size).to eq 2
      expect(ccu[0]).to be_a Regex::CharacterRangeUnit
      expect(ccu[0].first).to eq "-".ord
    end

    it "parses - as a plain character at end of a character class" do
      regex = Regex.new("[0-]")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 1
      expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
      ccu = seq_unit[0]
      expect(ccu.size).to eq 2
      expect(ccu[0]).to be_a Regex::CharacterRangeUnit
      expect(ccu[0].first).to eq "0".ord
      expect(ccu[1]).to be_a Regex::CharacterRangeUnit
      expect(ccu[1].first).to eq "-".ord
    end

    it "parses - as a plain character at beginning of a negated character class" do
      regex = Regex.new("[^-9]")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 1
      expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
      ccu = seq_unit[0]
      expect(ccu.negate).to be_truthy
      expect(ccu.size).to eq 2
      expect(ccu[0]).to be_a Regex::CharacterRangeUnit
      expect(ccu[0].first).to eq "-".ord
    end

    it "parses . as a plain character in a character class" do
      regex = Regex.new("[.]")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 1
      expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
      ccu = seq_unit[0]
      expect(ccu.negate).to be_falsey
      expect(ccu.size).to eq 1
      expect(ccu[0]).to be_a Regex::CharacterRangeUnit
      expect(ccu[0].first).to eq ".".ord
    end

    it "parses - as a plain character when escaped in middle of character class" do
      regex = Regex.new("[0\\-9]")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      seq_unit = regex.unit.alternates[0]
      expect(seq_unit.size).to eq 1
      expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
      ccu = seq_unit[0]
      expect(ccu.negate).to be_falsey
      expect(ccu.size).to eq 3
      expect(ccu[0]).to be_a Regex::CharacterRangeUnit
      expect(ccu[0].first).to eq "0".ord
      expect(ccu[1]).to be_a Regex::CharacterRangeUnit
      expect(ccu[1].first).to eq "-".ord
      expect(ccu[2]).to be_a Regex::CharacterRangeUnit
      expect(ccu[2].first).to eq "9".ord
    end

    it "parses alternates" do
      regex = Regex.new("ab|c")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 2
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      expect(regex.unit.alternates[1]).to be_a Regex::SequenceUnit
      expect(regex.unit.alternates[0].size).to eq 2
      expect(regex.unit.alternates[1].size).to eq 1
    end

    it "parses a ." do
      regex = Regex.new("a.b")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 1
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      expect(regex.unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit
      expect(regex.unit.alternates[0][1]).to be_a Regex::CharacterClassUnit
      expect(regex.unit.alternates[0][1].units.size).to eq 2
      expect(regex.unit.alternates[0][2]).to be_a Regex::CharacterRangeUnit
    end

    it "parses something complex" do
      regex = Regex.new("(a|)*|[^^]|\\|v|[x-y]+")
      expect(regex.unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates.size).to eq 4
      expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
      expect(regex.unit.alternates[0].size).to eq 1
      expect(regex.unit.alternates[0][0]).to be_a Regex::MultiplicityUnit
      expect(regex.unit.alternates[0][0].min_count).to eq 0
      expect(regex.unit.alternates[0][0].max_count).to be_nil
      expect(regex.unit.alternates[0][0].unit).to be_a Regex::AlternatesUnit
      expect(regex.unit.alternates[0][0].unit.alternates.size).to eq 2
      expect(regex.unit.alternates[0][0].unit.alternates[0]).to be_a Regex::SequenceUnit
      expect(regex.unit.alternates[0][0].unit.alternates[0].size).to eq 1
      expect(regex.unit.alternates[0][0].unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit
      expect(regex.unit.alternates[0][0].unit.alternates[1]).to be_a Regex::SequenceUnit
      expect(regex.unit.alternates[0][0].unit.alternates[1].size).to eq 0
      expect(regex.unit.alternates[1]).to be_a Regex::SequenceUnit
      expect(regex.unit.alternates[1].size).to eq 1
      expect(regex.unit.alternates[1][0]).to be_a Regex::CharacterClassUnit
      expect(regex.unit.alternates[1][0].negate).to be_truthy
      expect(regex.unit.alternates[1][0].size).to eq 1
      expect(regex.unit.alternates[1][0][0]).to be_a Regex::CharacterRangeUnit
      expect(regex.unit.alternates[2]).to be_a Regex::SequenceUnit
      expect(regex.unit.alternates[2].size).to eq 2
      expect(regex.unit.alternates[2][0]).to be_a Regex::CharacterRangeUnit
      expect(regex.unit.alternates[2][0].first).to eq "|".ord
      expect(regex.unit.alternates[2][1]).to be_a Regex::CharacterRangeUnit
      expect(regex.unit.alternates[2][1].first).to eq "v".ord
      expect(regex.unit.alternates[3]).to be_a Regex::SequenceUnit
      expect(regex.unit.alternates[3].size).to eq 1
      expect(regex.unit.alternates[3][0]).to be_a Regex::MultiplicityUnit
      expect(regex.unit.alternates[3][0].min_count).to eq 1
      expect(regex.unit.alternates[3][0].max_count).to be_nil
      expect(regex.unit.alternates[3][0].unit).to be_a Regex::CharacterClassUnit
      expect(regex.unit.alternates[3][0].unit.size).to eq 1
      expect(regex.unit.alternates[3][0].unit[0]).to be_a Regex::CharacterRangeUnit
      expect(regex.unit.alternates[3][0].unit[0].first).to eq "x".ord
      expect(regex.unit.alternates[3][0].unit[0].last).to eq "y".ord
    end

  end
end
97 spec/propane_spec.rb Normal file
@@ -0,0 +1,97 @@
require "fileutils"

describe Propane do
  def write_grammar(grammar)
    File.write("spec/run/testparser.i", grammar)
  end

  def build_parser
    result = system(*%w[./propane.sh spec/run/testparser.i spec/run/testparser.d])
    expect(result).to be_truthy
  end

  def compile(test_file)
    result = system(*%w[gdc -funittest -o spec/run/testparser spec/run/testparser.d], test_file)
    expect(result).to be_truthy
  end

  def run
    result = system("spec/run/testparser")
    expect(result).to be_truthy
  end

  before(:each) do
    FileUtils.rm_rf("spec/run")
    FileUtils.mkdir_p("spec/run")
  end

  it "generates a D lexer" do
    write_grammar <<EOF
token int \\d+
token plus \\+
token times \\*
drop \\s+
Start: [Foo] <<
>>
Foo: [int] <<
>>
Foo: [plus] <<
>>
EOF
    build_parser
    compile("spec/test_d_lexer.d")
    run
  end

  it "generates a parser" do
    write_grammar <<EOF
token plus \\+
token times \\*
token zero 0
token one 1
Start: [E] <<
>>
E: [E times B] <<
>>
E: [E plus B] <<
>>
E: [B] <<
>>
B: [zero] <<
>>
B: [one] <<
>>
EOF
    build_parser
  end

  it "distinguishes between multiple identical rules with lookahead symbol" do
    write_grammar <<EOF
token a
token b
Start: [R1 a] <<
>>
Start: [R2 b] <<
>>
R1: [a b] <<
>>
R2: [a b] <<
>>
EOF
    build_parser
  end

  it "handles reducing a rule that could be arrived at from multiple states" do
    write_grammar <<EOF
token a
token b
Start: [a R1] <<
>>
Start: [b R1] <<
>>
R1: [b] <<
>>
EOF
    build_parser
  end
end
11 spec/spec_helper.rb Normal file
@@ -0,0 +1,11 @@
require "bundler/setup"
require "propane"

RSpec.configure do |config|
  # Enable flags like --only-failures and --next-failure
  config.example_status_persistence_file_path = ".rspec_status"

  config.expect_with :rspec do |c|
    c.syntax = :expect
  end
end
66 spec/test_d_lexer.d Normal file
@@ -0,0 +1,66 @@
import testparser;
import std.stdio;

int main()
{
    return 0;
}

unittest
{
    alias DCP = Testparser.Decoder.DecodedCodePoint;
    string inputstring = "5+\n 66";
    const(ubyte) * input = cast(const(ubyte) *)inputstring.ptr;
    size_t input_length = inputstring.length;
    DCP dcp;
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP('5', 1u));
    input += dcp.code_point_length;
    input_length -= dcp.code_point_length;
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP('+', 1u));
    input += dcp.code_point_length;
    input_length -= dcp.code_point_length;
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP('\n', 1u));
    input += dcp.code_point_length;
    input_length -= dcp.code_point_length;
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP(' ', 1u));
    input += dcp.code_point_length;
    input_length -= dcp.code_point_length;
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP('6', 1u));
    input += dcp.code_point_length;
    input_length -= dcp.code_point_length;
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP('6', 1u));
    input += dcp.code_point_length;
    input_length -= dcp.code_point_length;
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP(Testparser.Decoder.CODE_POINT_EOF, 0u));

    inputstring = "\xf0\x9f\xa7\xa1";
    input = cast(const(ubyte) *)inputstring.ptr;
    input_length = inputstring.length;
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP(0x1F9E1, 4u));
}

unittest
{
    alias LT = Testparser.Lexer.LexedToken;
    string input = "5 + 4 * \n677 + 567";
    Testparser.Lexer lexer = new Testparser.Lexer(cast(const(ubyte) *)input.ptr, input.length);
    assert(lexer.lex_token() == LT(0, 0, 1, Testparser.TOKEN_INT));
    assert(lexer.lex_token() == LT(0, 2, 1, Testparser.TOKEN_PLUS));
    assert(lexer.lex_token() == LT(0, 4, 1, Testparser.TOKEN_INT));
    assert(lexer.lex_token() == LT(0, 6, 1, Testparser.TOKEN_TIMES));
    assert(lexer.lex_token() == LT(1, 0, 3, Testparser.TOKEN_INT));
    assert(lexer.lex_token() == LT(1, 4, 1, Testparser.TOKEN_PLUS));
    assert(lexer.lex_token() == LT(1, 6, 3, Testparser.TOKEN_INT));
    assert(lexer.lex_token() == LT(1, 9, 0, Testparser.TOKEN_EOF));

    lexer = new Testparser.Lexer(null, 0u);
    assert(lexer.lex_token() == LT(0, 0, 0, Testparser.TOKEN_EOF));
}
@@ -1,14 +0,0 @@

all:
	for d in *; do \
		if [ -d $$d ]; then \
			make -C $$d; \
		fi; \
	done

clean:
	for d in *; do \
		if [ -d $$d ]; then \
			make -C $$d clean; \
		fi; \
	done
@@ -1,15 +0,0 @@

TARGET := test
I_SOURCE := itest
CXXFLAGS := -O2
LDFLAGS := -lpcre

all: $(TARGET)
	./$(TARGET)

$(TARGET): $(shell which imbecile) $(I_SOURCE).I $(wildcard *.cc)
	imbecile $(I_SOURCE).I
	$(CXX) -o $@ *.cc $(LDFLAGS)

clean:
	-rm -f $(TARGET) *.o $(I_SOURCE).cc $(I_SOURCE).h
@@ -1,37 +0,0 @@

[tokens]

AND and
OR or
NOT not
LPAREN \(
RPAREN \)
WS \s+
EQUALS = %{ cout << "Saw '='" << endl; %}
IDENTIFIER [a-zA-Z_][a-zA-Z_0-9]* %{
  cout << "Identify: '" << matches[0] << "'" << endl;
%}

DEC_INT [1-9]\d*\b
${
  uint64_t value;
$}
%{
  sscanf(matches[0].c_str(), "%lld", &value);
  cout << "value: " << value << endl;
%}

HEX_INT 0x([0-9a-fA-F]+)\b ${ uint64_t value; $} %{
  sscanf(matches[1].c_str(), "%llx", &value);
  cout << "value: " << value << endl;
%}

OCT_INT 0([0-7]*)\b
BIN_INT 0b([01]+)\b

[rules]

Assignment := IDENTIFIER ASSIGN Expression

Expression := IDENTIFIER \
            | Assignment
@@ -1,17 +0,0 @@

#include <sstream>
#include <string>

#include "itest.h"

using namespace std;

int main(int argc, char * argv[])
{
    Parser p;
    stringstream t(string(
        "hi there (one and two and three and four) or (two = nine)\n"
        "0x42 12345 0 011 0b0011\n"
    ));
    p.parse(t);
}
202 tmpl/parser.cc
@@ -1,202 +0,0 @@

#include <string.h> /* memcpy() */
#include <pcre.h>

#include <iostream>
#include <vector>

#include {%header_name%}

using namespace std;

#ifdef I_NAMESPACE
namespace I_NAMESPACE {
#endif

I_CLASSNAME::I_CLASSNAME()
    : m_errstr(NULL)
{
}

static TokenRef buildToken(int typeindex)
{
    TokenRef token;
    switch (typeindex)
    {
        {%buildToken%}
    }
    if (!token.isNull())
    {
        token->setType(typeindex);
    }
    return token;
}

static void read_istream(istream & i, vector<char> & buff, int & size)
{
    size = 0;
    int bytes_read;
    char read_buff[1000];
    while (!i.eof())
    {
        i.read(&read_buff[0], sizeof(read_buff));
        bytes_read = i.gcount();
        size += bytes_read;
        for (int j = 0; j < bytes_read; j++)
            buff.push_back(read_buff[j]);
    }
}

bool I_CLASSNAME::parse(istream & i)
{
    struct {
        const char * name;
        const char * definition;
        bool process;
        pcre * re;
        pcre_extra * re_extra;
    } tokens[] = {
        {%token_list%}
    };

    if (sizeof(tokens)/sizeof(tokens[0]) == 0)
    {
        m_errstr = "No tokens defined";
        return false;
    }

    vector<char> buff;
    int buff_size;
    read_istream(i, buff, buff_size);

    if (buff_size <= 0)
    {
        m_errstr = "0-length input string";
        return false;
    }

    /* append trailing NUL byte for pcre functions */
    buff.push_back('\0');

    /* compile all token regular expressions */
    for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++)
    {
        const char * errptr;
        int erroffset;
        tokens[i].re = pcre_compile(tokens[i].definition, 0,
                &errptr, &erroffset, NULL);
        if (tokens[i].re == NULL)
        {
            cerr << "Error compiling token '" << tokens[i].name
                 << "' regular expression at position " << erroffset
                 << ": " << errptr << endl;
            m_errstr = "Error in token regular expression";
            return false;
        }
        tokens[i].re_extra = pcre_study(tokens[i].re, 0, &errptr);
    }

    int buff_pos = 0;
    const int ovector_num_matches = 16;
    const int ovector_size = 3 * (ovector_num_matches + 1);
    int ovector[ovector_size];
    while (buff_pos < buff_size)
    {
        int longest_match_length = 0;
        int longest_match_index = -1;
        int longest_match_ovector[ovector_size];
        for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++)
        {
            int rc = pcre_exec(tokens[i].re, tokens[i].re_extra,
                    &buff[0], buff_size, buff_pos,
                    PCRE_ANCHORED | PCRE_NOTEMPTY,
                    ovector, ovector_size);
            if (rc > 0)
            {
                /* this pattern matched some of the input */
                int len = ovector[1] - ovector[0];
                if (len > longest_match_length)
                {
                    longest_match_length = len;
                    longest_match_index = i;
                    memcpy(longest_match_ovector, ovector, sizeof(ovector));
                }
            }
        }
        if (longest_match_index < 0)
        {
            /* no pattern matched the input at the current position */
            cerr << "Parse error" << endl;
            return false;
        }
        Matches matches(tokens[longest_match_index].re,
                &buff[0], longest_match_ovector, ovector_size);
        TokenRef token = buildToken(longest_match_index);
        if (token.isNull())
        {
            cerr << "Internal Error: null token" << endl;
            return false;
        }
        token->process(matches);
        m_tokens.push_back(token);
        buff_pos += longest_match_length;
    }
}

refptr<Node> Node::operator[](int index)
{
    return (0 <= index && index < m_indexed_children.size())
        ? m_indexed_children[index]
        : NULL;
}

refptr<Node> Node::operator[](const std::string & index)
{
    return (m_named_children.find(index) != m_named_children.end())
        ? m_named_children[index]
        : NULL;
}

void Token::process(const Matches & matches)
{
    {%token_code%}
}

Matches::Matches(pcre * re, const char * data, int * ovector, int ovec_size)
    : m_re(re), m_data(data), m_ovector(ovector), m_ovec_size(ovec_size)
{
}

std::string Matches::operator[](int index) const
{
    if (0 <= index && index < (m_ovec_size / 3))
    {
        int idx = 2 * index;
        if (m_ovector[idx] >= 0 && m_ovector[idx + 1] >= 0)
        {
            return string(m_data, m_ovector[idx],
                    m_ovector[idx + 1] - m_ovector[idx]);
        }
    }
    return "";
}

std::string Matches::operator[](const std::string & index) const
{
    int idx = pcre_get_stringnumber(m_re, index.c_str());
    if (idx > 0 && idx < (m_ovec_size / 3))
    {
        if (m_ovector[idx] >= 0 && m_ovector[idx + 1] >= 0)
        {
            return string(m_data, m_ovector[idx],
                    m_ovector[idx + 1] - m_ovector[idx]);
        }
    }
    return "";
}

{%token_classes_code%}

#ifdef I_NAMESPACE
};
#endif
181 tmpl/parser.h
@@ -1,181 +0,0 @@

#ifndef IMBECILE_PARSER_HEADER
#define IMBECILE_PARSER_HEADER

#include <pcre.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#include <iostream>
#include <map>
#include <vector>
#include <list>

{%user_includes%}

{%defines%}

#ifdef I_NAMESPACE
namespace I_NAMESPACE {
#endif

#ifndef REFPTR_H
#define REFPTR_H REFPTR_H

/* Author: Josh Holtrop
 * Purpose: Provide a reference-counting pointer-like first order
 *   C++ object that will free the object it is pointing to when
 *   all references to it have been destroyed.
 *   This implementation does not solve the circular reference problem.
 *   I was not concerned with that when developing this class.
 */
#include <stdlib.h> /* NULL */

template <typename T>
class refptr
{
public:
    refptr<T>();
    refptr<T>(T * ptr);
    refptr<T>(const refptr<T> & orig);
    refptr<T> & operator=(const refptr<T> & orig);
    refptr<T> & operator=(T * ptr);
    ~refptr<T>();
    T & operator*() const { return *m_ptr; }
    T * operator->() const { return m_ptr; }
    bool isNull() const { return m_ptr == NULL; }

private:
    void cloneFrom(const refptr<T> & orig);
    void destroy();

    T * m_ptr;
    int * m_refCount;
};

template <typename T> refptr<T>::refptr()
{
    m_ptr = NULL;
    m_refCount = NULL;
}

template <typename T> refptr<T>::refptr(T * ptr)
{
    m_ptr = ptr;
    m_refCount = new int;
    *m_refCount = 1;
}

template <typename T> refptr<T>::refptr(const refptr<T> & orig)
{
    cloneFrom(orig);
}

template <typename T> refptr<T> & refptr<T>::operator=(const refptr<T> & orig)
{
    destroy();
    cloneFrom(orig);
    return *this;
}

template <typename T> refptr<T> & refptr<T>::operator=(T * ptr)
{
    destroy();
    m_ptr = ptr;
    m_refCount = new int;
    *m_refCount = 1;
    return *this;
}

template <typename T> void refptr<T>::cloneFrom(const refptr<T> & orig)
{
    this->m_ptr = orig.m_ptr;
    this->m_refCount = orig.m_refCount;
    if (m_refCount != NULL)
        (*m_refCount)++;
}

template <typename T> refptr<T>::~refptr()
{
    destroy();
}

template <typename T> void refptr<T>::destroy()
{
    if (m_refCount != NULL)
    {
        if (*m_refCount <= 1)
        {
            delete m_ptr;
            delete m_refCount;
        }
        else
        {
            (*m_refCount)--;
        }
    }
}

#endif

class Matches
{
public:
    Matches(pcre * re, const char * data, int * ovector, int ovec_size);
    std::string operator[](int index) const;
    std::string operator[](const std::string & index) const;

protected:
    pcre * m_re;
    const char * m_data;
    int * m_ovector;
    int m_ovec_size;
};

class Node
{
public:
    refptr<Node> operator[](int index);
    refptr<Node> operator[](const std::string & index);

protected:
    std::map< std::string, refptr<Node> > m_named_children;
    std::vector< refptr<Node> > m_indexed_children;
};
typedef refptr<Node> NodeRef;

class Token : public Node
{
public:
    virtual void process(const Matches & matches);
    void setType(int type) { m_type = type; }
    int getType() const { return m_type; }

protected:
    int m_type;

    {%token_data%}
};
typedef refptr<Token> TokenRef;

{%token_classes%}

class I_CLASSNAME
{
public:
    I_CLASSNAME();
    bool parse(std::istream & in);
    const char * getError() { return m_errstr; }

protected:
    const char * m_errstr;
    std::list<TokenRef> m_tokens;
};

#ifdef I_NAMESPACE
};
#endif

#endif /* IMBECILE_PARSER_HEADER */