Compare commits


No commits in common. "164a4854fbb2162dd2614e028496829713e871f6" and "064bb94108ae57cb261c192bafc7734edce7cdb3" have entirely different histories.

51 changed files with 1317 additions and 2313 deletions

.gitignore vendored (19 lines changed)

@@ -1,10 +1,9 @@
/.bundle/
/.yardoc
/_yardoc/
/coverage/
/doc/
/pkg/
/spec/reports/
/tmp/
/.rspec_status
/spec/run/
imbecile
tags
*.o
.*.swp
*.dep
tmpl.*
tests/*/itest.cc
tests/*/itest.h
tests/*/test

.gitmodules vendored (new file, 3 lines)

@@ -0,0 +1,3 @@
[submodule "refptr"]
path = refptr
url = http://github.com/holtrop/refptr.git

.rspec (deleted, 3 lines)

@@ -1,3 +0,0 @@
--format documentation
--color
--require spec_helper

Gemfile (deleted)

@@ -1,4 +0,0 @@
source "https://rubygems.org"
gem "rake"
gem "rspec"

Gemfile.lock (deleted)

@@ -1,28 +0,0 @@
GEM
remote: https://rubygems.org/
specs:
diff-lcs (1.5.0)
rake (13.0.6)
rspec (3.11.0)
rspec-core (~> 3.11.0)
rspec-expectations (~> 3.11.0)
rspec-mocks (~> 3.11.0)
rspec-core (3.11.0)
rspec-support (~> 3.11.0)
rspec-expectations (3.11.0)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.11.0)
rspec-mocks (3.11.1)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.11.0)
rspec-support (3.11.0)
PLATFORMS
ruby
DEPENDENCIES
rake
rspec
BUNDLED WITH
2.4.0.dev

LICENSE (MIT license, deleted)

@@ -1,21 +0,0 @@
The MIT License (MIT)
Copyright (c) 2010-2022 Josh Holtrop
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

Makefile (new file, 61 lines)

@@ -0,0 +1,61 @@
TARGET := imbecile
CXXOBJS := $(patsubst %.cc,%.o,$(wildcard *.cc)) tmpl.o
CXXDEPS := $(patsubst %.o,.%.dep,$(CXXOBJS))
CXXFLAGS := -O2
DEPS := $(CXXDEPS)
OBJS := $(CXXOBJS)
LDFLAGS := -lpcre
CPPFLAGS := -I$(shell pwd)/refptr
all: submodule_check tmpl.h $(TARGET)
.PHONY: submodule_check
submodule_check:
@if [ ! -e refptr/refptr.h ]; then \
echo Error: \"refptr\" folder is not populated.; \
echo Perhaps you forgot to do \"git checkout --recursive\"?; \
echo You can remedy the situation with \"git submodule update --init\".; \
exit 1; \
fi
$(TARGET): $(OBJS)
$(CXX) -o $@ $^ $(LDFLAGS)
# Object file rules
%.o: %.cc
$(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $<
# Make dependency files
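# (gcc -MM prints "foo.o: deps..."; the sed below rewrites that to
# "foo.o .foo.dep : deps..." so the dependency file itself is also
# regenerated whenever any of those prerequisites change)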
.%.dep: %.c
@set -e; rm -f $@; \
$(CC) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@
.%.dep: %.cc tmpl.h
@set -e; rm -f $@; \
$(CXX) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@
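# Embed every file under tmpl/ as C byte arrays; xxd -i also emits a
# matching *_len length variable for each array.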
tmpl.cc: $(wildcard tmpl/*)
echo -n > $@
for f in $*/*; \
do xxd -i $$f >> $@; \
done
tmpl.h: tmpl.cc
echo '#ifndef $*_h' > $@
echo '#define $*_h' >> $@
grep '$*_' $^ | sed -e 's/^/extern /' -e 's/ =.*/;/' >> $@
echo '#endif' >> $@
.PHONY: tests
tests: PATH := $(shell pwd):$(PATH)
tests: all
$(MAKE) -C $@
tests-clean:
$(MAKE) -C tests clean
clean: tests-clean
-rm -f $(TARGET) *.o .*.dep tmpl.cc tmpl.h
-include $(CXXDEPS)

Parser.cc (new file, 423 lines)

@@ -0,0 +1,423 @@
#include <stdio.h>
#include <string.h>
#include <pcre.h>
#include <ctype.h> /* toupper() */
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include "Parser.h"
#include "TokenDefinition.h"
#include "RuleDefinition.h"
#include "tmpl.h"
using namespace std;
#define DEBUG
Parser::Parser()
: m_classname("Parser"), m_namespace(""), m_extension("cc"),
m_token_data(new string()), m_token_code(new string()),
m_defines(new string())
{
}
void Parser::makeDefine(const string & defname, const string & definition)
{
*m_defines += string("#define ") + defname + " " + definition + "\n";
}
bool Parser::write(const string & fname)
{
if (m_tokens.size() < 1 || m_rules.size() < 1)
return false;
string header_fname = fname + ".h";
string body_fname = fname + "." + m_extension;
ofstream header(header_fname.c_str());
ofstream body(body_fname.c_str());
/* process data */
refptr<string> token_classes = new string();
refptr<string> token_classes_code = new string();
int i = 0;
for (list<TokenDefinitionRef>::const_iterator it = m_tokens.begin();
it != m_tokens.end();
it++)
{
char buff[20];
sprintf(buff, "%d", i++);
makeDefine((*it)->getIdentifier(), buff);
*token_classes += (*it)->getClassDefinition();
*token_classes_code += (*it)->getProcessMethod();
}
if (m_namespace != "")
{
makeDefine("I_NAMESPACE", m_namespace);
}
makeDefine("I_CLASSNAME", m_classname);
/* set up replacements */
setReplacement("token_list", buildTokenList());
setReplacement("buildToken", buildBuildToken());
setReplacement("header_name",
new string(string("\"") + header_fname + "\""));
setReplacement("token_code", m_token_code);
setReplacement("token_data", m_token_data);
setReplacement("defines", m_defines);
setReplacement("token_classes", token_classes);
setReplacement("token_classes_code", token_classes_code);
/* write the header */
writeTmpl(header, (char *) tmpl_parser_h, tmpl_parser_h_len);
/* write the body */
writeTmpl(body, (char *) tmpl_parser_cc, tmpl_parser_cc_len);
header.close();
body.close();
return true;
}
bool Parser::writeTmpl(std::ostream & out, char * dat, int len)
{
char * newline;
char * data = dat;
const char * errptr;
int erroffset;
data[len-1] = '\n';
const int ovec_size = 6;
int ovector[ovec_size];
pcre * replace = pcre_compile("{%(\\w+)%}", 0, &errptr, &erroffset, NULL);
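/* Process the template line by line: a {%name%} placeholder is spliced
* with its registered replacement text; other lines pass through. */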
while (data < (dat + len) && (newline = strstr(data, "\n")) != NULL)
{
if (pcre_exec(replace, NULL, data, newline - data,
0, 0, ovector, ovec_size) >= 0)
{
if (ovector[0] > 0)
{
out.write(data, ovector[0]);
}
out << *getReplacement(string(data, ovector[2],
ovector[3] - ovector[2]));
if (ovector[1] < newline - data)
{
out.write(data + ovector[1], newline - data - ovector[1]);
}
}
else
{
out.write(data, newline - data);
}
out << '\n';
data = newline + 1;
}
pcre_free(replace);
return true;
}
refptr<std::string> Parser::getReplacement(const std::string & name)
{
if (m_replacements.find(name) != m_replacements.end())
{
return m_replacements[name];
}
#ifdef DEBUG
cerr << "No replacement found for \"" << name << "\"" << endl;
#endif
return new string("");
}
refptr<string> Parser::buildTokenList()
{
refptr<string> tokenlist = new string();
for (list<TokenDefinitionRef>::const_iterator t = m_tokens.begin();
t != m_tokens.end();
t++)
{
if (t != m_tokens.begin())
*tokenlist += " ";
*tokenlist += "{ \"" + (*t)->getName() + "\", \""
+ (*t)->getCString() + "\", "
+ ((*t)->getProcessFlag() ? "true" : "false") + " }";
if (({typeof(t) tmp = t; ++tmp;}) != m_tokens.end())
*tokenlist += ",\n";
}
return tokenlist;
}
refptr<string> Parser::buildBuildToken()
{
refptr<string> buildToken = new string();
for (list<TokenDefinitionRef>::const_iterator t = m_tokens.begin();
t != m_tokens.end();
t++)
{
*buildToken += "case " + (*t)->getIdentifier() + ":\n";
*buildToken += " token = new " + (*t)->getClassName() + "();\n";
*buildToken += " break;\n";
}
return buildToken;
}
bool Parser::parseInputFile(char * buff, int size)
{
typedef pcre * pcre_ptr;
enum { none, tokens, rules };
pcre_ptr empty, comment, section_name, token, rule,
data_begin, data_end, code_begin, code_end;
struct { pcre_ptr * re; const char * pattern; } exprs[] = {
{&empty, "^\\s*$"},
{&comment, "^\\s*#"},
{&section_name, "^\\s*\\[([^\\]]+?)\\]\\s*$"},
{&token, "^\\s*" /* possible leading ws */
"([a-zA-Z_][a-zA-Z_0-9]*)" /* 1: token name */
"\\s+" /* required whitespace */
"((?:[^\\\\\\s]|\\\\.)+)"}, /* 2: token RE */
{&rule, "^\\s*(\\S+)\\s*:=(.*)$"},
{&data_begin, "^\\s*\\${"},
{&data_end, "\\$}"},
{&code_begin, "^\\s*%{"},
{&code_end, "%}"}
};
const int ovec_size = 3 * 10;
int ovector[ovec_size];
int lineno = 0;
char * newline;
char * input = buff;
string current_section_name;
map<string, int> sections;
sections["none"] = none;
sections["tokens"] = tokens;
sections["rules"] = rules;
int section = none;
string line;
bool append_line = false;
bool gathering_data = false;
bool gathering_code = false;
string gather;
bool continue_line = false;
TokenDefinitionRef current_token;
for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
{
const char * errptr;
int erroffset;
*exprs[i].re = pcre_compile(exprs[i].pattern, 0,
&errptr, &erroffset, NULL);
if (*exprs[i].re == NULL)
{
cerr << "Error compiling regex '" << exprs[i].pattern <<
"': " << errptr << " at position " << erroffset << endl;
return false;
}
}
for (;;)
{
if (continue_line)
{
continue_line = false;
}
else
{
if ((newline = strstr(input, "\n")) == NULL)
break;
int line_length = newline - input;
if (line_length >= 1 && newline[-1] == '\r')
{
newline[-1] = '\n';
line_length--;
}
lineno++;
if (append_line)
{
line += string(input, line_length);
}
else
{
line = string(input, line_length);
}
input = newline + 1; /* set up for next loop iteration */
}
if ( (pcre_exec(empty, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
|| (pcre_exec(comment, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
)
{
/* skip empty or comment lines */;
continue;
}
if (! (gathering_code || gathering_data) )
{
if (line.size() > 0 && line[line.size()-1] == '\\')
{
line[line.size()-1] = ' ';
append_line = true;
continue;
}
else
{
append_line = false;
}
if (pcre_exec(section_name, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
current_section_name
= string(line, ovector[2], ovector[3] - ovector[2]);
if (sections.find(current_section_name) != sections.end())
{
section = sections[current_section_name];
}
else
{
cerr << "Unknown section name '" << current_section_name
<< "'!" << endl;
return false;
}
continue;
}
}
switch (section)
{
case none:
cerr << "Unrecognized input on line " << lineno << endl;
return false;
case tokens:
if (gathering_data)
{
if (pcre_exec(data_end, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gather += string(line, 0, ovector[0]) + "\n";
gathering_data = false;
line = string(line, ovector[1]);
continue_line = true;
if (current_token.isNull())
{
*m_token_data += gather;
}
else
{
current_token->addData(gather);
}
}
else
{
gather += line + "\n";
}
continue;
}
else if (gathering_code)
{
if (pcre_exec(code_end, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gather += string(line, 0, ovector[0]) + "\n";
gathering_code = false;
line = string(line, ovector[1]);
continue_line = true;
if (current_token.isNull())
{
*m_token_code += gather;
}
else
{
current_token->addCode(gather);
}
}
else
{
gather += line + "\n";
}
continue;
}
else if (pcre_exec(data_begin, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gathering_data = true;
gather = "";
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else if (pcre_exec(code_begin, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gathering_code = true;
gather = "";
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else if (pcre_exec(token, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
string name(line, ovector[2], ovector[3] - ovector[2]);
string definition(line,
ovector[4], ovector[5] - ovector[4]);
current_token = new TokenDefinition();
if (current_token->create(name, definition))
{
addTokenDefinition(current_token);
}
else
{
cerr << "Error in token definition ending on line "
<< lineno << endl;
return false;
}
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else
{
cerr << "Unrecognized input on line " << lineno << endl;
return false;
}
break;
case rules:
if (pcre_exec(rule, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
string name(line, ovector[2], ovector[3] - ovector[2]);
string definition(line,
ovector[4], ovector[5] - ovector[4]);
refptr<RuleDefinition> rd = new RuleDefinition();
if (rd->create(name, definition))
{
addRuleDefinition(rd);
}
else
{
cerr << "Error in rule definition ending on line "
<< lineno << endl;
return false;
}
}
else
{
cerr << "Unrecognized input on line " << lineno << endl;
return false;
}
break;
}
}
for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
{
pcre_free(*exprs[i].re);
}
return true;
}

Parser.h (new file, 61 lines)

@@ -0,0 +1,61 @@
#ifndef PARSER_H
#define PARSER_H
#include <vector>
#include <string>
#include <list>
#include <map>
#include "refptr.h"
#include "TokenDefinition.h"
#include "RuleDefinition.h"
class Parser
{
public:
Parser();
void addTokenDefinition(refptr<TokenDefinition> td)
{
m_tokens.push_back(td);
}
void addRuleDefinition(refptr<RuleDefinition> rd)
{
m_rules.push_back(rd);
}
bool write(const std::string & fname);
bool parseInputFile(char * buff, int size);
void setClassName(const std::string & cn) { m_classname = cn; }
std::string getClassName() { return m_classname; }
void setNamespace(const std::string & ns) { m_namespace = ns; }
std::string getNamespace() { return m_namespace; }
void setExtension(const std::string & e) { m_extension = e; }
std::string getExtension() { return m_extension; }
protected:
refptr<std::string> buildTokenList();
refptr<std::string> buildBuildToken();
bool writeTmpl(std::ostream & out, char * dat, int len);
refptr<std::string> getReplacement(const std::string & name);
void setReplacement(const std::string & name, refptr<std::string> val)
{
m_replacements[name] = val;
}
void makeDefine(const std::string & defname,
const std::string & definition);
std::list<TokenDefinitionRef> m_tokens;
std::vector< refptr< RuleDefinition > > m_rules;
std::string m_classname;
std::string m_namespace;
std::string m_extension;
std::map< std::string, refptr<std::string> > m_replacements;
refptr<std::string> m_token_data;
refptr<std::string> m_token_code;
refptr<std::string> m_defines;
};
#endif

README (new file, 5 lines)

@@ -0,0 +1,5 @@
Imbecile is a bottom-up parser generator. It targets C++ and automatically
generates a class hierarchy for interacting with the parser.
Imbecile generates both a lexer and a parser from the rules given in its
input file.
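
A hypothetical input file, inferred from the grammar Parser.cc accepts
([tokens] and [rules] sections, "name regex" token lines, and
"name := components" rules); this is a sketch, not a documented example:

  # tokens are named regular expressions
  [tokens]
  Number  [0-9]+
  Plus    \+
  [rules]
  Expr := Expr Plus Number
  Expr := Number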

README.md (deleted)

@@ -1,31 +0,0 @@
# The Propane Parser Generator
Propane is an LR Parser Generator (LPG) which:
* accepts LR(0), SLR, and LALR grammars
* generates a built-in lexer to tokenize input
* supports UTF-8 lexer inputs
* generates a table-driven parser to parse input in linear time
* is MIT-licensed
* is distributable as a standalone Ruby script
## Installation
TODO
## Usage
TODO: Write usage instructions here
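In the meantime, here is a hypothetical grammar sketch inferred from the forms
`lib/propane.rb` parses (`token`, `drop`, and `Name : [components] << code >>`);
it is illustrative, not documented usage:

    token plus \+
    token integer \d+
    drop \s+
    Start : [Expr] <<
    >>
    Expr : [integer plus integer] <<
    >>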
## Development
After checking out the repository, run `bundle install` to install dependencies.
Run `rake spec` to execute tests.
## Contributing
Bug reports and pull requests are welcome on GitHub at https://github.com/holtrop/propane.
## License
Propane is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).

Rakefile (deleted)

@@ -1,9 +0,0 @@
require "rspec/core/rake_task"
RSpec::Core::RakeTask.new(:spec, :example_pattern) do |task, args|
if args.example_pattern
task.rspec_opts = %W[-e "#{args.example_pattern}" -f documentation]
end
end
task :default => :spec

RuleDefinition.cc (new file, 9 lines)

@@ -0,0 +1,9 @@
#include "RuleDefinition.h"
using namespace std;
bool RuleDefinition::create(const string & name, const string & definition)
{
m_name = name;
/* The definition text is not parsed yet; record the name and succeed. */
return true;
}

RuleDefinition.h (new file, 16 lines)

@@ -0,0 +1,16 @@
#ifndef RULEDEFINITION_H
#define RULEDEFINITION_H
#include <string>
class RuleDefinition
{
public:
bool create(const std::string & name, const std::string & definition);
protected:
std::string m_name;
};
#endif

TokenDefinition.cc (new file, 125 lines)

@@ -0,0 +1,125 @@
#include <pcre.h>
#include <iostream>
#include <string>
#include <vector>
#include "TokenDefinition.h"
#include "refptr.h"
using namespace std;
#define WHITESPACE " \n\r\t\v"
static string trim(string s)
{
size_t lastpos = s.find_last_not_of(WHITESPACE);
if (lastpos == string::npos)
return "";
s.erase(lastpos + 1);
s.erase(0, s.find_first_not_of(WHITESPACE));
return s;
}
static refptr< vector<string> > split(const string & delim, string str)
{
refptr< vector<string> > ret = new vector<string>();
size_t pos;
while ( (pos = str.find(delim)) != string::npos )
{
string t = str.substr(0, pos);
ret->push_back(t);
str.erase(0, pos + 1);
}
if (str != "")
ret->push_back(str);
return ret;
}
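/* Escape backslashes and double quotes so a regex pattern can be
* emitted inside a C string literal. */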
static string c_escape(const string & orig)
{
string result;
for (string::const_iterator it = orig.begin(); it != orig.end(); it++)
{
if (*it == '\\' || *it == '"')
result += '\\';
result += *it;
}
return result;
}
TokenDefinition::TokenDefinition()
: m_process(false)
{
}
bool TokenDefinition::create(const string & name,
const string & definition)
{
const char * errptr;
int erroffset;
pcre * re = pcre_compile(definition.c_str(), 0, &errptr, &erroffset, NULL);
if (re == NULL)
{
cerr << "Error compiling regular expression '" << definition
<< "' at position " << erroffset << ": " << errptr << endl;
return false;
}
m_name = name;
m_definition = definition;
pcre_free(re);
#if 0
refptr< vector< string > > parts = split(",", flags);
for (int i = 0, sz = parts->size(); i < sz; i++)
{
(*parts)[i] = trim((*parts)[i]);
string & s = (*parts)[i];
if (s == "p")
{
m_process = true;
}
else
{
cerr << "Unknown token flag \"" << s << "\"" << endl;
return false;
}
}
#endif
return true;
}
string TokenDefinition::getCString() const
{
return c_escape(m_definition);
}
string TokenDefinition::getClassDefinition() const
{
string ret = "class "+ getClassName() + " : public Token {\n";
ret += "public:\n";
if (m_process)
{
ret += " virtual void process(const Matches & matches);\n";
}
ret += "\n";
ret += "protected:\n";
ret += m_data + "\n";
ret += "};\n";
return ret;
}
string TokenDefinition::getProcessMethod() const
{
string ret;
if (m_code != "")
{
ret += "void " + getClassName() + "::process(const Matches & matches) {\n";
ret += m_code + "\n";
ret += "}\n";
}
return ret;
}

TokenDefinition.h (new file, 37 lines)

@@ -0,0 +1,37 @@
#ifndef TOKENDEFINITION_H
#define TOKENDEFINITION_H
#include <string>
#include "refptr.h"
class TokenDefinition
{
public:
TokenDefinition();
bool create(const std::string & name,
const std::string & definition);
std::string getCString() const;
std::string getName() const { return m_name; }
bool getProcessFlag() const { return m_process; }
void setProcessFlag(bool p) { m_process = p; }
void addData(const std::string & d) { m_data += d; }
std::string getData() const { return m_data; }
void addCode(const std::string & c) { m_code += c; m_process = true; }
std::string getCode() const { return m_code; }
std::string getClassDefinition() const;
std::string getProcessMethod() const;
std::string getIdentifier() const { return "TK_" + m_name; }
std::string getClassName() const { return "Tk" + m_name; }
protected:
std::string m_name;
std::string m_definition;
bool m_process;
std::string m_data;
std::string m_code;
};
typedef refptr<TokenDefinition> TokenDefinitionRef;
#endif

assets/parser.d.erb (deleted)

@@ -1,252 +0,0 @@
<% if @modulename %>
module <%= @modulename %>;
<% end %>
class <%= classname %>
{
enum
{
<% @tokens.each_with_index do |(name, token), index| %>
<% if token.name %>
TOKEN_<%= token.c_name %> = <%= index %>,
<% end %>
<% end %>
TOKEN_EOF = <%= TOKEN_EOF %>,
TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>,
TOKEN_DROP = <%= TOKEN_DROP %>,
TOKEN_NONE = <%= TOKEN_NONE %>,
}
static immutable string TokenNames[] = [
<% @tokens.each_with_index do |(name, token), index| %>
<% if token.name %>
"<%= token.name %>",
<% else %>
null,
<% end %>
<% end %>
];
static class Decoder
{
enum
{
CODE_POINT_INVALID = 0xFFFFFFFE,
CODE_POINT_EOF = 0xFFFFFFFF,
}
struct DecodedCodePoint
{
uint code_point;
uint code_point_length;
}
static DecodedCodePoint decode_code_point(const(ubyte) * input, size_t input_length)
{
if (input_length == 0u)
{
return DecodedCodePoint(CODE_POINT_EOF, 0u);
}
ubyte c = *input;
uint code_point;
uint code_point_length;
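// UTF-8: the leading byte's high bits encode the sequence length, and
// each continuation byte contributes six more payload bits.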
if ((c & 0x80u) == 0u)
{
code_point = c;
code_point_length = 1u;
}
else
{
ubyte following_bytes;
if ((c & 0xE0u) == 0xC0u)
{
code_point = c & 0x1Fu;
following_bytes = 1u;
}
else if ((c & 0xF0u) == 0xE0u)
{
code_point = c & 0x0Fu;
following_bytes = 2u;
}
else if ((c & 0xF8u) == 0xF0u)
{
code_point = c & 0x07u;
following_bytes = 3u;
}
else if ((c & 0xFCu) == 0xF8u)
{
code_point = c & 0x03u;
following_bytes = 4u;
}
else if ((c & 0xFEu) == 0xFCu)
{
code_point = c & 0x01u;
following_bytes = 5u;
}
else
{
/* Invalid leading byte (0xFEu/0xFFu): not a UTF-8 sequence start. */
return DecodedCodePoint(CODE_POINT_INVALID, 0u);
}
if (input_length <= following_bytes)
{
return DecodedCodePoint(CODE_POINT_INVALID, 0u);
}
code_point_length = following_bytes + 1u;
while (following_bytes-- > 0u)
{
input++;
code_point <<= 6u;
code_point |= *input & 0x3Fu;
}
}
return DecodedCodePoint(code_point, code_point_length);
}
}
static class Lexer
{
private struct Transition
{
uint first;
uint last;
uint destination;
}
private struct State
{
uint transition_table_index;
uint n_transitions;
uint accepts;
}
<% transition_table, state_table = lexer.dfa.build_tables %>
private static const Transition transitions[] = [
<% transition_table.each do |transition_table_entry| %>
Transition(<%= transition_table_entry[:first] %>u, <%= transition_table_entry[:last] %>u, <%= transition_table_entry[:destination] %>u),
<% end %>
];
private static const State states[] = [
<% state_table.each do |state_table_entry| %>
State(<%= state_table_entry[:transition_table_index] %>u, <%= state_table_entry[:n_transitions] %>u, <%= state_table_entry[:accepts] %>u),
<% end %>
];
struct LexedToken
{
size_t row;
size_t col;
size_t length;
uint token;
}
private const(ubyte) * m_input;
private size_t m_input_length;
private size_t m_input_position;
private size_t m_input_row;
private size_t m_input_col;
this(const(ubyte) * input, size_t input_length)
{
m_input = input;
m_input_length = input_length;
}
LexedToken lex_token()
{
for (;;)
{
LexedToken lt = attempt_lex_token();
if (lt.token != TOKEN_DROP)
{
return lt;
}
}
}
private LexedToken attempt_lex_token()
{
LexedToken lt = LexedToken(m_input_row, m_input_col, 0, TOKEN_NONE);
struct LexedTokenState
{
size_t length;
size_t delta_row;
size_t delta_col;
uint token;
}
LexedTokenState last_accepts_info;
last_accepts_info.token = TOKEN_NONE;
LexedTokenState attempt_info;
uint current_state;
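// Maximal munch: advance the DFA as long as a transition exists,
// remembering the last accepting position, and emit the longest match.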
for (;;)
{
auto decoded = Decoder.decode_code_point(&m_input[m_input_position + attempt_info.length], m_input_length - m_input_position - attempt_info.length);
if (decoded.code_point == Decoder.CODE_POINT_INVALID)
{
lt.token = TOKEN_DECODE_ERROR;
return lt;
}
bool lex_continue = false;
if (decoded.code_point != Decoder.CODE_POINT_EOF)
{
uint dest = transition(current_state, decoded.code_point);
if (dest != cast(uint)-1)
{
lex_continue = true;
attempt_info.length += decoded.code_point_length;
if (decoded.code_point == '\n')
{
attempt_info.delta_row++;
attempt_info.delta_col = 0u;
}
else
{
attempt_info.delta_col++;
}
current_state = dest;
if (states[current_state].accepts != TOKEN_NONE)
{
attempt_info.token = states[current_state].accepts;
last_accepts_info = attempt_info;
}
}
}
else if (attempt_info.length == 0u)
{
lt.token = TOKEN_EOF;
break;
}
if (!lex_continue)
{
if (last_accepts_info.token != TOKEN_NONE)
{
lt.token = last_accepts_info.token;
lt.length = last_accepts_info.length;
m_input_position += last_accepts_info.length;
m_input_row += last_accepts_info.delta_row;
if (last_accepts_info.delta_row != 0u)
{
m_input_col = last_accepts_info.delta_col;
}
else
{
m_input_col += last_accepts_info.delta_col;
}
}
break;
}
}
return lt;
}
private uint transition(uint current_state, uint code_point)
{
uint transition_table_index = states[current_state].transition_table_index;
for (uint i = 0u; i < states[current_state].n_transitions; i++)
{
if ((transitions[transition_table_index + i].first <= code_point) &&
(code_point <= transitions[transition_table_index + i].last))
{
return transitions[transition_table_index + i].destination;
}
}
return cast(uint)-1;
}
}
}

bin/propane (deleted)

@@ -1,5 +0,0 @@
#!/usr/bin/env ruby
require "propane"
exit Propane::CLI.run(ARGV.dup)

imbecile.cc (new file, 101 lines)

@@ -0,0 +1,101 @@
#include <getopt.h>
#include <iostream>
#include <fstream>
#include "refptr.h"
#include "Parser.h"
using namespace std;
string buildOutputFilename(string & input_fname);
int main(int argc, char * argv[])
{
int longind = 1;
int opt;
Parser p;
string outfile;
static struct option longopts[] = {
/* name, has_arg, flag, val */
{ "classname", required_argument, NULL, 'c' },
{ "extension", required_argument, NULL, 'e' },
{ "namespace", required_argument, NULL, 'n' },
{ "outfile", required_argument, NULL, 'o' },
{ NULL, 0, NULL, 0 }
};
while ((opt = getopt_long(argc, argv, "", longopts, &longind)) != -1)
{
switch (opt)
{
case 'c': /* classname */
p.setClassName(optarg);
break;
case 'e': /* extension */
p.setExtension(optarg);
break;
case 'n': /* namespace */
p.setNamespace(optarg);
break;
case 'o': /* outfile */
outfile = optarg;
break;
}
}
if (optind >= argc)
{
cerr << "Usage: imbecile [options] <input-file>" << endl;
return 1;
}
string input_fname = argv[optind];
ifstream ifs;
ifs.open(input_fname.c_str(), ios::binary);
if (!ifs.is_open())
{
cerr << "Error opening input file: '" << input_fname << "'";
return 2;
}
ifs.seekg(0, ios_base::end);
int size = ifs.tellg();
ifs.seekg(0, ios_base::beg);
char * buff = new char[size + 1];
ifs.read(buff, size);
buff[size] = '\0'; /* terminate so parseInputFile()'s strstr() cannot overrun */
ifs.close();
if (outfile == "")
outfile = buildOutputFilename(input_fname);
if (!p.parseInputFile(buff, size))
{
cerr << "Error parsing " << input_fname << endl;
return 3;
}
if (!p.write(outfile))
{
cerr << "Error processing " << input_fname << endl;
return 4;
}
delete[] buff;
return 0;
}
string buildOutputFilename(string & input_fname)
{
string outfile;
size_t len = input_fname.length();
if (len > 2 && input_fname.substr(len - 2) == ".I")
{
outfile = input_fname.substr(0, len - 2);
}
else
{
outfile = input_fname;
}
return outfile;
}

lib/propane.rb (deleted)

@@ -1,137 +0,0 @@
require "erb"
require "set"
require_relative "propane/cli"
require_relative "propane/code_point_range"
require_relative "propane/fa"
require_relative "propane/fa/state"
require_relative "propane/fa/state/transition"
require_relative "propane/lexer"
require_relative "propane/lexer/dfa"
require_relative "propane/parser"
require_relative "propane/parser/item"
require_relative "propane/parser/item_set"
require_relative "propane/regex"
require_relative "propane/regex/nfa"
require_relative "propane/regex/unit"
require_relative "propane/rule"
require_relative "propane/token"
require_relative "propane/version"
class Propane
# EOF.
TOKEN_EOF = 0xFFFFFFFC
# Decoding error.
TOKEN_DECODE_ERROR = 0xFFFFFFFD
# Token ID for a "dropped" token.
TOKEN_DROP = 0xFFFFFFFE
# Invalid token ID.
TOKEN_NONE = 0xFFFFFFFF
class Error < RuntimeError
end
def initialize(input)
@tokens = {}
@rules = {}
input = input.gsub("\r\n", "\n")
while !input.empty?
parse_grammar(input)
end
end
def generate(output_file, log_file)
expand_rules
lexer = Lexer.new(@tokens)
parser = Parser.new(@tokens, @rules)
classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../assets/parser.d.erb")), trim_mode: "<>")
result = erb.result(binding.clone)
File.open(output_file, "wb") do |fh|
fh.write(result)
end
end
private
def parse_grammar(input)
if input.slice!(/\A\s+/)
# Skip white space.
elsif input.slice!(/\A#.*\n/)
# Skip comment lines.
elsif input.slice!(/\Amodule\s+(\S+)\n/)
@modulename = $1
elsif input.slice!(/\Aclass\s+(\S+)\n/)
@classname = $1
elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
name, pattern = $1, $2
if pattern.nil?
pattern = name
end
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
raise Error.new("Invalid token name #{name}")
end
if @tokens[name]
raise Error.new("Duplicate token name #{name}")
else
@tokens[name] = Token.new(name, pattern, @tokens.size)
end
elsif input.slice!(/\Adrop\s+(\S+)\n/)
pattern = $1
# Drop patterns have no name; store the token under the nil key.
@tokens[nil] = Token.new(nil, pattern, @tokens.size)
elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
rule_name, components, code = $1, $2, $3
components = components.strip.split(/\s+/)
@rules[rule_name] ||= Rule.new(rule_name, @rules.size)
@rules[rule_name].add_pattern(components, code)
else
if input.size > 25
input = input.slice(0..20) + "..."
end
raise Error.new("Unexpected grammar input: #{input}")
end
end
def expand_rules
@rules.each do |rule_name, rule|
if @tokens.include?(rule_name)
raise Error.new("Rule name collides with token name #{rule_name}")
end
end
unless @rules["Start"]
raise Error.new("Start rule not found")
end
@rules.each do |rule_name, rule|
rule.patterns.each do |pattern|
pattern.components.map! do |component|
if @tokens[component]
@tokens[component]
elsif @rules[component]
@rules[component]
else
raise Error.new("Symbol #{component} not found")
end
end
end
end
end
class << self
def run(input_file, output_file, log_file)
begin
propane = Propane.new(File.read(input_file))
propane.generate(output_file, log_file)
rescue Error => e
$stderr.puts e.message
return 2
end
return 0
end
end
end

lib/propane/cli.rb (deleted)

@@ -1,54 +0,0 @@
class Propane
module CLI
USAGE = <<EOF
Usage: #{$0} [options] <input-file> <output-file>
Options:
--log LOG Write log file
--version Show program version and exit
-h, --help Show this usage and exit
EOF
class << self
def run(args)
params = []
log_file = nil
i = 0
while i < args.size
arg = args[i]
case arg
when "--log"
if i + 1 < args.size
i += 1
log_file = args[i]
end
when "--version"
puts "propane v#{VERSION}"
return 0
when "-h", "--help"
puts USAGE
return 0
when /^-/
$stderr.puts "Error: unknown option #{arg}"
return 1
else
params << arg
end
i += 1
end
if params.size != 2
$stderr.puts "Error: specify input and output files"
return 1
end
unless File.readable?(params[0])
$stderr.puts "Error: cannot read #{params[0]}"
return 2
end
Propane.run(*params, log_file)
end
end
end
end

lib/propane/code_point_range.rb (deleted)

@@ -1,84 +0,0 @@
class Propane
class CodePointRange
MAX_CODE_POINT = 0xFFFFFFFF
attr_reader :first
attr_reader :last
include Comparable
# Build a CodePointRange
def initialize(first, last = nil)
@first = first.ord
if last
@last = last.ord
if @last < @first
raise "Invalid CodePointRange: last code point must be > first code point"
end
else
@last = @first
end
end
def <=>(other)
if self.first != other.first
@first <=> other.first
else
@last <=> other.last
end
end
def include?(v)
if v.is_a?(CodePointRange)
@first <= v.first && v.last <= @last
else
@first <= v && v <= @last
end
end
def size
@last - @first + 1
end
class << self
def invert_ranges(code_point_ranges)
new_ranges = []
last_cp = -1
code_point_ranges.sort.each do |code_point_range|
if code_point_range.first > (last_cp + 1)
new_ranges << CodePointRange.new(last_cp + 1, code_point_range.first - 1)
last_cp = code_point_range.last
else
last_cp = [last_cp, code_point_range.last].max
end
end
if last_cp < MAX_CODE_POINT
new_ranges << CodePointRange.new(last_cp + 1, MAX_CODE_POINT)
end
new_ranges
end
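# Return the first disjoint subrange: it begins at the lowest first code
# point and ends just before the boundary of any other range.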
def first_subrange(code_point_ranges)
code_point_ranges.sort.reduce do |result, code_point_range|
if code_point_range.include?(result.first)
if code_point_range.last < result.last
code_point_range
else
result
end
else
if code_point_range.first <= result.last
CodePointRange.new(result.first, code_point_range.first - 1)
else
result
end
end
end
end
end
end
end

lib/propane/fa.rb (deleted)

@@ -1,61 +0,0 @@
class Propane
class FA
attr_reader :start_state
def initialize
@start_state = State.new
end
def to_s
chr = lambda do |value|
if value < 32 || value > 127
"{#{value}}"
else
value.chr
end
end
rv = ""
states = enumerate
states.each do |state, id|
accepts_s = state.accepts ? " #{state.accepts}" : ""
rv += "#{id}#{accepts_s}:\n"
state.transitions.each do |transition|
if transition.nil?
range_s = "nil"
else
range_s = chr[transition.code_point_range.first]
if transition.code_point_range.size > 1
range_s += "-" + chr[transition.code_point_range.last]
end
end
accepts_s = transition.destination.accepts ? " #{transition.destination.accepts}" : ""
rv += " #{range_s} => #{states[transition.destination]}#{accepts_s}\n"
end
end
rv
end
def enumerate
@_enumerated ||=
begin
id = 0
states = {}
visit = lambda do |state|
unless states.include?(state)
states[state] = id
id += 1
state.transitions.each do |transition|
visit[transition.destination]
end
end
end
visit[@start_state]
states
end
end
end
end

lib/propane/fa/state.rb (deleted)

@@ -1,51 +0,0 @@
class Propane
class FA
class State
attr_accessor :accepts
attr_reader :transitions
def initialize
@transitions = []
end
def add_transition(code_point_range, destination)
@transitions << Transition.new(code_point_range, destination)
end
# Determine the set of states that can be reached by nil transitions
# from this state.
#
# @return [Set<NFA::State>]
# Set of states.
def nil_transition_states
states = Set[self]
analyze_state = lambda do |state|
state.nil_transitions.each do |transition|
unless states.include?(transition.destination)
states << transition.destination
analyze_state[transition.destination]
end
end
end
analyze_state[self]
states
end
def nil_transitions
@transitions.select do |transition|
transition.nil?
end
end
def cp_transitions
@transitions.reject do |transition|
transition.nil?
end
end
end
end
end

lib/propane/fa/state/transition.rb (deleted)

@@ -1,23 +0,0 @@
class Propane
class FA
class State
class Transition
attr_reader :code_point_range
attr_reader :destination
def initialize(code_point_range, destination)
@code_point_range = code_point_range
@destination = destination
end
def nil?
@code_point_range.nil?
end
end
end
end
end

lib/propane/lexer.rb (deleted)

@@ -1,13 +0,0 @@
class Propane
class Lexer
# @return [DFA]
# Lexer DFA.
attr_accessor :dfa
def initialize(tokens)
@dfa = DFA.new(tokens)
end
end
end

lib/propane/lexer/dfa.rb (deleted)

@@ -1,118 +0,0 @@
class Propane
class Lexer
class DFA < FA
def initialize(tokens)
super()
start_nfa = Regex::NFA.new
tokens.each do |name, token|
start_nfa.start_state.add_transition(nil, token.nfa.start_state)
end
@nfa_state_sets = {}
@states = []
@to_process = Set.new
nil_transition_states = start_nfa.start_state.nil_transition_states
register_nfa_state_set(nil_transition_states)
while @to_process.size > 0
state_set = @to_process.first
@to_process.delete(state_set)
process_nfa_state_set(state_set)
end
@start_state = @states[0]
end
def build_tables
transition_table = []
state_table = []
states = enumerate
states.each do |state, id|
accepts =
if state.accepts.nil?
TOKEN_NONE
elsif state.accepts.name
state.accepts.id
else
TOKEN_DROP
end
state_table << {
transition_table_index: transition_table.size,
n_transitions: state.transitions.size,
accepts: accepts,
}
state.transitions.each do |transition|
transition_table << {
first: transition.code_point_range.first,
last: transition.code_point_range.last,
destination: states[transition.destination],
}
end
end
[transition_table, state_table]
end
private
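# Classic subset construction: each DFA state stands for the set of NFA
# states reachable via nil transitions; when several NFA states accept,
# the token with the lowest id wins.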
def register_nfa_state_set(nfa_state_set)
unless @nfa_state_sets.include?(nfa_state_set)
state_id = @states.size
@nfa_state_sets[nfa_state_set] = state_id
@states << State.new
@to_process << nfa_state_set
end
end
def process_nfa_state_set(nfa_state_set)
state_id = @nfa_state_sets[nfa_state_set]
state = @states[state_id]
if state_id > 0
nfa_state_set.each do |nfa_state|
if nfa_state.accepts
if state.accepts
if nfa_state.accepts.id < state.accepts.id
state.accepts = nfa_state.accepts
end
else
state.accepts = nfa_state.accepts
end
end
end
end
transitions = transitions_for(nfa_state_set)
while transitions.size > 0
subrange = CodePointRange.first_subrange(transitions.map(&:code_point_range))
dest_nfa_states = transitions.reduce(Set.new) do |result, transition|
if transition.code_point_range.include?(subrange)
result << transition.destination
end
result
end
dest_nfa_states = dest_nfa_states.reduce(Set.new) do |result, dest_nfa_state|
result + dest_nfa_state.nil_transition_states
end
register_nfa_state_set(dest_nfa_states)
dest_state = @states[@nfa_state_sets[dest_nfa_states]]
state.add_transition(subrange, dest_state)
transitions.delete_if do |transition|
transition.code_point_range.last <= subrange.last
end
transitions.map! do |transition|
if transition.code_point_range.first <= subrange.last
Regex::NFA::State::Transition.new(CodePointRange.new(subrange.last + 1, transition.code_point_range.last), transition.destination)
else
transition
end
end
end
end
def transitions_for(nfa_state_set)
nfa_state_set.reduce([]) do |result, state|
result + state.cp_transitions
end
end
end
end
end

lib/propane/parser.rb (deleted)

@@ -1,84 +0,0 @@
class Propane
class Parser
def initialize(tokens, rules)
@token_eof = Token.new("$", nil, TOKEN_EOF)
@item_sets = []
@item_sets_set = {}
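# Build the canonical collection of LR(0) item sets, seeded from the
# Start rule's patterns with the end-of-file token appended.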
start_items = rules["Start"].patterns.map do |pattern|
pattern.components << @token_eof
Item.new(pattern, 0)
end
eval_item_sets = Set.new
eval_item_sets << ItemSet.new(start_items)
while eval_item_sets.size > 0
this_eval_item_sets = eval_item_sets
eval_item_sets = Set.new
this_eval_item_sets.each do |item_set|
unless @item_sets_set.include?(item_set)
item_set.id = @item_sets.size
@item_sets << item_set
@item_sets_set[item_set] = item_set
item_set.follow_symbols.each do |follow_symbol|
unless follow_symbol == @token_eof
follow_set = item_set.build_follow_set(follow_symbol)
eval_item_sets << follow_set
end
end
end
end
end
@item_sets.each do |item_set|
process_item_set(item_set)
puts "Item set #{item_set.id}:"
ids = item_set.in_sets.map(&:id)
if ids.size > 0
puts " (in from #{ids.join(", ")})"
end
puts item_set
item_set.follow_item_set.each do |follow_symbol, follow_item_set|
puts " #{follow_symbol.name} => #{follow_item_set.id}"
end
puts
end
end
def build_tables
shift_table = []
state_table = []
@item_sets.each do |item_set|
shift_entries = item_set.follow_symbols.select do |follow_symbol|
follow_symbol.is_a?(Token)
end.map do |follow_symbol|
{
token_id: follow_symbol.id,
state_id: item_set.follow_item_set[follow_symbol].id,
}
end
state_table << {
shift_index: shift_table.size,
n_shifts: shift_entries.size,
}
shift_table += shift_entries
end
[state_table, shift_table]
end
private
def process_item_set(item_set)
item_set.follow_symbols.each do |follow_symbol|
unless follow_symbol == @token_eof
follow_set = @item_sets_set[item_set.build_follow_set(follow_symbol)]
item_set.follow_item_set[follow_symbol] = follow_set
follow_set.in_sets << item_set
end
end
end
end
end

lib/propane/parser/item.rb (deleted)

@@ -1,69 +0,0 @@
class Propane
class Parser
class Item
attr_reader :pattern
attr_reader :position
def initialize(pattern, position)
@pattern = pattern
@position = position
end
def next_component
@pattern.components[@position]
end
def hash
[@pattern, @position].hash
end
def ==(other)
@pattern == other.pattern && @position == other.position
end
def eql?(other)
self == other
end
def closed_items
if @pattern.components[@position].is_a?(Rule)
@pattern.components[@position].patterns.map do |pattern|
Item.new(pattern, 0)
end
else
[]
end
end
def follow_symbol
@pattern.components[@position]
end
def followed_by?(symbol)
follow_symbol == symbol
end
def next_position
Item.new(@pattern, @position + 1)
end
def to_s
parts = []
@pattern.components.each_with_index do |symbol, index|
if @position == index
parts << "."
end
parts << symbol.name
end
if @position == @pattern.components.size
parts << "."
end
"#{@pattern.rule.name} -> #{parts.join(" ")}"
end
end
end
end

lib/propane/parser/item_set.rb (deleted)

@@ -1,76 +0,0 @@
class Propane
class Parser
class ItemSet
attr_reader :items
attr_accessor :id
# @return [Hash]
# Maps a follow symbol to its item set.
attr_reader :follow_item_set
# @return [Set]
# Item sets leading to this item set.
attr_reader :in_sets
def initialize(items)
@items = Set.new(items)
@follow_item_set = {}
@in_sets = Set.new
close!
end
def follow_symbols
Set.new(@items.map(&:follow_symbol).compact)
end
def build_follow_set(symbol)
ItemSet.new(items_followed_by(symbol).map(&:next_position))
end
def hash
@items.hash
end
def ==(other)
@items.eql?(other.items)
end
def eql?(other)
self == other
end
def to_s
@items.map(&:to_s).join("\n")
end
private
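# Closure: for every item whose dot sits before a rule, add that rule's
# patterns as fresh position-0 items until a fixed point is reached.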
def close!
eval_items = @items
while eval_items.size > 0
this_eval_items = eval_items
eval_items = Set.new
this_eval_items.each do |item|
item.closed_items.each do |new_item|
unless @items.include?(new_item)
eval_items << new_item
end
end
end
@items += eval_items
end
end
def items_followed_by(symbol)
@items.select do |item|
item.followed_by?(symbol)
end
end
end
end
end

lib/propane/regex.rb (deleted)

@@ -1,162 +0,0 @@
class Propane
class Regex
attr_reader :unit
attr_reader :nfa
def initialize(pattern)
@pattern = pattern.dup
@unit = parse_alternates
@nfa = @unit.to_nfa
if @pattern != ""
raise Error.new(%[Unexpected "#{@pattern}" in pattern])
end
end
private
def parse_alternates
au = AlternatesUnit.new
while @pattern != ""
c = @pattern[0]
return au if c == ")"
@pattern.slice!(0)
case c
when "["
au << parse_character_class
when "("
au << parse_group
when "*", "+", "?", "{"
if last_unit = au.last_unit
case c
when "*"
min_count, max_count = 0, nil
when "+"
min_count, max_count = 1, nil
when "?"
min_count, max_count = 0, 1
when "{"
min_count, max_count = parse_curly_count
end
mu = MultiplicityUnit.new(last_unit, min_count, max_count)
au.replace_last!(mu)
else
raise Error.new("#{c} follows nothing")
end
when "|"
au.new_alternate!
when "\\"
au << parse_backslash
when "."
au << period_character_class
else
au << CharacterRangeUnit.new(c)
end
end
au
end
def parse_group
au = parse_alternates
if @pattern[0] != ")"
raise Error.new("Unterminated group in pattern")
end
@pattern.slice!(0)
au
end
def parse_character_class
ccu = CharacterClassUnit.new
index = 0
loop do
if @pattern == ""
raise Error.new("Unterminated character class")
end
c = @pattern.slice!(0)
if c == "]"
break
elsif c == "^" && index == 0
ccu.negate = true
elsif c == "-" && (ccu.size == 0 || @pattern[0] == "]")
ccu << CharacterRangeUnit.new(c)
elsif c == "\\"
ccu << parse_backslash
elsif c == "-" && @pattern[0] != "]"
begin_cu = ccu.last_unit
unless begin_cu.is_a?(CharacterRangeUnit) && begin_cu.code_point_range.size == 1
raise Error.new("Character range must be between single characters")
end
if @pattern[0] == "\\"
@pattern.slice!(0)
end_cu = parse_backslash
unless end_cu.is_a?(CharacterRangeUnit) && end_cu.code_point_range.size == 1
raise Error.new("Character range must be between single characters")
end
max_code_point = end_cu.first
else
max_code_point = @pattern[0].ord
@pattern.slice!(0)
end
cru = CharacterRangeUnit.new(begin_cu.first, max_code_point)
ccu.replace_last!(cru)
else
ccu << CharacterRangeUnit.new(c)
end
index += 1
end
ccu
end
def parse_curly_count
if @pattern =~ /^(\d+)(?:(,)(\d*))?\}(.*)$/
min_count, comma, max_count, pattern = $1, $2, $3, $4
min_count = min_count.to_i
if comma.to_s == ""
max_count = min_count
elsif max_count.to_s != ""
max_count = max_count.to_i
if max_count < min_count
raise Error.new("Maximum repetition count cannot be less than minimum repetition count")
end
else
max_count = nil
end
@pattern = pattern
[min_count, max_count]
else
raise Error.new("Unexpected match count at #{@pattern}")
end
end
def parse_backslash
if @pattern == ""
raise Error.new("Error: unfollowed \\")
else
c = @pattern.slice!(0)
case c
when "d"
CharacterRangeUnit.new("0", "9")
when "s"
ccu = CharacterClassUnit.new
ccu << CharacterRangeUnit.new(" ")
ccu << CharacterRangeUnit.new("\t")
ccu << CharacterRangeUnit.new("\r")
ccu << CharacterRangeUnit.new("\n")
ccu << CharacterRangeUnit.new("\f")
ccu << CharacterRangeUnit.new("\v")
ccu
else
CharacterRangeUnit.new(c)
end
end
end
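# "." matches any code point except newline, built as the two ranges
# on either side of "\n".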
def period_character_class
ccu = CharacterClassUnit.new
ccu << CharacterRangeUnit.new(0, "\n".ord - 1)
ccu << CharacterRangeUnit.new("\n".ord + 1, 0xFFFFFFFF)
ccu
end
end
end

lib/propane/regex/nfa.rb (deleted)

@@ -1,26 +0,0 @@
class Propane
class Regex
class NFA < FA
attr_reader :end_state
def initialize
super()
@end_state = State.new
end
class << self
def empty
nfa = NFA.new
nfa.start_state.add_transition(nil, nfa.end_state)
nfa
end
end
end
end
end

lib/propane/regex/unit.rb (deleted)

@@ -1,172 +0,0 @@
class Propane
class Regex
class Unit
end
class SequenceUnit < Unit
attr_accessor :units
def initialize
@units = []
end
def method_missing(*args)
@units.__send__(*args)
end
def to_nfa
if @units.empty?
NFA.empty
else
nfa = NFA.new
unit_nfas = @units.map do |unit|
unit.to_nfa
end
nfa.start_state.add_transition(nil, unit_nfas[0].start_state)
unit_nfas.reduce do |prev_nfa, next_nfa|
prev_nfa.end_state.add_transition(nil, next_nfa.start_state)
next_nfa
end.end_state.add_transition(nil, nfa.end_state)
nfa
end
end
end
class AlternatesUnit < Unit
attr_accessor :alternates
def initialize
@alternates = []
new_alternate!
end
def new_alternate!
@alternates << SequenceUnit.new
end
def <<(unit)
@alternates[-1] << unit
end
def last_unit
@alternates[-1][-1]
end
def replace_last!(new_unit)
@alternates[-1][-1] = new_unit
end
def to_nfa
if @alternates.size == 0
NFA.empty
elsif @alternates.size == 1
@alternates[0].to_nfa
else
nfa = NFA.new
alternate_nfas = @alternates.map do |alternate|
alternate.to_nfa
end
alternate_nfas.each do |alternate_nfa|
nfa.start_state.add_transition(nil, alternate_nfa.start_state)
alternate_nfa.end_state.add_transition(nil, nfa.end_state)
end
nfa
end
end
end
class CharacterRangeUnit < Unit
attr_reader :code_point_range
def initialize(c1, c2 = nil)
@code_point_range = CodePointRange.new(c1, c2)
end
def first
@code_point_range.first
end
def last
@code_point_range.last
end
def to_nfa
nfa = NFA.new
nfa.start_state.add_transition(@code_point_range, nfa.end_state)
nfa
end
end
class CharacterClassUnit < Unit
attr_accessor :units
attr_accessor :negate
def initialize
@units = []
@negate = false
end
def method_missing(*args)
@units.__send__(*args)
end
def <<(thing)
if thing.is_a?(CharacterClassUnit)
thing.each do |ccu_unit|
@units << ccu_unit
end
else
@units << thing
end
end
def last_unit
@units[-1]
end
def replace_last!(new_unit)
@units[-1] = new_unit
end
def to_nfa
nfa = NFA.new
if @units.empty?
nfa.start_state.add_transition(nil, nfa.end_state)
else
code_point_ranges = @units.map(&:code_point_range)
if @negate
code_point_ranges = CodePointRange.invert_ranges(code_point_ranges)
end
code_point_ranges.each do |code_point_range|
nfa.start_state.add_transition(code_point_range, nfa.end_state)
end
end
nfa
end
end
class MultiplicityUnit < Unit
attr_accessor :unit
attr_accessor :min_count
attr_accessor :max_count
def initialize(unit, min_count, max_count)
@unit = unit
@min_count = min_count
@max_count = max_count
end
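# Thompson-style construction: chain min_count copies of the unit, then
# either loop one extra copy (unbounded max) or append optional copies
# up to max_count.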
def to_nfa
nfa = NFA.new
last_state = nfa.start_state
unit_nfa = nil
@min_count.times do
unit_nfa = @unit.to_nfa
last_state.add_transition(nil, unit_nfa.start_state)
last_state = unit_nfa.end_state
end
last_state.add_transition(nil, nfa.end_state)
if @max_count.nil?
if @min_count == 0
unit_nfa = @unit.to_nfa
last_state.add_transition(nil, unit_nfa.start_state)
end
unit_nfa.end_state.add_transition(nil, unit_nfa.start_state)
unit_nfa.end_state.add_transition(nil, nfa.end_state)
else
(@max_count - @min_count).times do
unit_nfa = @unit.to_nfa
last_state.add_transition(nil, unit_nfa.start_state)
unit_nfa.end_state.add_transition(nil, nfa.end_state)
last_state = unit_nfa.end_state
end
end
nfa
end
end
end
end

lib/propane/rule.rb (deleted)

@@ -1,39 +0,0 @@
class Propane
class Rule
class Pattern
attr_reader :rule
attr_reader :components
attr_reader :code
def initialize(rule, components, code)
@rule = rule
@components = components
@code = code
end
end
attr_reader :id
attr_reader :name
attr_reader :patterns
def initialize(name, id)
@name = name
@id = id
@patterns = []
end
def add_pattern(components, code)
@patterns << Pattern.new(self, components, code)
end
end
end

lib/propane/token.rb (deleted)

@@ -1,42 +0,0 @@
class Propane
class Token
# @return [String]
# Token name.
attr_reader :name
# @return [String]
# Token pattern.
attr_reader :pattern
# @return [Integer]
# Token ID.
attr_reader :id
# @return [Regex::NFA]
# Regex NFA for matching the token.
attr_reader :nfa
def initialize(name, pattern, id)
@name = name
@pattern = pattern
@id = id
unless pattern.nil?
regex = Regex.new(pattern)
regex.nfa.end_state.accepts = self
@nfa = regex.nfa
end
end
def c_name
@name.upcase
end
def to_s
@name
end
end
end

lib/propane/version.rb (deleted)

@@ -1,3 +0,0 @@
class Propane
VERSION = "0.1.0"
end

(deleted shell wrapper script; filename not shown in this view)

@@ -1,2 +0,0 @@
#!/bin/sh
exec bundle exec ruby -Ilib bin/propane "$@"

refptr (new submodule)

@@ -0,0 +1 @@
Subproject commit e2c7e88824c18eb3b218f6308db0194edb422eef

spec/propane/code_point_range_spec.rb (deleted)

@@ -1,87 +0,0 @@
class Propane
describe CodePointRange do
describe "#<=>" do
it "sorts ranges" do
arr = [
CodePointRange.new(100,102),
CodePointRange.new(65, 68),
CodePointRange.new(65, 65),
CodePointRange.new(100, 100),
CodePointRange.new(68, 70),
]
arr.sort!
expect(arr[0]).to eq CodePointRange.new(65, 65)
expect(arr[1]).to eq CodePointRange.new(65, 68)
expect(arr[2]).to eq CodePointRange.new(68, 70)
expect(arr[3]).to eq CodePointRange.new(100, 100)
expect(arr[4]).to eq CodePointRange.new(100, 102)
end
end
describe "#include?" do
it "returns whether the code point is included in the range" do
expect(CodePointRange.new(100).include?(100)).to be_truthy
expect(CodePointRange.new(100, 100).include?(99)).to be_falsey
expect(CodePointRange.new(100, 100).include?(101)).to be_falsey
expect(CodePointRange.new(100, 120).include?(99)).to be_falsey
expect(CodePointRange.new(100, 120).include?(100)).to be_truthy
expect(CodePointRange.new(100, 120).include?(110)).to be_truthy
expect(CodePointRange.new(100, 120).include?(120)).to be_truthy
expect(CodePointRange.new(100, 120).include?(121)).to be_falsey
end
it "returns whether the range is included in the range" do
expect(CodePointRange.new(100).include?(CodePointRange.new(100))).to be_truthy
expect(CodePointRange.new(100, 100).include?(CodePointRange.new(99))).to be_falsey
expect(CodePointRange.new(100, 100).include?(CodePointRange.new(99, 100))).to be_falsey
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(90, 110))).to be_falsey
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(110, 130))).to be_falsey
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(100, 120))).to be_truthy
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(100, 110))).to be_truthy
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(110, 120))).to be_truthy
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(102, 118))).to be_truthy
end
end
describe ".invert_ranges" do
it "inverts ranges" do
expect(CodePointRange.invert_ranges(
[CodePointRange.new(60, 90),
CodePointRange.new(80, 85),
CodePointRange.new(80, 100),
CodePointRange.new(101),
CodePointRange.new(200, 300)])).to eq [
CodePointRange.new(0, 59),
CodePointRange.new(102, 199),
CodePointRange.new(301, 0xFFFFFFFF)]
expect(CodePointRange.invert_ranges(
[CodePointRange.new(0, 500),
CodePointRange.new(7000, 0xFFFFFFFF)])).to eq [
CodePointRange.new(501, 6999)]
end
end
describe ".first_subrange" do
it "returns the first subrange to split" do
expect(CodePointRange.first_subrange(
[CodePointRange.new(65, 90),
CodePointRange.new(66, 66),
CodePointRange.new(80, 90)])).to eq CodePointRange.new(65)
expect(CodePointRange.first_subrange(
[CodePointRange.new(65, 90)])).to eq CodePointRange.new(65, 90)
expect(CodePointRange.first_subrange(
[CodePointRange.new(65, 90),
CodePointRange.new(80, 90)])).to eq CodePointRange.new(65, 79)
expect(CodePointRange.first_subrange(
[CodePointRange.new(65, 90),
CodePointRange.new(65, 100),
CodePointRange.new(65, 95)])).to eq CodePointRange.new(65, 90)
expect(CodePointRange.first_subrange(
[CodePointRange.new(100, 120),
CodePointRange.new(70, 90)])).to eq CodePointRange.new(70, 90)
end
end
end
end

spec/propane/lexer/dfa_spec.rb (deleted)

@@ -1,121 +0,0 @@
class TestLexer
def initialize(token_dfa)
@token_dfa = token_dfa
end
def lex(input)
input_chars = input.chars
output = []
while lexed_token = lex_token(input_chars)
output << lexed_token
input_chars.slice!(0, lexed_token[1].size)
end
unless input_chars.empty?
raise "Unmatched input #{input_chars.join(" ")}"
end
output
end
def lex_token(input_chars)
return nil if input_chars.empty?
s = ""
current_state = @token_dfa.start_state
last_accepts = nil
last_s = nil
input_chars.each_with_index do |input_char, index|
if next_state = transition(current_state, input_char)
s += input_char
current_state = next_state
if current_state.accepts
last_accepts = current_state.accepts
last_s = s
end
else
break
end
end
if last_accepts
[last_accepts.name, last_s]
end
end
def transition(state, input_char)
state.transitions.each do |transition|
if transition.code_point_range.include?(input_char.ord)
return transition.destination
end
end
nil
end
end
def run(grammar, input)
propane = Propane.new(grammar)
token_dfa = Propane::Lexer::DFA.new(propane.instance_variable_get(:@tokens))
test_lexer = TestLexer.new(token_dfa)
test_lexer.lex(input)
end
describe Propane::Lexer::DFA do
it "lexes a simple token" do
expect(run(<<EOF, "foo")).to eq [["foo", "foo"]]
token foo
EOF
end
it "lexes two tokens" do
expected = [
["foo", "foo"],
["bar", "bar"],
]
expect(run(<<EOF, "foobar")).to eq expected
token foo
token bar
EOF
end
it "lexes the longer of multiple options" do
expected = [
["identifier", "foobar"],
]
expect(run(<<EOF, "foobar")).to eq expected
token foo
token bar
token identifier [a-z]+
EOF
expected = [
["plusplus", "++"],
["plus", "+"],
]
expect(run(<<EOF, "+++")).to eq expected
token plus \\+
token plusplus \\+\\+
EOF
end
it "lexes whitespace" do
expected = [
["foo", "foo"],
["WS", " \t"],
["bar", "bar"],
]
expect(run(<<EOF, "foo \tbar")).to eq expected
token foo
token bar
token WS \\s+
EOF
end
it "allows dropping a matched pattern" do
expected = [
["foo", "foo"],
[nil, " \t"],
["bar", "bar"],
]
expect(run(<<EOF, "foo \tbar")).to eq expected
token foo
token bar
drop \\s+
EOF
end
end

spec/propane/parser/item_spec.rb (deleted)

@@ -1,19 +0,0 @@
class Propane
class Parser
describe Item do
it "operates properly with a set" do
rule = Object.new
item1 = Item.new(rule, 2)
item2 = Item.new(rule, 2)
expect(item1).to eq item2
expect(item1.eql?(item2)).to be_truthy
set = Set.new([item1, item2])
expect(set.size).to eq 1
end
end
end
end

spec/propane/regex_spec.rb (deleted)

@@ -1,333 +0,0 @@
class Propane
RSpec.describe Regex do
it "parses an empty expression" do
regex = Regex.new("")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0].size).to eq 0
end
it "parses a single character unit expression" do
regex = Regex.new("a")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
end
it "parses a group with a single character unit expression" do
regex = Regex.new("(a)")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::AlternatesUnit
alt_unit = seq_unit[0]
expect(alt_unit.alternates.size).to eq 1
expect(alt_unit.alternates[0]).to be_a Regex::SequenceUnit
expect(alt_unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit
end
it "parses a *" do
regex = Regex.new("a*")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 0
expect(m_unit.max_count).to be_nil
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
end
it "parses a +" do
regex = Regex.new("a+")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 1
expect(m_unit.max_count).to be_nil
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
end
it "parses a ?" do
regex = Regex.new("a?")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 0
expect(m_unit.max_count).to eq 1
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
end
it "parses a multiplicity count" do
regex = Regex.new("a{5}")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 5
expect(m_unit.max_count).to eq 5
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
end
it "parses a minimum-only multiplicity count" do
regex = Regex.new("a{5,}")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 5
expect(m_unit.max_count).to be_nil
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
end
it "parses a minimum and maximum multiplicity count" do
regex = Regex.new("a{5,8}")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 5
expect(m_unit.max_count).to eq 8
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
expect(m_unit.unit.first).to eq "a".ord
end
it "parses an escaped *" do
regex = Regex.new("a\\*")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 2
expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[0].first).to eq "a".ord
expect(seq_unit[1]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[1].first).to eq "*".ord
end
it "parses an escaped +" do
regex = Regex.new("a\\+")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 2
expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[0].first).to eq "a".ord
expect(seq_unit[1]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[1].first).to eq "+".ord
end
it "parses an escaped \\" do
regex = Regex.new("\\\\d")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 2
expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[0].first).to eq "\\".ord
expect(seq_unit[1]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[1].first).to eq "d".ord
end
it "parses a character class" do
regex = Regex.new("[a-z_]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.negate).to be_falsey
expect(ccu.size).to eq 2
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "a".ord
expect(ccu[0].last).to eq "z".ord
expect(ccu[1]).to be_a Regex::CharacterRangeUnit
expect(ccu[1].first).to eq "_".ord
end
it "parses a negated character class" do
regex = Regex.new("[^xyz]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.negate).to be_truthy
expect(ccu.size).to eq 3
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "x".ord
end
it "parses - as a plain character at beginning of a character class" do
regex = Regex.new("[-9]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.size).to eq 2
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "-".ord
end
it "parses - as a plain character at end of a character class" do
regex = Regex.new("[0-]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.size).to eq 2
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "0".ord
expect(ccu[1]).to be_a Regex::CharacterRangeUnit
expect(ccu[1].first).to eq "-".ord
end
it "parses - as a plain character at beginning of a negated character class" do
regex = Regex.new("[^-9]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.negate).to be_truthy
expect(ccu.size).to eq 2
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "-".ord
end
it "parses . as a plain character in a character class" do
regex = Regex.new("[.]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.negate).to be_falsey
expect(ccu.size).to eq 1
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq ".".ord
end
it "parses - as a plain character when escaped in middle of character class" do
regex = Regex.new("[0\\-9]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.negate).to be_falsey
expect(ccu.size).to eq 3
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "0".ord
expect(ccu[1]).to be_a Regex::CharacterRangeUnit
expect(ccu[1].first).to eq "-".ord
expect(ccu[2]).to be_a Regex::CharacterRangeUnit
expect(ccu[2].first).to eq "9".ord
end
it "parses alternates" do
regex = Regex.new("ab|c")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 2
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[1]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[0].size).to eq 2
expect(regex.unit.alternates[1].size).to eq 1
end
it "parses a ." do
regex = Regex.new("a.b")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[0][1]).to be_a Regex::CharacterClassUnit
expect(regex.unit.alternates[0][1].units.size).to eq 2
expect(regex.unit.alternates[0][2]).to be_a Regex::CharacterRangeUnit
end
it "parses something complex" do
regex = Regex.new("(a|)*|[^^]|\\|v|[x-y]+")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 4
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[0].size).to eq 1
expect(regex.unit.alternates[0][0]).to be_a Regex::MultiplicityUnit
expect(regex.unit.alternates[0][0].min_count).to eq 0
expect(regex.unit.alternates[0][0].max_count).to be_nil
expect(regex.unit.alternates[0][0].unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates[0][0].unit.alternates.size).to eq 2
expect(regex.unit.alternates[0][0].unit.alternates[0]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[0][0].unit.alternates[0].size).to eq 1
expect(regex.unit.alternates[0][0].unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[0][0].unit.alternates[1]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[0][0].unit.alternates[1].size).to eq 0
expect(regex.unit.alternates[1]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[1].size).to eq 1
expect(regex.unit.alternates[1][0]).to be_a Regex::CharacterClassUnit
expect(regex.unit.alternates[1][0].negate).to be_truthy
expect(regex.unit.alternates[1][0].size).to eq 1
expect(regex.unit.alternates[1][0][0]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[2]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[2].size).to eq 2
expect(regex.unit.alternates[2][0]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[2][0].first).to eq "|".ord
expect(regex.unit.alternates[2][1]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[2][1].first).to eq "v".ord
expect(regex.unit.alternates[3]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[3].size).to eq 1
expect(regex.unit.alternates[3][0]).to be_a Regex::MultiplicityUnit
expect(regex.unit.alternates[3][0].min_count).to eq 1
expect(regex.unit.alternates[3][0].max_count).to be_nil
expect(regex.unit.alternates[3][0].unit).to be_a Regex::CharacterClassUnit
expect(regex.unit.alternates[3][0].unit.size).to eq 1
expect(regex.unit.alternates[3][0].unit[0]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[3][0].unit[0].first).to eq "x".ord
expect(regex.unit.alternates[3][0].unit[0].last).to eq "y".ord
end
end
end

View File

@ -1,97 +0,0 @@
require "fileutils"
describe Propane do
def write_grammar(grammar)
File.write("spec/run/testparser.i", grammar)
end
def build_parser
result = system(*%w[./propane.sh spec/run/testparser.i spec/run/testparser.d])
expect(result).to be_truthy
end
def compile(test_file)
result = system(*%w[gdc -funittest -o spec/run/testparser spec/run/testparser.d], test_file)
expect(result).to be_truthy
end
def run
result = system("spec/run/testparser")
expect(result).to be_truthy
end
before(:each) do
FileUtils.rm_rf("spec/run")
FileUtils.mkdir_p("spec/run")
end
it "generates a D lexer" do
write_grammar <<EOF
token int \\d+
token plus \\+
token times \\*
drop \\s+
Start: [Foo] <<
>>
Foo: [int] <<
>>
Foo: [plus] <<
>>
EOF
build_parser
compile("spec/test_d_lexer.d")
run
end
it "generates a parser" do
write_grammar <<EOF
token plus \\+
token times \\*
token zero 0
token one 1
Start: [E] <<
>>
E: [E times B] <<
>>
E: [E plus B] <<
>>
E: [B] <<
>>
B: [zero] <<
>>
B: [one] <<
>>
EOF
build_parser
end
it "distinguishes between multiple identical rules with lookahead symbol" do
write_grammar <<EOF
token a
token b
Start: [R1 a] <<
>>
Start: [R2 b] <<
>>
R1: [a b] <<
>>
R2: [a b] <<
>>
EOF
build_parser
end
it "handles reducing a rule that could be arrived at from multiple states" do
write_grammar <<EOF
token a
token b
Start: [a R1] <<
>>
Start: [b R1] <<
>>
R1: [b] <<
>>
EOF
build_parser
end
end

View File

@ -1,11 +0,0 @@
require "bundler/setup"
require "propane"
RSpec.configure do |config|
# Enable flags like --only-failures and --next-failure
config.example_status_persistence_file_path = ".rspec_status"
config.expect_with :rspec do |c|
c.syntax = :expect
end
end

View File

@ -1,66 +0,0 @@
import testparser;
import std.stdio;
int main()
{
return 0;
}
unittest
{
alias DCP = Testparser.Decoder.DecodedCodePoint;
string inputstring = "5+\n 66";
const(ubyte) * input = cast(const(ubyte) *)inputstring.ptr;
size_t input_length = inputstring.length;
DCP dcp;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP('5', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP('+', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP('\n', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP(' ', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP('6', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP('6', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP(Testparser.Decoder.CODE_POINT_EOF, 0u));
inputstring = "\xf0\x9f\xa7\xa1";
input = cast(const(ubyte) *)inputstring.ptr;
input_length = inputstring.length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP(0x1F9E1, 4u));
}
unittest
{
alias LT = Testparser.Lexer.LexedToken;
string input = "5 + 4 * \n677 + 567";
Testparser.Lexer lexer = new Testparser.Lexer(cast(const(ubyte) *)input.ptr, input.length);
assert(lexer.lex_token() == LT(0, 0, 1, Testparser.TOKEN_INT));
assert(lexer.lex_token() == LT(0, 2, 1, Testparser.TOKEN_PLUS));
assert(lexer.lex_token() == LT(0, 4, 1, Testparser.TOKEN_INT));
assert(lexer.lex_token() == LT(0, 6, 1, Testparser.TOKEN_TIMES));
assert(lexer.lex_token() == LT(1, 0, 3, Testparser.TOKEN_INT));
assert(lexer.lex_token() == LT(1, 4, 1, Testparser.TOKEN_PLUS));
assert(lexer.lex_token() == LT(1, 6, 3, Testparser.TOKEN_INT));
assert(lexer.lex_token() == LT(1, 9, 0, Testparser.TOKEN_EOF));
lexer = new Testparser.Lexer(null, 0u);
assert(lexer.lex_token() == LT(0, 0, 0, Testparser.TOKEN_EOF));
}

14
tests/Makefile Normal file
View File

@ -0,0 +1,14 @@
all:
for d in *; do \
if [ -d $$d ]; then \
$(MAKE) -C $$d; \
fi; \
done
clean:
for d in *; do \
if [ -d $$d ]; then \
$(MAKE) -C $$d clean; \
fi; \
done

15
tests/build/Makefile Normal file
View File

@ -0,0 +1,15 @@
TARGET := test
I_SOURCE := itest
CXXFLAGS := -O2
LDFLAGS := -lpcre
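# Note: "imbecile itest.I" (in the rule below) generates itest.cc and itest.h,
# which are then compiled along with this test's own *.cc sources.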
all: $(TARGET)
./$(TARGET)
$(TARGET): $(shell which imbecile) $(I_SOURCE).I $(wildcard *.cc)
imbecile $(I_SOURCE).I
$(CXX) -o $@ *.cc $(LDFLAGS)
clean:
-rm -f $(TARGET) *.o $(I_SOURCE).cc $(I_SOURCE).h

37
tests/build/itest.I Normal file
View File

@ -0,0 +1,37 @@
[tokens]
AND and
OR or
NOT not
LPAREN \(
RPAREN \)
WS \s+
EQUALS = %{ cout << "Saw '='" << endl; %}
IDENTIFIER [a-zA-Z_][a-zA-Z_0-9]* %{
cout << "Identify: '" << matches[0] << "'" << endl;
%}
DEC_INT [1-9]\d*\b
${
unsigned long long value;
$}
%{
sscanf(matches[0].c_str(), "%llu", &value);
cout << "value: " << value << endl;
%}
HEX_INT 0x([0-9a-fA-F]+)\b ${ unsigned long long value; $} %{
sscanf(matches[1].c_str(), "%llx", &value);
cout << "value: " << value << endl;
%}
OCT_INT 0([0-7]*)\b
BIN_INT 0b([01]+)\b
[rules]
Assignment := IDENTIFIER EQUALS Expression
Expression := IDENTIFIER \
| Assignment

17
tests/build/main.cc Normal file
View File

@ -0,0 +1,17 @@
#include <sstream>
#include <string>
#include "itest.h"
using namespace std;
int main(int argc, char * argv[])
{
Parser p;
stringstream t(string(
"hi there (one and two and three and four) or (two = nine)\n"
"0x42 12345 0 011 0b0011\n"
));
p.parse(t);
}

202
tmpl/parser.cc Normal file
View File

@ -0,0 +1,202 @@
#include <string.h> /* memcpy() */
#include <pcre.h>
#include <iostream>
#include <vector>
#include {%header_name%}
using namespace std;
#ifdef I_NAMESPACE
namespace I_NAMESPACE {
#endif
I_CLASSNAME::I_CLASSNAME()
: m_errstr(NULL)
{
}
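/* Generated factory: the {%buildToken%} placeholder below expands to one
 * switch case per token type, constructing the corresponding token object. */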
static TokenRef buildToken(int typeindex)
{
TokenRef token;
switch (typeindex)
{
{%buildToken%}
}
if (!token.isNull())
{
token->setType(typeindex);
}
return token;
}
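/* Slurp the entire input stream into buff; size receives the byte count. */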
static void read_istream(istream & i, vector<char> & buff, int & size)
{
size = 0;
int bytes_read;
char read_buff[1000];
while (!i.eof())
{
i.read(&read_buff[0], sizeof(read_buff));
bytes_read = i.gcount();
size += bytes_read;
for (int j = 0; j < bytes_read; j++)
buff.push_back(read_buff[j]);
}
}
bool I_CLASSNAME::parse(istream & in)
{
struct {
const char * name;
const char * definition;
bool process;
pcre * re;
pcre_extra * re_extra;
} tokens[] = {
{%token_list%}
};
if (sizeof(tokens)/sizeof(tokens[0]) == 0)
{
m_errstr = "No tokens defined";
return false;
}
vector<char> buff;
int buff_size;
read_istream(in, buff, buff_size);
if (buff_size <= 0)
{
m_errstr = "0-length input string";
return false;
}
/* append trailing NUL byte for pcre functions */
buff.push_back('\0');
/* compile all token regular expressions */
for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++)
{
const char * errptr;
int erroffset;
tokens[i].re = pcre_compile(tokens[i].definition, 0,
&errptr, &erroffset, NULL);
if (tokens[i].re == NULL)
{
cerr << "Error compiling token '" << tokens[i].name
<< "' regular expression at position " << erroffset
<< ": " << errptr << endl;
m_errstr = "Error in token regular expression";
return false;
}
tokens[i].re_extra = pcre_study(tokens[i].re, 0, &errptr);
}
int buff_pos = 0;
const int ovector_num_matches = 16;
const int ovector_size = 3 * (ovector_num_matches + 1);
int ovector[ovector_size];
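/* Greedy lexing loop: at each input position, try every token pattern and
 * keep the longest match; on a tie the earlier-defined token wins. */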
while (buff_pos < buff_size)
{
int longest_match_length = 0;
int longest_match_index = -1;
int longest_match_ovector[ovector_size];
for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++)
{
int rc = pcre_exec(tokens[i].re, tokens[i].re_extra,
&buff[0], buff_size, buff_pos,
PCRE_ANCHORED | PCRE_NOTEMPTY,
ovector, ovector_size);
if (rc > 0)
{
/* this pattern matched some of the input */
int len = ovector[1] - ovector[0];
if (len > longest_match_length)
{
longest_match_length = len;
longest_match_index = i;
memcpy(longest_match_ovector, ovector, sizeof(ovector));
}
}
}
if (longest_match_index < 0)
{
/* no pattern matched the input at the current position */
cerr << "Parse error" << endl;
return false;
}
Matches matches(tokens[longest_match_index].re,
&buff[0], longest_match_ovector, ovector_size);
TokenRef token = buildToken(longest_match_index);
if (token.isNull())
{
cerr << "Internal Error: null token" << endl;
return false;
}
token->process(matches);
m_tokens.push_back(token);
buff_pos += longest_match_length;
}
return true;
}
refptr<Node> Node::operator[](int index)
{
return (0 <= index && index < m_indexed_children.size())
? m_indexed_children[index]
: NULL;
}
refptr<Node> Node::operator[](const std::string & index)
{
return (m_named_children.find(index) != m_named_children.end())
? m_named_children[index]
: NULL;
}
void Token::process(const Matches & matches)
{
{%token_code%}
}
Matches::Matches(pcre * re, const char * data, int * ovector, int ovec_size)
: m_re(re), m_data(data), m_ovector(ovector), m_ovec_size(ovec_size)
{
}
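/* PCRE fills ovector with (start, end) byte-offset pairs: pair 0 is the
 * whole match, pair n is capture group n. */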
std::string Matches::operator[](int index) const
{
if (0 <= index && index < (m_ovec_size / 3))
{
int idx = 2 * index;
if (m_ovector[idx] >= 0 && m_ovector[idx + 1] >= 0)
{
return string(m_data + m_ovector[idx],
m_ovector[idx + 1] - m_ovector[idx]);
}
}
return "";
}
std::string Matches::operator[](const std::string & index) const
{
int num = pcre_get_stringnumber(m_re, index.c_str());
if (num > 0 && num < (m_ovec_size / 3))
{
/* convert the capture group number to its ovector pair offset */
int idx = 2 * num;
if (m_ovector[idx] >= 0 && m_ovector[idx + 1] >= 0)
{
return string(m_data + m_ovector[idx],
m_ovector[idx + 1] - m_ovector[idx]);
}
}
return "";
}
{%token_classes_code%}
#ifdef I_NAMESPACE
} /* namespace I_NAMESPACE */
#endif

181
tmpl/parser.h Normal file
View File

@ -0,0 +1,181 @@
#ifndef IMBECILE_PARSER_HEADER
#define IMBECILE_PARSER_HEADER
#include <pcre.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <iostream>
#include <map>
#include <vector>
#include <list>
{%user_includes%}
{%defines%}
#ifdef I_NAMESPACE
namespace I_NAMESPACE {
#endif
#ifndef REFPTR_H
#define REFPTR_H REFPTR_H
/* Author: Josh Holtrop
 * Purpose: Provide a reference-counting, pointer-like, first-class
 * C++ object that frees the object it points to once all
 * references to it have been destroyed.
 * This implementation does not solve the circular-reference problem;
 * that was not a concern when developing this class.
 */
#include <stdlib.h> /* NULL */
template <typename T>
class refptr
{
public:
refptr<T>();
refptr<T>(T * ptr);
refptr<T>(const refptr<T> & orig);
refptr<T> & operator=(const refptr<T> & orig);
refptr<T> & operator=(T * ptr);
~refptr<T>();
T & operator*() const { return *m_ptr; }
T * operator->() const { return m_ptr; }
bool isNull() const { return m_ptr == NULL; }
private:
void cloneFrom(const refptr<T> & orig);
void destroy();
T * m_ptr;
int * m_refCount;
};
template <typename T> refptr<T>::refptr()
{
m_ptr = NULL;
m_refCount = NULL;
}
template <typename T> refptr<T>::refptr(T * ptr)
{
m_ptr = ptr;
m_refCount = new int;
*m_refCount = 1;
}
template <typename T> refptr<T>::refptr(const refptr<T> & orig)
{
cloneFrom(orig);
}
template <typename T> refptr<T> & refptr<T>::operator=(const refptr<T> & orig)
{
/* guard against self-assignment, which would destroy the pointee
 * before copying from it */
if (this != &orig)
{
destroy();
cloneFrom(orig);
}
return *this;
}
template <typename T> refptr<T> & refptr<T>::operator=(T * ptr)
{
destroy();
m_ptr = ptr;
m_refCount = new int;
*m_refCount = 1;
return *this;
}
template <typename T> void refptr<T>::cloneFrom(const refptr<T> & orig)
{
this->m_ptr = orig.m_ptr;
this->m_refCount = orig.m_refCount;
if (m_refCount != NULL)
(*m_refCount)++;
}
template <typename T> refptr<T>::~refptr()
{
destroy();
}
template <typename T> void refptr<T>::destroy()
{
if (m_refCount != NULL)
{
if (*m_refCount <= 1)
{
delete m_ptr;
delete m_refCount;
}
else
{
(*m_refCount)--;
}
}
}
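/* Usage sketch (illustrative): copies share a single reference count, and
 * the pointee is deleted exactly once when the last refptr goes away.
 *
 *   refptr<Token> a(new Token());  // count == 1
 *   refptr<Token> b = a;           // count == 2, same Token
 *   b->setType(1);                 // pointer-style member access
 *   // a and b destroyed -> ~Token() runs once
 */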
#endif
class Matches
{
public:
Matches(pcre * re, const char * data, int * ovector, int ovec_size);
std::string operator[](int index) const;
std::string operator[](const std::string & index) const;
protected:
pcre * m_re;
const char * m_data;
int * m_ovector;
int m_ovec_size;
};
class Node
{
public:
refptr<Node> operator[](int index);
refptr<Node> operator[](const std::string & index);
protected:
std::map< std::string, refptr<Node> > m_named_children;
std::vector< refptr<Node> > m_indexed_children;
};
typedef refptr<Node> NodeRef;
class Token : public Node
{
public:
virtual void process(const Matches & matches);
void setType(int type) { m_type = type; }
int getType() const { return m_type; }
protected:
int m_type;
{%token_data%}
};
typedef refptr<Token> TokenRef;
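/* Generated per-token classes are inserted here. */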
{%token_classes%}
class I_CLASSNAME
{
public:
I_CLASSNAME();
bool parse(std::istream & in);
const char * getError() { return m_errstr; }
protected:
const char * m_errstr;
std::list<TokenRef> m_tokens;
};
#ifdef I_NAMESPACE
} /* namespace I_NAMESPACE */
#endif
#endif /* IMBECILE_PARSER_HEADER */