Start on ruby branch

This commit is contained in:
Josh Holtrop 2021-04-29 23:18:22 -04:00
parent 064bb94108
commit bc217e7ddb
18 changed files with 0 additions and 1317 deletions

9
.gitignore vendored
View File

@ -1,9 +0,0 @@
imbecile
tags
*.o
.*.swp
*.dep
tmpl.*
tests/*/itest.cc
tests/*/itest.h
tests/*/test

3
.gitmodules vendored
View File

@ -1,3 +0,0 @@
[submodule "refptr"]
path = refptr
url = http://github.com/holtrop/refptr.git

View File

@ -1,61 +0,0 @@
# Build description for the imbecile parser generator.
# Name of the final executable.
TARGET := imbecile
# One object per *.cc in this directory, plus tmpl.o built from the generated tmpl.cc.
CXXOBJS := $(patsubst %.cc,%.o,$(wildcard *.cc)) tmpl.o
# Hidden per-object dependency files (foo.o -> .foo.dep).
CXXDEPS := $(patsubst %.o,.%.dep,$(CXXOBJS))
CXXFLAGS := -O2
DEPS := $(CXXDEPS)
OBJS := $(CXXOBJS)
LDFLAGS := -lpcre
# refptr.h is found in the refptr/ subdirectory (a git submodule per .gitmodules).
CPPFLAGS := -I$(shell pwd)/refptr
# Default goal: verify the submodule, generate tmpl.h, then link the tool.
all: submodule_check tmpl.h $(TARGET)
.PHONY: submodule_check
# Fail early with a hint when refptr/refptr.h is missing.
submodule_check:
@if [ ! -e refptr/refptr.h ]; then \
echo Error: \"refptr\" folder is not populated.; \
echo Perhaps you forgot to do \"git checkout --recursive\"?; \
echo You can remedy the situation with \"git submodule update --init\".; \
exit 1; \
fi
# Link step; LDFLAGS comes after the objects on the command line.
$(TARGET): $(OBJS)
$(CXX) -o $@ $^ $(LDFLAGS)
# Object file rules
%.o: %.cc
$(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $<
# Make dependency files
# The sed expression rewrites "foo.o:" into "foo.o .foo.dep :" so the
# dependency file is itself regenerated when a prerequisite changes.
.%.dep: %.c
@set -e; rm -f $@; \
$(CC) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@
# C++ variant; also depends on tmpl.h so that header exists before -MM runs.
.%.dep: %.cc tmpl.h
@set -e; rm -f $@; \
$(CXX) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@
# Generated source: truncate tmpl.cc, then append an xxd -i dump of each file.
# NOTE(review): the loop globs $*/*, which presumably expands to tmpl/* in this
# explicit rule - confirm against the make version in use.
tmpl.cc: $(wildcard tmpl/*)
echo -n > $@
for f in $*/*; \
do xxd -i $$f >> $@; \
done
# Generated header: include guard plus an extern declaration for every line of
# tmpl.cc matching the stem followed by an underscore (initializers stripped by sed).
tmpl.h: tmpl.cc
echo '#ifndef $*_h' > $@
echo '#define $*_h' >> $@
grep '$*_' $^ | sed -e 's/^/extern /' -e 's/ =.*/;/' >> $@
echo '#endif' >> $@
.PHONY: tests
# Run the test suite with this directory prepended to PATH for the sub-make.
tests: PATH := $(shell pwd):$(PATH)
tests: all
$(MAKE) -C $@
tests-clean:
$(MAKE) -C tests clean
# Remove build products, including the generated tmpl.cc / tmpl.h.
clean: tests-clean
-rm -f $(TARGET) *.o .*.dep tmpl.cc tmpl.h
# Pull in generated dependency files; the leading '-' ignores missing ones.
-include $(CXXDEPS)

423
Parser.cc
View File

@ -1,423 +0,0 @@
#include <stdio.h>
#include <string.h>
#include <pcre.h>
#include <ctype.h> /* toupper() */
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include "Parser.h"
#include "TokenDefinition.h"
#include "RuleDefinition.h"
#include "tmpl.h"
using namespace std;
#define DEBUG
/* Construct a parser with its defaults: class name "Parser", empty
 * namespace, output extension "cc", and freshly allocated empty strings
 * for the token data, token code and #define accumulators. */
Parser::Parser()
: m_classname("Parser"), m_namespace(""), m_extension("cc"),
m_token_data(new string()), m_token_code(new string()),
m_defines(new string())
{
}
void Parser::makeDefine(const string & defname, const string & definition)
{
*m_defines += string("#define ") + defname + " " + definition + "\n";
}
/* Generate the parser sources from the collected token and rule definitions.
 *
 * Writes "<fname>.h" and "<fname>.<extension>" by expanding the embedded
 * header and body templates with the replacement strings built here.
 *
 * fname: output path without extension.
 * Returns false when there are no tokens or no rules, when either output
 * file cannot be opened, or when either stream reports a failure after
 * writing; true otherwise.
 */
bool Parser::write(const string & fname)
{
    if (m_tokens.size() < 1 || m_rules.size() < 1)
        return false;

    string header_fname = fname + ".h";
    string body_fname = fname + "." + m_extension;
    ofstream header(header_fname.c_str());
    ofstream body(body_fname.c_str());
    if (!header.is_open() || !body.is_open())
    {
        /* previously an unwritable destination was reported as success */
        cerr << "Error opening output file '"
            << (header.is_open() ? body_fname : header_fname)
            << "'" << endl;
        return false;
    }

    /* process data: number the tokens and collect their generated classes */
    refptr<string> token_classes = new string();
    refptr<string> token_classes_code = new string();
    int i = 0;
    for (list<TokenDefinitionRef>::const_iterator it = m_tokens.begin();
            it != m_tokens.end();
            ++it)
    {
        char buff[20];
        snprintf(buff, sizeof(buff), "%d", i++);
        makeDefine((*it)->getIdentifier(), buff);
        *token_classes += (*it)->getClassDefinition();
        *token_classes_code += (*it)->getProcessMethod();
    }
    if (m_namespace != "")
    {
        makeDefine("I_NAMESPACE", m_namespace);
    }
    makeDefine("I_CLASSNAME", m_classname);

    /* set up replacements */
    setReplacement("token_list", buildTokenList());
    setReplacement("buildToken", buildBuildToken());
    setReplacement("header_name",
            new string(string("\"") + header_fname + "\""));
    setReplacement("token_code", m_token_code);
    setReplacement("token_data", m_token_data);
    setReplacement("defines", m_defines);
    setReplacement("token_classes", token_classes);
    setReplacement("token_classes_code", token_classes_code);

    /* write the header */
    writeTmpl(header, (char *) tmpl_parser_h, tmpl_parser_h_len);
    /* write the body */
    writeTmpl(body, (char *) tmpl_parser_cc, tmpl_parser_cc_len);

    header.close();
    body.close();
    /* close() sets failbit when the final flush fails */
    return !header.fail() && !body.fail();
}
bool Parser::writeTmpl(std::ostream & out, char * dat, int len)
{
char * newline;
char * data = dat;
const char * errptr;
int erroffset;
data[len-1] = '\n';
const int ovec_size = 6;
int ovector[ovec_size];
pcre * replace = pcre_compile("{%(\\w+)%}", 0, &errptr, &erroffset, NULL);
while (data < (dat + len) && (newline = strstr(data, "\n")) != NULL)
{
if (pcre_exec(replace, NULL, data, newline - data,
0, 0, ovector, ovec_size) >= 0)
{
if (ovector[0] > 0)
{
out.write(data, ovector[0]);
}
out << *getReplacement(string(data, ovector[2],
ovector[3] - ovector[2]));
if (ovector[1] < newline - data)
{
out.write(data + ovector[1], newline - data - ovector[1]);
}
}
else
{
out.write(data, newline - data);
}
out << '\n';
data = newline + 1;
}
}
/* Look up the replacement text registered under a placeholder name.
 * An unknown name yields a newly allocated empty string; when DEBUG is
 * defined a diagnostic is also printed to cerr. */
refptr<std::string> Parser::getReplacement(const std::string & name)
{
    std::map< std::string, refptr<std::string> >::iterator found =
        m_replacements.find(name);
    if (found == m_replacements.end())
    {
#ifdef DEBUG
        cerr << "No replacement found for \"" << name << "\"" << endl;
#endif
        return new string("");
    }
    return found->second;
}
/* Build the initializer text for the generated parser's token table.
 *
 * Each token contributes one entry of the form
 *   { "<name>", "<escaped regex>", <true|false> }
 * Entries are separated by ",\n", and every entry after the first is
 * preceded by a single space.
 */
refptr<string> Parser::buildTokenList()
{
    refptr<string> tokenlist = new string();
    for (list<TokenDefinitionRef>::const_iterator t = m_tokens.begin();
            t != m_tokens.end();
            ++t)
    {
        if (t != m_tokens.begin())
            *tokenlist += " ";
        *tokenlist += "{ \"" + (*t)->getName() + "\", \""
            + (*t)->getCString() + "\", "
            + ((*t)->getProcessFlag() ? "true" : "false") + " }";
        /* standard C++ look-ahead; replaces the GNU-only
         * statement-expression / typeof construct */
        list<TokenDefinitionRef>::const_iterator next = t;
        ++next;
        if (next != m_tokens.end())
            *tokenlist += ",\n";
    }
    return tokenlist;
}
/* Build the switch cases for the generated buildToken() function: one
 * "case <identifier>:" per token that instantiates the token's class. */
refptr<string> Parser::buildBuildToken()
{
    refptr<string> cases = new string();
    list<TokenDefinitionRef>::const_iterator tok = m_tokens.begin();
    while (tok != m_tokens.end())
    {
        const TokenDefinition & def = **tok;
        cases->append("case ").append(def.getIdentifier()).append(":\n");
        cases->append(" token = new ").append(def.getClassName()).append("();\n");
        cases->append(" break;\n");
        ++tok;
    }
    return cases;
}
/* Parse a grammar description held in memory and record the token and rule
 * definitions it contains in this Parser.
 *
 * The input is consumed one '\n'-terminated line at a time. "[tokens]" and
 * "[rules]" headers switch the current section. In the tokens section a
 * line may define a token (name followed by a regular expression), open a
 * "${ ... $}" data block, or open a "%{ ... %}" code block. Gathered
 * blocks are attached to the most recently defined token, or to the
 * parser-wide strings when no token has been defined yet.
 *
 * buff: input text. It is scanned with strstr(), so it has to be
 *       NUL-terminated; text after the last '\n' is never processed.
 *       A '\r' directly before a '\n' is overwritten in place.
 * size: not referenced by the body.
 * Returns true when the whole input was accepted; false after printing a
 * diagnostic to cerr.
 *
 * NOTE(review): every "return false" below skips the pcre_free() loop at
 * the end, so the compiled expressions leak on error paths.
 */
bool Parser::parseInputFile(char * buff, int size)
{
typedef pcre * pcre_ptr;
enum { none, tokens, rules };
pcre_ptr empty, comment, section_name, token, rule,
data_begin, data_end, code_begin, code_end;
/* table pairing each compiled-regex variable with its pattern source */
struct { pcre_ptr * re; const char * pattern; } exprs[] = {
{&empty, "^\\s*$"},
{&comment, "^\\s*#"},
{&section_name, "^\\s*\\[([^\\]]+?)\\]\\s*$"},
{&token, "^\\s*" /* possible leading ws */
"([a-zA-Z_][a-zA-Z_0-9]*)" /* 1: token name */
"\\s+" /* required whitespace */
"((?:[^\\\\\\s]|\\\\.)+)"}, /* 2: token RE */
{&rule, "^\\s*(\\S+)\\s*:=(.*)$"},
{&data_begin, "^\\s*\\${"},
{&data_end, "\\$}"},
{&code_begin, "^\\s*%{"},
{&code_end, "%}"}
};
const int ovec_size = 3 * 10;
int ovector[ovec_size];
int lineno = 0;
char * newline;
char * input = buff;
string current_section_name;
/* section header text -> enum value */
map<string, int> sections;
sections["none"] = none;
sections["tokens"] = tokens;
sections["rules"] = rules;
int section = none;
string line;
/* true while a trailing backslash asks for the next line to be appended */
bool append_line = false;
/* true while inside a ${ ... $} / %{ ... %} block */
bool gathering_data = false;
bool gathering_code = false;
/* text collected for the block currently being gathered */
string gather;
/* true when the remainder of `line` must be re-examined before reading on */
bool continue_line = false;
TokenDefinitionRef current_token;
/* compile every expression up front */
for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
{
const char * errptr;
int erroffset;
*exprs[i].re = pcre_compile(exprs[i].pattern, 0,
&errptr, &erroffset, NULL);
if (*exprs[i].re == NULL)
{
cerr << "Error compiling regex '" << exprs[i].pattern <<
"': " << errptr << " at position " << erroffset << endl;
return false;
}
}
for (;;)
{
if (continue_line)
{
/* keep working on what is left of the current line */
continue_line = false;
}
else
{
/* fetch the next line; stop when no further newline exists */
if ((newline = strstr(input, "\n")) == NULL)
break;
int line_length = newline - input;
/* drop the CR of a CRLF line ending */
if (line_length >= 1 && newline[-1] == '\r')
{
newline[-1] = '\n';
line_length--;
}
lineno++;
if (append_line)
{
line += string(input, line_length);
}
else
{
line = string(input, line_length);
}
input = newline + 1; /* set up for next loop iteration */
}
/* NOTE(review): this test runs even while gathering a data/code block,
 * so blank lines and lines starting with '#' inside such blocks are
 * dropped as well - confirm that this is intended. */
if ( (pcre_exec(empty, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
|| (pcre_exec(comment, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
)
{
/* skip empty or comment lines */;
continue;
}
/* line continuation and section headers only apply outside blocks */
if (! (gathering_code || gathering_data) )
{
if (line.size() > 0 && line[line.size()-1] == '\\')
{
/* replace the backslash with a space and join with the next line */
line[line.size()-1] = ' ';
append_line = true;
continue;
}
else
{
append_line = false;
}
if (pcre_exec(section_name, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
/* ovector[2..3] delimit capture group 1: the section name */
current_section_name
= string(line, ovector[2], ovector[3] - ovector[2]);
if (sections.find(current_section_name) != sections.end())
{
section = sections[current_section_name];
}
else
{
cerr << "Unknown section name '" << current_section_name
<< "'!" << endl;
return false;
}
continue;
}
}
switch (section)
{
case none:
/* content before any section header */
cerr << "Unrecognized input on line " << lineno << endl;
return false;
case tokens:
if (gathering_data)
{
if (pcre_exec(data_end, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
/* keep the text before the terminator, then rescan what follows it */
gather += string(line, 0, ovector[0]) + "\n";
gathering_data = false;
line = string(line, ovector[1]);
continue_line = true;
if (current_token.isNull())
{
*m_token_data += gather;
}
else
{
current_token->addData(gather);
}
}
else
{
gather += line + "\n";
}
continue;
}
else if (gathering_code)
{
if (pcre_exec(code_end, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
/* keep the text before the terminator, then rescan what follows it */
gather += string(line, 0, ovector[0]) + "\n";
gathering_code = false;
line = string(line, ovector[1]);
continue_line = true;
if (current_token.isNull())
{
*m_token_code += gather;
}
else
{
current_token->addCode(gather);
}
}
else
{
gather += line + "\n";
}
continue;
}
else if (pcre_exec(data_begin, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
/* start of a data block; the rest of the line is rescanned */
gathering_data = true;
gather = "";
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else if (pcre_exec(code_begin, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
/* start of a code block; the rest of the line is rescanned */
gathering_code = true;
gather = "";
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else if (pcre_exec(token, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
/* group 1 is the token name, group 2 its regular expression */
string name(line, ovector[2], ovector[3] - ovector[2]);
string definition(line,
ovector[4], ovector[5] - ovector[4]);
current_token = new TokenDefinition();
if (current_token->create(name, definition))
{
addTokenDefinition(current_token);
}
else
{
cerr << "Error in token definition ending on line "
<< lineno << endl;
return false;
}
/* anything after the definition (e.g. an inline block) is rescanned */
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else
{
cerr << "Unrecognized input on line " << lineno << endl;
return false;
}
break;
case rules:
if (pcre_exec(rule, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
/* group 1 is the rule name, group 2 the text after ":=" */
string name(line, ovector[2], ovector[3] - ovector[2]);
string definition(line,
ovector[4], ovector[5] - ovector[4]);
refptr<RuleDefinition> rd = new RuleDefinition();
if (rd->create(name, definition))
{
addRuleDefinition(rd);
}
else
{
cerr << "Error in rule definition ending on line "
<< lineno << endl;
return false;
}
}
else
{
cerr << "Unrecognized input on line " << lineno << endl;
return false;
}
break;
}
}
/* release the compiled expressions (reached on success only) */
for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
{
pcre_free(*exprs[i].re);
}
return true;
}

View File

@ -1,61 +0,0 @@
#ifndef PARSER_H
#define PARSER_H
#include <vector>
#include <string>
#include <list>
#include <map>
#include "refptr.h"
#include "TokenDefinition.h"
#include "RuleDefinition.h"
/* Parser generator front end: collects token and rule definitions from a
 * grammar description and writes the generated lexer/parser sources. */
class Parser
{
public:
    Parser();

    /* Append a token definition; tokens are numbered in insertion order. */
    void addTokenDefinition(refptr<TokenDefinition> td)
    {
        m_tokens.push_back(td);
    }
    /* Append a rule definition. */
    void addRuleDefinition(refptr<RuleDefinition> rd)
    {
        m_rules.push_back(rd);
    }

    /* Write "<fname>.h" and "<fname>.<extension>". */
    bool write(const std::string & fname);
    /* Read definitions from an in-memory grammar description. */
    bool parseInputFile(char * buff, int size);

    /* Accessors for the generated class name, namespace and body-file
     * extension. The getters are const so they can be used on a const
     * Parser, matching the accessors of TokenDefinition. */
    void setClassName(const std::string & cn) { m_classname = cn; }
    std::string getClassName() const { return m_classname; }
    void setNamespace(const std::string & ns) { m_namespace = ns; }
    std::string getNamespace() const { return m_namespace; }
    void setExtension(const std::string & e) { m_extension = e; }
    std::string getExtension() const { return m_extension; }

protected:
    /* Text generators used to fill template placeholders. */
    refptr<std::string> buildTokenList();
    refptr<std::string> buildBuildToken();

    /* Expand one template into a stream. */
    bool writeTmpl(std::ostream & out, char * dat, int len);

    /* Placeholder name -> replacement text registry. */
    refptr<std::string> getReplacement(const std::string & name);
    void setReplacement(const std::string & name, refptr<std::string> val)
    {
        m_replacements[name] = val;
    }

    /* Append a "#define" line to m_defines. */
    void makeDefine(const std::string & defname,
            const std::string & definition);

    std::list<TokenDefinitionRef> m_tokens;
    std::vector< refptr< RuleDefinition > > m_rules;
    std::string m_classname;
    std::string m_namespace;
    std::string m_extension;
    std::map< std::string, refptr<std::string> > m_replacements;
    refptr<std::string> m_token_data;
    refptr<std::string> m_token_code;
    refptr<std::string> m_defines;
};
#endif

5
README
View File

@ -1,5 +0,0 @@
Imbecile is a bottom-up parser generator. It targets C++ and automatically
generates a class hierarchy for interacting with the parser.
Imbecile generates both a lexer and a parser based on the rules given to
it in the input file.

View File

@ -1,9 +0,0 @@
#include "RuleDefinition.h"
using namespace std;
/* Initialise the rule from its name and the text following ":=".
 *
 * Only the name is recorded; the definition text is not interpreted yet.
 * Returns true. The function used to fall off the end without returning a
 * value, which is undefined behaviour, and the caller branches on the
 * result.
 */
bool RuleDefinition::create(const string & name, const string & definition)
{
    (void) definition;
    m_name = name;
    return true;
}

View File

@ -1,16 +0,0 @@
#ifndef RULEDEFINITION_H
#define RULEDEFINITION_H
#include <string>
/* One grammar rule read from the rules section of the input file. */
class RuleDefinition
{
public:
/* Initialise the rule from its name and the definition text. */
bool create(const std::string & name, const std::string & definition);
protected:
/* Rule name as given in the grammar. */
std::string m_name;
};
#endif

View File

@ -1,125 +0,0 @@
#include <pcre.h>
#include <iostream>
#include <string>
#include <vector>
#include "TokenDefinition.h"
#include "refptr.h"
using namespace std;
/* Characters treated as whitespace by trim(). */
#define WHITESPACE " \n\r\t\v"

/* Return s with leading and trailing whitespace removed; a string that
 * consists only of whitespace (or is empty) yields an empty string. */
static string trim(string s)
{
    const size_t first = s.find_first_not_of(WHITESPACE);
    if (first == string::npos)
        return string();
    const size_t last = s.find_last_not_of(WHITESPACE);
    return s.substr(first, last - first + 1);
}
/* Split str at every occurrence of delim.
 *
 * Pieces between delimiters are returned in order and may be empty; an
 * empty piece after the last delimiter is omitted. The whole delimiter is
 * consumed at each split (previously only its first character was
 * removed, which corrupted the pieces for multi-character delimiters).
 * An empty delimiter performs no splitting instead of looping forever.
 */
static refptr< vector<string> > split(const string & delim, string str)
{
    refptr< vector<string> > ret = new vector<string>();
    if (!delim.empty())
    {
        size_t pos;
        while ( (pos = str.find(delim)) != string::npos )
        {
            ret->push_back(str.substr(0, pos));
            str.erase(0, pos + delim.size());
        }
    }
    if (str != "")
        ret->push_back(str);
    return ret;
}
/* Return a copy of orig in which every backslash and double quote is
 * preceded by a backslash, so the text can sit inside a C string literal. */
static string c_escape(const string & orig)
{
    string escaped;
    for (string::size_type pos = 0; pos < orig.size(); ++pos)
    {
        const char ch = orig[pos];
        switch (ch)
        {
            case '\\':
            case '"':
                escaped += '\\';
                break;
            default:
                break;
        }
        escaped += ch;
    }
    return escaped;
}
/* Construct an empty token definition with the process flag cleared. */
TokenDefinition::TokenDefinition()
: m_process(false)
{
}
bool TokenDefinition::create(const string & name,
const string & definition)
{
const char * errptr;
int erroffset;
pcre * re = pcre_compile(definition.c_str(), 0, &errptr, &erroffset, NULL);
if (re == NULL)
{
cerr << "Error compiling regular expression '" << definition
<< "' at position " << erroffset << ": " << errptr << endl;
return false;
}
m_name = name;
m_definition = definition;
pcre_free(re);
#if 0
refptr< vector< string > > parts = split(",", flags);
for (int i = 0, sz = parts->size(); i < sz; i++)
{
(*parts)[i] = trim((*parts)[i]);
string & s = (*parts)[i];
if (s == "p")
{
m_process = true;
}
else
{
cerr << "Unknown token flag \"" << s << "\"" << endl;
return false;
}
}
#endif
return true;
}
/* Return the token's regular expression escaped for embedding in a C
 * string literal. */
string TokenDefinition::getCString() const
{
    const string escaped = c_escape(m_definition);
    return escaped;
}
/* Produce the C++ declaration of this token's generated class: a Token
 * subclass that declares process() only when the process flag is set and
 * carries the user-supplied data members in its protected section. */
string TokenDefinition::getClassDefinition() const
{
    string decl("class ");
    decl.append(getClassName());
    decl.append(" : public Token {\n");
    decl.append("public:\n");
    if (m_process)
        decl.append(" virtual void process(const Matches & matches);\n");
    decl.append("\n");
    decl.append("protected:\n");
    decl.append(m_data);
    decl.append("\n");
    decl.append("};\n");
    return decl;
}
/* Produce the definition of this token's process() method wrapping the
 * user-supplied code, or an empty string when the token has no code. */
string TokenDefinition::getProcessMethod() const
{
    if (m_code == "")
        return string();

    string method;
    method.append("void " + getClassName() + "::process(const Matches & matches) {\n");
    method.append(m_code + "\n");
    method.append("}\n");
    return method;
}

View File

@ -1,37 +0,0 @@
#ifndef TOKENDEFINITION_H
#define TOKENDEFINITION_H
#include <string>
#include "refptr.h"
/* One lexer token read from the tokens section of the input file: its
 * name, its regular expression and optional user data / code that go into
 * the generated token class. */
class TokenDefinition
{
public:
TokenDefinition();
/* Initialise from a name and a regular expression. */
bool create(const std::string & name,
const std::string & definition);
/* Regular expression escaped for a C string literal. */
std::string getCString() const;
std::string getName() const { return m_name; }
/* Whether the generated class declares its own process() method. */
bool getProcessFlag() const { return m_process; }
void setProcessFlag(bool p) { m_process = p; }
/* Append text to the data members of the generated class. */
void addData(const std::string & d) { m_data += d; }
std::string getData() const { return m_data; }
/* Append text to the process() body; this also sets the process flag. */
void addCode(const std::string & c) { m_code += c; m_process = true; }
std::string getCode() const { return m_code; }
/* Generated class declaration / process() definition for this token. */
std::string getClassDefinition() const;
std::string getProcessMethod() const;
/* Names derived from the token name: "TK_<name>" and "Tk<name>". */
std::string getIdentifier() const { return "TK_" + m_name; }
std::string getClassName() const { return "Tk" + m_name; }
protected:
std::string m_name;
std::string m_definition;
bool m_process;
std::string m_data;
std::string m_code;
};
typedef refptr<TokenDefinition> TokenDefinitionRef;
#endif

View File

@ -1,101 +0,0 @@
#include <getopt.h>
#include <iostream>
#include <fstream>
#include "refptr.h"
#include "Parser.h"
using namespace std;
string buildOutputFilename(string & input_fname);
/* Command-line entry point.
 *
 * Usage: imbecile [--classname N] [--extension E] [--namespace NS]
 *                 [--outfile BASE] <input-file>
 *
 * Exit status: 0 on success, 1 on usage error, 2 when the input file
 * cannot be opened or read, 3 on a parse error, 4 when generating the
 * output fails.
 */
int main(int argc, char * argv[])
{
    int longind = 1;
    int opt;
    Parser p;
    string outfile;
    static struct option longopts[] = {
        /* name, has_arg, flag, val */
        { "classname", required_argument, NULL, 'c' },
        { "extension", required_argument, NULL, 'e' },
        { "namespace", required_argument, NULL, 'n' },
        { "outfile", required_argument, NULL, 'o' },
        { NULL, 0, NULL, 0 }
    };

    while ((opt = getopt_long(argc, argv, "", longopts, &longind)) != -1)
    {
        switch (opt)
        {
            case 'c': /* classname */
                p.setClassName(optarg);
                break;
            case 'e': /* extension */
                p.setExtension(optarg);
                break;
            case 'n': /* namespace */
                p.setNamespace(optarg);
                break;
            case 'o': /* outfile */
                outfile = optarg;
                break;
        }
    }

    if (optind >= argc)
    {
        cerr << "Usage: imbecile [options] <input-file>" << endl;
        return 1;
    }

    string input_fname = argv[optind];
    ifstream ifs;
    ifs.open(input_fname.c_str(), ios::binary);
    if (!ifs.is_open())
    {
        cerr << "Error opening input file: '" << input_fname << "'" << endl;
        return 2;
    }

    /* determine the file size; tellg() reports -1 on failure */
    ifs.seekg(0, ios_base::end);
    int size = ifs.tellg();
    if (size < 0)
    {
        cerr << "Error reading input file: '" << input_fname << "'" << endl;
        return 2;
    }
    ifs.seekg(0, ios_base::beg);

    /* one extra byte: the parser scans the buffer with strstr(), which
     * needs a terminating NUL */
    char * buff = new char[size + 1];
    ifs.read(buff, size);
    size = ifs.gcount();
    buff[size] = '\0';
    ifs.close();

    if (outfile == "")
        outfile = buildOutputFilename(input_fname);

    /* single exit path so that the buffer is released on errors too */
    int status = 0;
    if (!p.parseInputFile(buff, size))
    {
        cerr << "Error parsing " << input_fname << endl;
        status = 3;
    }
    else if (!p.write(outfile))
    {
        cerr << "Error processing " << input_fname << endl;
        status = 4;
    }
    delete[] buff;
    return status;
}
/* Derive the output base name from the input file name: a trailing ".I"
 * is removed when something precedes it; any other name is returned
 * unchanged. */
string buildOutputFilename(string & input_fname)
{
    static const char suffix[] = ".I";
    const size_t suffix_len = 2;
    const size_t len = input_fname.length();
    const bool has_suffix = len > suffix_len
        && input_fname.compare(len - suffix_len, suffix_len, suffix) == 0;
    return has_suffix ? input_fname.substr(0, len - suffix_len)
                      : input_fname;
}

1
refptr

@ -1 +0,0 @@
Subproject commit e2c7e88824c18eb3b218f6308db0194edb422eef

View File

@ -1,14 +0,0 @@
# Run (or clean) every test subdirectory.
# $(MAKE) is used so that flags and the jobserver propagate to sub-makes,
# and the loop stops at the first failing directory instead of reporting
# only the status of the last one.
.PHONY: all clean

all:
	for d in *; do \
		if [ -d $$d ]; then \
			$(MAKE) -C $$d || exit 1; \
		fi; \
	done

clean:
	for d in *; do \
		if [ -d $$d ]; then \
			$(MAKE) -C $$d clean || exit 1; \
		fi; \
	done

View File

@ -1,15 +0,0 @@
# Build and run one generator test.
TARGET := test
# Base name of the grammar file; the generator is run on $(I_SOURCE).I and
# the clean rule removes $(I_SOURCE).cc and $(I_SOURCE).h.
I_SOURCE := itest
CXXFLAGS := -O2
LDFLAGS := -lpcre
# Default goal: build the test binary, then execute it.
all: $(TARGET)
./$(TARGET)
# Depends on the imbecile executable found on PATH, so the test is rebuilt
# when the generator changes.
$(TARGET): $(shell which imbecile) $(I_SOURCE).I $(wildcard *.cc)
imbecile $(I_SOURCE).I
$(CXX) -o $@ *.cc $(LDFLAGS)
clean:
-rm -f $(TARGET) *.o $(I_SOURCE).cc $(I_SOURCE).h

View File

@ -1,37 +0,0 @@
# Test grammar for the generator.
# Token lines are "<NAME> <regex>"; a token may be followed by a data block
# (dollar-brace delimiters) and/or a code block (percent-brace delimiters).
[tokens]
AND and
OR or
NOT not
LPAREN \(
RPAREN \)
WS \s+
EQUALS = %{ cout << "Saw '='" << endl; %}
IDENTIFIER [a-zA-Z_][a-zA-Z_0-9]* %{
cout << "Identify: '" << matches[0] << "'" << endl;
%}
DEC_INT [1-9]\d*\b
${
uint64_t value;
$}
%{
sscanf(matches[0].c_str(), "%lld", &value);
cout << "value: " << value << endl;
%}
HEX_INT 0x([0-9a-fA-F]+)\b ${ uint64_t value; $} %{
sscanf(matches[1].c_str(), "%llx", &value);
cout << "value: " << value << endl;
%}
OCT_INT 0([0-7]*)\b
BIN_INT 0b([01]+)\b
# NOTE(review): the rules below refer to ASSIGN, while the token defined
# above is named EQUALS - confirm which name is intended.
[rules]
Assignment := IDENTIFIER ASSIGN Expression
Expression := IDENTIFIER \
| Assignment

View File

@ -1,17 +0,0 @@
#include <sstream>
#include <string>
#include "itest.h"
using namespace std;
int main(int argc, char * argv[])
{
Parser p;
stringstream t(string(
"hi there (one and two and three and four) or (two = nine)\n"
"0x42 12345 0 011 0b0011\n"
));
p.parse(t);
}

View File

@ -1,202 +0,0 @@
#include <string.h> /* memcpy() */
#include <pcre.h>
#include <iostream>
#include <vector>
#include {%header_name%}
using namespace std;
#ifdef I_NAMESPACE
namespace I_NAMESPACE {
#endif
/* Construct a parser with no error message recorded. */
I_CLASSNAME::I_CLASSNAME()
: m_errstr(NULL)
{
}
/* Create the token object for a token type index.
 * The case labels inside the switch are filled in when the parser is
 * generated (one per defined token). The returned reference is null when
 * no case assigned a token; otherwise the token's type is set to
 * typeindex before it is returned. */
static TokenRef buildToken(int typeindex)
{
TokenRef token;
switch (typeindex)
{
{%buildToken%}
}
if (!token.isNull())
{
token->setType(typeindex);
}
return token;
}
/* Append everything that can be read from a stream to a byte vector.
 *
 * i:    source stream, read in chunks until a read yields no bytes.
 * buff: receives the bytes (appended after any existing content).
 * size: set to the number of bytes appended by this call.
 *
 * The loop is driven by the number of bytes actually read rather than by
 * eof(), so a stream that fails without reaching end-of-file terminates
 * the loop instead of spinning forever.
 */
static void read_istream(std::istream & i, std::vector<char> & buff, int & size)
{
    size = 0;
    char read_buff[1000];
    for (;;)
    {
        i.read(&read_buff[0], sizeof(read_buff));
        const std::streamsize bytes_read = i.gcount();
        if (bytes_read <= 0)
            break;
        buff.insert(buff.end(), read_buff, read_buff + bytes_read);
        size += static_cast<int>(bytes_read);
    }
}
bool I_CLASSNAME::parse(istream & i)
{
struct {
const char * name;
const char * definition;
bool process;
pcre * re;
pcre_extra * re_extra;
} tokens[] = {
{%token_list%}
};
if (sizeof(tokens)/sizeof(tokens[0]) == 0)
{
m_errstr = "No tokens defined";
return false;
}
vector<char> buff;
int buff_size;
read_istream(i, buff, buff_size);
if (buff_size <= 0)
{
m_errstr = "0-length input string";
return false;
}
/* append trailing NUL byte for pcre functions */
buff.push_back('\0');
/* compile all token regular expressions */
for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++)
{
const char * errptr;
int erroffset;
tokens[i].re = pcre_compile(tokens[i].definition, 0,
&errptr, &erroffset, NULL);
if (tokens[i].re == NULL)
{
cerr << "Error compiling token '" << tokens[i].name
<< "' regular expression at position " << erroffset
<< ": " << errptr << endl;
m_errstr = "Error in token regular expression";
return false;
}
tokens[i].re_extra = pcre_study(tokens[i].re, 0, &errptr);
}
int buff_pos = 0;
const int ovector_num_matches = 16;
const int ovector_size = 3 * (ovector_num_matches + 1);
int ovector[ovector_size];
while (buff_pos < buff_size)
{
int longest_match_length = 0;
int longest_match_index = -1;
int longest_match_ovector[ovector_size];
for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++)
{
int rc = pcre_exec(tokens[i].re, tokens[i].re_extra,
&buff[0], buff_size, buff_pos,
PCRE_ANCHORED | PCRE_NOTEMPTY,
ovector, ovector_size);
if (rc > 0)
{
/* this pattern matched some of the input */
int len = ovector[1] - ovector[0];
if (len > longest_match_length)
{
longest_match_length = len;
longest_match_index = i;
memcpy(longest_match_ovector, ovector, sizeof(ovector));
}
}
}
if (longest_match_index < 0)
{
/* no pattern matched the input at the current position */
cerr << "Parse error" << endl;
return false;
}
Matches matches(tokens[longest_match_index].re,
&buff[0], longest_match_ovector, ovector_size);
TokenRef token = buildToken(longest_match_index);
if (token.isNull())
{
cerr << "Internal Error: null token" << endl;
return false;
}
token->process(matches);
m_tokens.push_back(token);
buff_pos += longest_match_length;
}
}
/* Return the child at a position, or a null reference when the index is
 * out of range. */
refptr<Node> Node::operator[](int index)
{
    if (index < 0 || !(index < m_indexed_children.size()))
        return NULL;
    return m_indexed_children[index];
}
/* Return the child registered under a name, or a null reference when no
 * child has that name. */
refptr<Node> Node::operator[](const std::string & index)
{
    std::map< std::string, refptr<Node> >::iterator child =
        m_named_children.find(index);
    if (child == m_named_children.end())
        return NULL;
    return child->second;
}
/* Default handler called for every matched token. The body is filled in
 * at generation time with the code gathered before any token was defined
 * in the grammar file. */
void Token::process(const Matches & matches)
{
{%token_code%}
}
/* Wrap the result of one regular-expression match. The pointers are
 * stored as given, not copied, so the compiled expression, the subject
 * data and the offset vector must stay valid while this object is used. */
Matches::Matches(pcre * re, const char * data, int * ovector, int ovec_size)
: m_re(re), m_data(data), m_ovector(ovector), m_ovec_size(ovec_size)
{
}
/* Return the text of a capture group by number (0 is the whole match).
 * An out-of-range index or a group that did not take part in the match
 * yields an empty string. */
std::string Matches::operator[](int index) const
{
    if (index < 0 || index >= (m_ovec_size / 3))
        return "";

    /* each group occupies a (start, end) pair in the offset vector */
    const int start = m_ovector[2 * index];
    const int stop = m_ovector[2 * index + 1];
    if (start < 0 || stop < 0)
        return "";

    return string(m_data, start, stop - start);
}
/* Return the text of a named capture group.
 *
 * The name is translated to its group number and the lookup is delegated
 * to the numeric overload. The previous code used the group number
 * directly as an offset-vector index instead of the index of the group's
 * (start, end) pair, and so returned the wrong text. An unknown name, or
 * a group that did not take part in the match, yields an empty string.
 */
std::string Matches::operator[](const std::string & index) const
{
    const int group = pcre_get_stringnumber(m_re, index.c_str());
    if (group <= 0)
        return "";
    return (*this)[group];
}
{%token_classes_code%}
#ifdef I_NAMESPACE
};
#endif

View File

@ -1,181 +0,0 @@
#ifndef IMBECILE_PARSER_HEADER
#define IMBECILE_PARSER_HEADER
#include <pcre.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <iostream>
#include <map>
#include <vector>
#include <list>
{%user_includes%}
{%defines%}
#ifdef I_NAMESPACE
namespace I_NAMESPACE {
#endif
#ifndef REFPTR_H
#define REFPTR_H REFPTR_H
/* Author: Josh Holtrop
* Purpose: Provide a reference-counting pointer-like first order
* C++ object that will free the object it is pointing to when
* all references to it have been destroyed.
* This implementation does not solve the circular reference problem.
* I was not concerned with that when developing this class.
*/
#include <stdlib.h> /* NULL */
/* Reference-counting smart pointer.
 *
 * Every refptr that shares an object also shares one heap-allocated
 * counter; the object is deleted when the last sharing refptr is
 * destroyed or re-assigned. A default-constructed refptr owns nothing.
 */
template <typename T>
class refptr
{
    public:
        refptr();
        refptr(T * ptr);
        refptr(const refptr<T> & orig);
        refptr<T> & operator=(const refptr<T> & orig);
        refptr<T> & operator=(T * ptr);
        ~refptr();
        T & operator*() const { return *m_ptr; }
        T * operator->() const { return m_ptr; }
        bool isNull() const { return m_ptr == NULL; }

    private:
        /* share orig's object and counter, incrementing the counter */
        void cloneFrom(const refptr<T> & orig);
        /* give up this reference, deleting the object when it was the last */
        void destroy();

        T * m_ptr;
        int * m_refCount;
};

template <typename T> refptr<T>::refptr()
{
    m_ptr = NULL;
    m_refCount = NULL;
}

template <typename T> refptr<T>::refptr(T * ptr)
{
    m_ptr = ptr;
    m_refCount = new int;
    *m_refCount = 1;
}

template <typename T> refptr<T>::refptr(const refptr<T> & orig)
{
    cloneFrom(orig);
}

template <typename T> refptr<T> & refptr<T>::operator=(const refptr<T> & orig)
{
    /* Take an extra reference first: this keeps orig's object alive while
     * the current one is released, which makes self-assignment safe and
     * also covers the case where orig lives inside the object being
     * released. */
    refptr<T> keep(orig);
    destroy();
    cloneFrom(keep);
    return *this;
}

template <typename T> refptr<T> & refptr<T>::operator=(T * ptr)
{
    /* re-assigning the pointer that is already held must not free it */
    if (ptr == m_ptr)
        return *this;
    destroy();
    m_ptr = ptr;
    m_refCount = new int;
    *m_refCount = 1;
    return *this;
}

template <typename T> void refptr<T>::cloneFrom(const refptr<T> & orig)
{
    this->m_ptr = orig.m_ptr;
    this->m_refCount = orig.m_refCount;
    if (m_refCount != NULL)
        (*m_refCount)++;
}

template <typename T> refptr<T>::~refptr()
{
    destroy();
}

template <typename T> void refptr<T>::destroy()
{
    if (m_refCount != NULL)
    {
        if (*m_refCount <= 1)
        {
            delete m_ptr;
            delete m_refCount;
        }
        else
        {
            (*m_refCount)--;
        }
    }
    /* never leave dangling pointers behind */
    m_ptr = NULL;
    m_refCount = NULL;
}
#endif
/* Read-only view of one regular-expression match, giving access to the
 * captured substrings by group number or by group name. */
class Matches
{
public:
Matches(pcre * re, const char * data, int * ovector, int ovec_size);
/* Text of a capture group selected by number. */
std::string operator[](int index) const;
/* Text of a capture group selected by name. */
std::string operator[](const std::string & index) const;
protected:
/* Compiled expression, used to resolve group names. */
pcre * m_re;
/* Subject text the offsets refer to. */
const char * m_data;
/* PCRE offset vector and its length in ints. */
int * m_ovector;
int m_ovec_size;
};
/* Base class of all parse-tree nodes; children can be looked up by
 * position or by name. */
class Node
{
    public:
        /* Virtual destructor: objects of derived classes (the generated
         * token classes) are deleted through pointers to a base class by
         * the reference-counting pointer, which is undefined behaviour
         * without it. */
        virtual ~Node() {}

        /* Child by position; null reference when out of range. */
        refptr<Node> operator[](int index);
        /* Child by name; null reference when absent. */
        refptr<Node> operator[](const std::string & index);

    protected:
        std::map< std::string, refptr<Node> > m_named_children;
        std::vector< refptr<Node> > m_indexed_children;
};
typedef refptr<Node> NodeRef;
/* Base class of all generated token classes. */
class Token : public Node
{
public:
/* Called once for each matched token with the match details. */
virtual void process(const Matches & matches);
/* Token type index, as assigned by the TK_ defines. */
void setType(int type) { m_type = type; }
int getType() const { return m_type; }
protected:
int m_type;
/* data members shared by every token are inserted on the next line */
{%token_data%}
};
typedef refptr<Token> TokenRef;
{%token_classes%}
/* The generated parser class; I_CLASSNAME is a macro supplied by the
 * generated defines. */
class I_CLASSNAME
{
public:
I_CLASSNAME();
/* Tokenize the given stream. */
bool parse(std::istream & in);
/* Last error message, or NULL when none has been recorded. */
const char * getError() { return m_errstr; }
protected:
const char * m_errstr;
/* Tokens recognised so far, in input order. */
std::list<TokenRef> m_tokens;
};
#ifdef I_NAMESPACE
};
#endif
#endif /* IMBECILE_PARSER_HEADER */