diff --git a/.gitignore b/.gitignore deleted file mode 100644 index bcf881b..0000000 --- a/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -imbecile -tags -*.o -.*.swp -*.dep -tmpl.* -tests/*/itest.cc -tests/*/itest.h -tests/*/test diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 91fd005..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "refptr"] - path = refptr - url = http://github.com/holtrop/refptr.git diff --git a/Makefile b/Makefile deleted file mode 100644 index d9e6007..0000000 --- a/Makefile +++ /dev/null @@ -1,61 +0,0 @@ - -TARGET := imbecile -CXXOBJS := $(patsubst %.cc,%.o,$(wildcard *.cc)) tmpl.o -CXXDEPS := $(patsubst %.o,.%.dep,$(CXXOBJS)) -CXXFLAGS := -O2 -DEPS := $(CXXDEPS) -OBJS := $(CXXOBJS) -LDFLAGS := -lpcre -CPPFLAGS := -I$(shell pwd)/refptr - -all: submodule_check tmpl.h $(TARGET) - -.PHONY: submodule_check -submodule_check: - @if [ ! -e refptr/refptr.h ]; then \ - echo Error: \"refptr\" folder is not populated.; \ - echo Perhaps you forgot to do \"git checkout --recursive\"?; \ - echo You can remedy the situation with \"git submodule update --init\".; \ - exit 1; \ - fi - -$(TARGET): $(OBJS) - $(CXX) -o $@ $^ $(LDFLAGS) - -# Object file rules -%.o: %.cc - $(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $< - -# Make dependency files -.%.dep: %.c - @set -e; rm -f $@; \ - $(CC) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@ - -.%.dep: %.cc tmpl.h - @set -e; rm -f $@; \ - $(CXX) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@ - -tmpl.cc: $(wildcard tmpl/*) - echo -n > $@ - for f in $*/*; \ - do xxd -i $$f >> $@; \ - done - -tmpl.h: tmpl.cc - echo '#ifndef $*_h' > $@ - echo '#define $*_h' >> $@ - grep '$*_' $^ | sed -e 's/^/extern /' -e 's/ =.*/;/' >> $@ - echo '#endif' >> $@ - -.PHONY: tests -tests: PATH := $(shell pwd):$(PATH) -tests: all - $(MAKE) -C $@ - -tests-clean: - $(MAKE) -C tests clean - -clean: tests-clean - -rm -f $(TARGET) *.o .*.dep tmpl.cc tmpl.h - --include $(CXXDEPS) diff --git a/Parser.cc b/Parser.cc deleted file mode 100644 index d50aa15..0000000 --- a/Parser.cc +++ /dev/null @@ -1,423 +0,0 @@ - -#include -#include -#include -#include /* toupper() */ - -#include -#include -#include -#include - -#include "Parser.h" -#include "TokenDefinition.h" -#include "RuleDefinition.h" -#include "tmpl.h" - -using namespace std; - -#define DEBUG - -Parser::Parser() - : m_classname("Parser"), m_namespace(""), m_extension("cc"), - m_token_data(new string()), m_token_code(new string()), - m_defines(new string()) -{ -} - -void Parser::makeDefine(const string & defname, const string & definition) -{ - *m_defines += string("#define ") + defname + " " + definition + "\n"; -} - -bool Parser::write(const string & fname) -{ - if (m_tokens.size() < 1 || m_rules.size() < 1) - return false; - - string header_fname = fname + ".h"; - string body_fname = fname + "." + m_extension; - - ofstream header(header_fname.c_str()); - ofstream body(body_fname.c_str()); - - /* process data */ - refptr token_classes = new string(); - refptr token_classes_code = new string(); - int i = 0; - for (list::const_iterator it = m_tokens.begin(); - it != m_tokens.end(); - it++) - { - char buff[20]; - sprintf(buff, "%d", i++); - makeDefine((*it)->getIdentifier(), buff); - *token_classes += (*it)->getClassDefinition(); - *token_classes_code += (*it)->getProcessMethod(); - } - if (m_namespace != "") - { - makeDefine("I_NAMESPACE", m_namespace); - } - makeDefine("I_CLASSNAME", m_classname); - - /* set up replacements */ - setReplacement("token_list", buildTokenList()); - setReplacement("buildToken", buildBuildToken()); - setReplacement("header_name", - new string(string("\"") + header_fname + "\"")); - setReplacement("token_code", m_token_code); - setReplacement("token_data", m_token_data); - setReplacement("defines", m_defines); - setReplacement("token_classes", token_classes); - setReplacement("token_classes_code", token_classes_code); - - /* write the header */ - writeTmpl(header, (char *) tmpl_parser_h, tmpl_parser_h_len); - - /* write the body */ - writeTmpl(body, (char *) tmpl_parser_cc, tmpl_parser_cc_len); - - header.close(); - body.close(); - - return true; -} - -bool Parser::writeTmpl(std::ostream & out, char * dat, int len) -{ - char * newline; - char * data = dat; - const char * errptr; - int erroffset; - data[len-1] = '\n'; - const int ovec_size = 6; - int ovector[ovec_size]; - pcre * replace = pcre_compile("{%(\\w+)%}", 0, &errptr, &erroffset, NULL); - while (data < (dat + len) && (newline = strstr(data, "\n")) != NULL) - { - if (pcre_exec(replace, NULL, data, newline - data, - 0, 0, ovector, ovec_size) >= 0) - { - if (ovector[0] > 0) - { - out.write(data, ovector[0]); - } - out << *getReplacement(string(data, ovector[2], - ovector[3] - ovector[2])); - if (ovector[1] < newline - data) - { - out.write(data + ovector[1], newline - data - ovector[1]); - } - } - else - { - out.write(data, newline - data); - } - out << '\n'; - data = newline + 1; - } -} - -refptr Parser::getReplacement(const std::string & name) -{ - if (m_replacements.find(name) != m_replacements.end()) - { - return m_replacements[name]; - } -#ifdef DEBUG - cerr << "No replacement found for \"" << name << "\"" << endl; -#endif - return new string(""); -} - -refptr Parser::buildTokenList() -{ - refptr tokenlist = new string(); - for (list::const_iterator t = m_tokens.begin(); - t != m_tokens.end(); - t++) - { - if (t != m_tokens.begin()) - *tokenlist += " "; - *tokenlist += "{ \"" + (*t)->getName() + "\", \"" - + (*t)->getCString() + "\", " - + ((*t)->getProcessFlag() ? "true" : "false") + " }"; - if (({typeof(t) tmp = t; ++tmp;}) != m_tokens.end()) - *tokenlist += ",\n"; - } - return tokenlist; -} - -refptr Parser::buildBuildToken() -{ - refptr buildToken = new string(); - for (list::const_iterator t = m_tokens.begin(); - t != m_tokens.end(); - t++) - { - *buildToken += "case " + (*t)->getIdentifier() + ":\n"; - *buildToken += " token = new " + (*t)->getClassName() + "();\n"; - *buildToken += " break;\n"; - } - return buildToken; -} - -bool Parser::parseInputFile(char * buff, int size) -{ - typedef pcre * pcre_ptr; - enum { none, tokens, rules }; - pcre_ptr empty, comment, section_name, token, rule, - data_begin, data_end, code_begin, code_end; - struct { pcre_ptr * re; const char * pattern; } exprs[] = { - {&empty, "^\\s*$"}, - {&comment, "^\\s*#"}, - {§ion_name, "^\\s*\\[([^\\]]+?)\\]\\s*$"}, - {&token, "^\\s*" /* possible leading ws */ - "([a-zA-Z_][a-zA-Z_0-9]*)" /* 1: token name */ - "\\s+" /* required whitespace */ - "((?:[^\\\\\\s]|\\\\.)+)"}, /* 2: token RE */ - {&rule, "^\\s*(\\S+)\\s*:=(.*)$"}, - {&data_begin, "^\\s*\\${"}, - {&data_end, "\\$}"}, - {&code_begin, "^\\s*%{"}, - {&code_end, "%}"} - }; - const int ovec_size = 3 * 10; - int ovector[ovec_size]; - int lineno = 0; - char * newline; - char * input = buff; - string current_section_name; - map sections; - sections["none"] = none; - sections["tokens"] = tokens; - sections["rules"] = rules; - int section = none; - string line; - bool append_line = false; - bool gathering_data = false; - bool gathering_code = false; - string gather; - bool continue_line = false; - TokenDefinitionRef current_token; - - for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++) - { - const char * errptr; - int erroffset; - *exprs[i].re = pcre_compile(exprs[i].pattern, 0, - &errptr, &erroffset, NULL); - if (*exprs[i].re == NULL) - { - cerr << "Error compiling regex '" << exprs[i].pattern << - "': " << errptr << " at position " << erroffset << endl; - return false; - } - } - - for (;;) - { - if (continue_line) - { - continue_line = false; - } - else - { - if ((newline = strstr(input, "\n")) == NULL) - break; - int line_length = newline - input; - if (line_length >= 1 && newline[-1] == '\r') - { - newline[-1] = '\n'; - line_length--; - } - lineno++; - - if (append_line) - { - line += string(input, line_length); - } - else - { - line = string(input, line_length); - } - input = newline + 1; /* set up for next loop iteration */ - } - - if ( (pcre_exec(empty, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - || (pcre_exec(comment, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - ) - { - /* skip empty or comment lines */; - continue; - } - - if (! (gathering_code || gathering_data) ) - { - if (line.size() > 0 && line[line.size()-1] == '\\') - { - line[line.size()-1] = ' '; - append_line = true; - continue; - } - else - { - append_line = false; - } - - if (pcre_exec(section_name, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - current_section_name - = string(line, ovector[2], ovector[3] - ovector[2]); - if (sections.find(current_section_name) != sections.end()) - { - section = sections[current_section_name]; - } - else - { - cerr << "Unknown section name '" << current_section_name - << "'!" << endl; - return false; - } - continue; - } - } - - switch (section) - { - case none: - cerr << "Unrecognized input on line " << lineno << endl; - return false; - case tokens: - if (gathering_data) - { - if (pcre_exec(data_end, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - gather += string(line, 0, ovector[0]) + "\n"; - gathering_data = false; - line = string(line, ovector[1]); - continue_line = true; - if (current_token.isNull()) - { - *m_token_data += gather; - } - else - { - current_token->addData(gather); - } - } - else - { - gather += line + "\n"; - } - continue; - } - else if (gathering_code) - { - if (pcre_exec(code_end, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - gather += string(line, 0, ovector[0]) + "\n"; - gathering_code = false; - line = string(line, ovector[1]); - continue_line = true; - if (current_token.isNull()) - { - *m_token_code += gather; - } - else - { - current_token->addCode(gather); - } - } - else - { - gather += line + "\n"; - } - continue; - } - else if (pcre_exec(data_begin, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - gathering_data = true; - gather = ""; - line = string(line, ovector[1]); - continue_line = true; - continue; - } - else if (pcre_exec(code_begin, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - gathering_code = true; - gather = ""; - line = string(line, ovector[1]); - continue_line = true; - continue; - } - else if (pcre_exec(token, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - string name(line, ovector[2], ovector[3] - ovector[2]); - string definition(line, - ovector[4], ovector[5] - ovector[4]); - current_token = new TokenDefinition(); - if (current_token->create(name, definition)) - { - addTokenDefinition(current_token); - } - else - { - cerr << "Error in token definition ending on line " - << lineno << endl; - return false; - } - line = string(line, ovector[1]); - continue_line = true; - continue; - } - else - { - cerr << "Unrecognized input on line " << lineno << endl; - return false; - } - break; - case rules: - if (pcre_exec(rule, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - string name(line, ovector[2], ovector[3] - ovector[2]); - string definition(line, - ovector[4], ovector[5] - ovector[4]); - refptr rd = new RuleDefinition(); - if (rd->create(name, definition)) - { - addRuleDefinition(rd); - } - else - { - cerr << "Error in rule definition ending on line " - << lineno << endl; - return false; - } - } - else - { - cerr << "Unrecognized input on line " << lineno << endl; - return false; - } - break; - } - } - - for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++) - { - pcre_free(*exprs[i].re); - } - - return true; -} diff --git a/Parser.h b/Parser.h deleted file mode 100644 index 2ac794a..0000000 --- a/Parser.h +++ /dev/null @@ -1,61 +0,0 @@ - -#ifndef PARSER_H -#define PARSER_H - -#include -#include -#include -#include - -#include "refptr.h" -#include "TokenDefinition.h" -#include "RuleDefinition.h" - -class Parser -{ - public: - Parser(); - void addTokenDefinition(refptr td) - { - m_tokens.push_back(td); - } - void addRuleDefinition(refptr rd) - { - m_rules.push_back(rd); - } - bool write(const std::string & fname); - bool parseInputFile(char * buff, int size); - - void setClassName(const std::string & cn) { m_classname = cn; } - std::string getClassName() { return m_classname; } - - void setNamespace(const std::string & ns) { m_namespace = ns; } - std::string getNamespace() { return m_namespace; } - - void setExtension(const std::string & e) { m_extension = e; } - std::string getExtension() { return m_extension; } - - protected: - refptr buildTokenList(); - refptr buildBuildToken(); - bool writeTmpl(std::ostream & out, char * dat, int len); - refptr getReplacement(const std::string & name); - void setReplacement(const std::string & name, refptr val) - { - m_replacements[name] = val; - } - void makeDefine(const std::string & defname, - const std::string & definition); - - std::list m_tokens; - std::vector< refptr< RuleDefinition > > m_rules; - std::string m_classname; - std::string m_namespace; - std::string m_extension; - std::map< std::string, refptr > m_replacements; - refptr m_token_data; - refptr m_token_code; - refptr m_defines; -}; - -#endif diff --git a/README b/README deleted file mode 100644 index 45a38c7..0000000 --- a/README +++ /dev/null @@ -1,5 +0,0 @@ -Imbecile is a bottom-up parser generator. It targets C++ and automatically -generates a class heirarchy for interacting with the parser. - -Imbecile generates both a lexer and a parser based on the rules given to -it in the input file. diff --git a/RuleDefinition.cc b/RuleDefinition.cc deleted file mode 100644 index aac4d2c..0000000 --- a/RuleDefinition.cc +++ /dev/null @@ -1,9 +0,0 @@ - -#include "RuleDefinition.h" - -using namespace std; - -bool RuleDefinition::create(const string & name, const string & definition) -{ - m_name = name; -} diff --git a/RuleDefinition.h b/RuleDefinition.h deleted file mode 100644 index b9c82c0..0000000 --- a/RuleDefinition.h +++ /dev/null @@ -1,16 +0,0 @@ - -#ifndef RULEDEFINITION_H -#define RULEDEFINITION_H - -#include - -class RuleDefinition -{ - public: - bool create(const std::string & name, const std::string & definition); - - protected: - std::string m_name; -}; - -#endif diff --git a/TokenDefinition.cc b/TokenDefinition.cc deleted file mode 100644 index 0abc529..0000000 --- a/TokenDefinition.cc +++ /dev/null @@ -1,125 +0,0 @@ - -#include - -#include -#include -#include - -#include "TokenDefinition.h" -#include "refptr.h" - -using namespace std; - -#define WHITESPACE " \n\r\t\v" - -static string trim(string s) -{ - size_t lastpos = s.find_last_not_of(WHITESPACE); - if (lastpos == string::npos) - return ""; - s.erase(lastpos + 1); - s.erase(0, s.find_first_not_of(WHITESPACE)); - return s; -} - -static refptr< vector > split(const string & delim, string str) -{ - refptr< vector > ret = new vector(); - size_t pos; - while ( (pos = str.find(delim)) != string::npos ) - { - string t = str.substr(0, pos); - ret->push_back(t); - str.erase(0, pos + 1); - } - if (str != "") - ret->push_back(str); - return ret; -} - -static string c_escape(const string & orig) -{ - string result; - for (string::const_iterator it = orig.begin(); it != orig.end(); it++) - { - if (*it == '\\' || *it == '"') - result += '\\'; - result += *it; - } - return result; -} - - -TokenDefinition::TokenDefinition() - : m_process(false) -{ -} - -bool TokenDefinition::create(const string & name, - const string & definition) -{ - const char * errptr; - int erroffset; - pcre * re = pcre_compile(definition.c_str(), 0, &errptr, &erroffset, NULL); - if (re == NULL) - { - cerr << "Error compiling regular expression '" << definition - << "' at position " << erroffset << ": " << errptr << endl; - return false; - } - m_name = name; - m_definition = definition; - pcre_free(re); - -#if 0 - refptr< vector< string > > parts = split(",", flags); - for (int i = 0, sz = parts->size(); i < sz; i++) - { - (*parts)[i] = trim((*parts)[i]); - string & s = (*parts)[i]; - if (s == "p") - { - m_process = true; - } - else - { - cerr << "Unknown token flag \"" << s << "\"" << endl; - return false; - } - } -#endif - - return true; -} - -string TokenDefinition::getCString() const -{ - return c_escape(m_definition); -} - -string TokenDefinition::getClassDefinition() const -{ - string ret = "class "+ getClassName() + " : public Token {\n"; - ret += "public:\n"; - if (m_process) - { - ret += " virtual void process(const Matches & matches);\n"; - } - ret += "\n"; - ret += "protected:\n"; - ret += m_data + "\n"; - ret += "};\n"; - return ret; -} - -string TokenDefinition::getProcessMethod() const -{ - string ret; - if (m_code != "") - { - ret += "void " + getClassName() + "::process(const Matches & matches) {\n"; - ret += m_code + "\n"; - ret += "}\n"; - } - return ret; -} diff --git a/TokenDefinition.h b/TokenDefinition.h deleted file mode 100644 index 0c0b489..0000000 --- a/TokenDefinition.h +++ /dev/null @@ -1,37 +0,0 @@ - -#ifndef TOKENDEFINITION_H -#define TOKENDEFINITION_H - -#include -#include "refptr.h" - -class TokenDefinition -{ - public: - TokenDefinition(); - bool create(const std::string & name, - const std::string & definition); - std::string getCString() const; - std::string getName() const { return m_name; } - bool getProcessFlag() const { return m_process; } - void setProcessFlag(bool p) { m_process = p; } - void addData(const std::string & d) { m_data += d; } - std::string getData() const { return m_data; } - void addCode(const std::string & c) { m_code += c; m_process = true; } - std::string getCode() const { return m_code; } - std::string getClassDefinition() const; - std::string getProcessMethod() const; - std::string getIdentifier() const { return "TK_" + m_name; } - std::string getClassName() const { return "Tk" + m_name; } - - protected: - std::string m_name; - std::string m_definition; - bool m_process; - std::string m_data; - std::string m_code; -}; - -typedef refptr TokenDefinitionRef; - -#endif diff --git a/imbecile.cc b/imbecile.cc deleted file mode 100644 index 52e15b9..0000000 --- a/imbecile.cc +++ /dev/null @@ -1,101 +0,0 @@ - -#include - -#include -#include - -#include "refptr.h" -#include "Parser.h" - -using namespace std; - -string buildOutputFilename(string & input_fname); - -int main(int argc, char * argv[]) -{ - int longind = 1; - int opt; - Parser p; - string outfile; - - static struct option longopts[] = { - /* name, has_arg, flag, val */ - { "classname", required_argument, NULL, 'c' }, - { "extension", required_argument, NULL, 'e' }, - { "namespace", required_argument, NULL, 'n' }, - { "outfile", required_argument, NULL, 'o' }, - { NULL, 0, NULL, 0 } - }; - - while ((opt = getopt_long(argc, argv, "", longopts, &longind)) != -1) - { - switch (opt) - { - case 'c': /* classname */ - p.setClassName(optarg); - break; - case 'e': /* extension */ - p.setExtension(optarg); - break; - case 'n': /* namespace */ - p.setNamespace(optarg); - break; - case 'o': /* outfile */ - outfile = optarg; - break; - } - } - - if (optind >= argc) - { - cerr << "Usage: imbecile [options] " << endl; - return 1; - } - - string input_fname = argv[optind]; - ifstream ifs; - ifs.open(input_fname.c_str(), ios::binary); - if (!ifs.is_open()) - { - cerr << "Error opening input file: '" << input_fname << "'"; - return 2; - } - ifs.seekg(0, ios_base::end); - int size = ifs.tellg(); - ifs.seekg(0, ios_base::beg); - char * buff = new char[size]; - ifs.read(buff, size); - ifs.close(); - - if (outfile == "") - outfile = buildOutputFilename(input_fname); - - if (!p.parseInputFile(buff, size)) - { - cerr << "Error parsing " << input_fname << endl; - return 3; - } - if (!p.write(outfile)) - { - cerr << "Error processing " << input_fname << endl; - return 4; - } - - delete[] buff; - return 0; -} - -string buildOutputFilename(string & input_fname) -{ - string outfile; - size_t len = input_fname.length(); - if (len > 2 && input_fname.substr(len - 2) == ".I") - { - outfile = input_fname.substr(0, len - 2); - } - else - { - outfile = input_fname; - } - return outfile; -} diff --git a/refptr b/refptr deleted file mode 160000 index e2c7e88..0000000 --- a/refptr +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e2c7e88824c18eb3b218f6308db0194edb422eef diff --git a/tests/Makefile b/tests/Makefile deleted file mode 100644 index decd03a..0000000 --- a/tests/Makefile +++ /dev/null @@ -1,14 +0,0 @@ - -all: - for d in *; do \ - if [ -d $$d ]; then \ - make -C $$d; \ - fi; \ - done - -clean: - for d in *; do \ - if [ -d $$d ]; then \ - make -C $$d clean; \ - fi; \ - done diff --git a/tests/build/Makefile b/tests/build/Makefile deleted file mode 100644 index 6028b98..0000000 --- a/tests/build/Makefile +++ /dev/null @@ -1,15 +0,0 @@ - -TARGET := test -I_SOURCE := itest -CXXFLAGS := -O2 -LDFLAGS := -lpcre - -all: $(TARGET) - ./$(TARGET) - -$(TARGET): $(shell which imbecile) $(I_SOURCE).I $(wildcard *.cc) - imbecile $(I_SOURCE).I - $(CXX) -o $@ *.cc $(LDFLAGS) - -clean: - -rm -f $(TARGET) *.o $(I_SOURCE).cc $(I_SOURCE).h diff --git a/tests/build/itest.I b/tests/build/itest.I deleted file mode 100644 index aacf3e6..0000000 --- a/tests/build/itest.I +++ /dev/null @@ -1,37 +0,0 @@ - -[tokens] - -AND and -OR or -NOT not -LPAREN \( -RPAREN \) -WS \s+ -EQUALS = %{ cout << "Saw '='" << endl; %} -IDENTIFIER [a-zA-Z_][a-zA-Z_0-9]* %{ - cout << "Identify: '" << matches[0] << "'" << endl; -%} - -DEC_INT [1-9]\d*\b -${ - uint64_t value; -$} -%{ - sscanf(matches[0].c_str(), "%lld", &value); - cout << "value: " << value << endl; -%} - -HEX_INT 0x([0-9a-fA-F]+)\b ${ uint64_t value; $} %{ - sscanf(matches[1].c_str(), "%llx", &value); - cout << "value: " << value << endl; -%} - -OCT_INT 0([0-7]*)\b -BIN_INT 0b([01]+)\b - -[rules] - -Assignment := IDENTIFIER ASSIGN Expression - -Expression := IDENTIFIER \ - | Assignment diff --git a/tests/build/main.cc b/tests/build/main.cc deleted file mode 100644 index 029104e..0000000 --- a/tests/build/main.cc +++ /dev/null @@ -1,17 +0,0 @@ - -#include -#include - -#include "itest.h" - -using namespace std; - -int main(int argc, char * argv[]) -{ - Parser p; - stringstream t(string( - "hi there (one and two and three and four) or (two = nine)\n" - "0x42 12345 0 011 0b0011\n" - )); - p.parse(t); -} diff --git a/tmpl/parser.cc b/tmpl/parser.cc deleted file mode 100644 index 76cb238..0000000 --- a/tmpl/parser.cc +++ /dev/null @@ -1,202 +0,0 @@ - -#include /* memcpy() */ -#include - -#include -#include - -#include {%header_name%} - -using namespace std; - -#ifdef I_NAMESPACE -namespace I_NAMESPACE { -#endif - -I_CLASSNAME::I_CLASSNAME() - : m_errstr(NULL) -{ -} - -static TokenRef buildToken(int typeindex) -{ - TokenRef token; - switch (typeindex) - { - {%buildToken%} - } - if (!token.isNull()) - { - token->setType(typeindex); - } - return token; -} - -static void read_istream(istream & i, vector & buff, int & size) -{ - size = 0; - int bytes_read; - char read_buff[1000]; - while (!i.eof()) - { - i.read(&read_buff[0], sizeof(read_buff)); - bytes_read = i.gcount(); - size += bytes_read; - for (int j = 0; j < bytes_read; j++) - buff.push_back(read_buff[j]); - } -} - -bool I_CLASSNAME::parse(istream & i) -{ - struct { - const char * name; - const char * definition; - bool process; - pcre * re; - pcre_extra * re_extra; - } tokens[] = { - {%token_list%} - }; - - if (sizeof(tokens)/sizeof(tokens[0]) == 0) - { - m_errstr = "No tokens defined"; - return false; - } - - vector buff; - int buff_size; - read_istream(i, buff, buff_size); - - if (buff_size <= 0) - { - m_errstr = "0-length input string"; - return false; - } - - /* append trailing NUL byte for pcre functions */ - buff.push_back('\0'); - - /* compile all token regular expressions */ - for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++) - { - const char * errptr; - int erroffset; - tokens[i].re = pcre_compile(tokens[i].definition, 0, - &errptr, &erroffset, NULL); - if (tokens[i].re == NULL) - { - cerr << "Error compiling token '" << tokens[i].name - << "' regular expression at position " << erroffset - << ": " << errptr << endl; - m_errstr = "Error in token regular expression"; - return false; - } - tokens[i].re_extra = pcre_study(tokens[i].re, 0, &errptr); - } - - int buff_pos = 0; - const int ovector_num_matches = 16; - const int ovector_size = 3 * (ovector_num_matches + 1); - int ovector[ovector_size]; - while (buff_pos < buff_size) - { - int longest_match_length = 0; - int longest_match_index = -1; - int longest_match_ovector[ovector_size]; - for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++) - { - int rc = pcre_exec(tokens[i].re, tokens[i].re_extra, - &buff[0], buff_size, buff_pos, - PCRE_ANCHORED | PCRE_NOTEMPTY, - ovector, ovector_size); - if (rc > 0) - { - /* this pattern matched some of the input */ - int len = ovector[1] - ovector[0]; - if (len > longest_match_length) - { - longest_match_length = len; - longest_match_index = i; - memcpy(longest_match_ovector, ovector, sizeof(ovector)); - } - } - } - if (longest_match_index < 0) - { - /* no pattern matched the input at the current position */ - cerr << "Parse error" << endl; - return false; - } - Matches matches(tokens[longest_match_index].re, - &buff[0], longest_match_ovector, ovector_size); - TokenRef token = buildToken(longest_match_index); - if (token.isNull()) - { - cerr << "Internal Error: null token" << endl; - return false; - } - token->process(matches); - m_tokens.push_back(token); - buff_pos += longest_match_length; - } -} - -refptr Node::operator[](int index) -{ - return (0 <= index && index < m_indexed_children.size()) - ? m_indexed_children[index] - : NULL; -} - -refptr Node::operator[](const std::string & index) -{ - return (m_named_children.find(index) != m_named_children.end()) - ? m_named_children[index] - : NULL; -} - -void Token::process(const Matches & matches) -{ - {%token_code%} -} - -Matches::Matches(pcre * re, const char * data, int * ovector, int ovec_size) - : m_re(re), m_data(data), m_ovector(ovector), m_ovec_size(ovec_size) -{ -} - -std::string Matches::operator[](int index) const -{ - if (0 <= index && index < (m_ovec_size / 3)) - { - int idx = 2 * index; - if (m_ovector[idx] >= 0 && m_ovector[idx + 1] >= 0) - { - return string(m_data, m_ovector[idx], - m_ovector[idx + 1] - m_ovector[idx]); - } - } - return ""; -} - -std::string Matches::operator[](const std::string & index) const -{ - int idx = pcre_get_stringnumber(m_re, index.c_str()); - if (idx > 0 && idx < (m_ovec_size / 3)) - { - if (m_ovector[idx] >= 0 && m_ovector[idx + 1] >= 0) - { - return string(m_data, m_ovector[idx], - m_ovector[idx + 1] - m_ovector[idx]); - } - } - return ""; -} - -{%token_classes_code%} - -#ifdef I_NAMESPACE -}; -#endif diff --git a/tmpl/parser.h b/tmpl/parser.h deleted file mode 100644 index 5908acc..0000000 --- a/tmpl/parser.h +++ /dev/null @@ -1,181 +0,0 @@ - -#ifndef IMBECILE_PARSER_HEADER -#define IMBECILE_PARSER_HEADER - -#include -#include -#include -#include - -#include -#include -#include -#include - -{%user_includes%} - -{%defines%} - -#ifdef I_NAMESPACE -namespace I_NAMESPACE { -#endif - -#ifndef REFPTR_H -#define REFPTR_H REFPTR_H - -/* Author: Josh Holtrop - * Purpose: Provide a reference-counting pointer-like first order - * C++ object that will free the object it is pointing to when - * all references to it have been destroyed. - * This implementation does not solve the circular reference problem. - * I was not concerned with that when developing this class. - */ -#include /* NULL */ - -template -class refptr -{ - public: - refptr(); - refptr(T * ptr); - refptr(const refptr & orig); - refptr & operator=(const refptr & orig); - refptr & operator=(T * ptr); - ~refptr(); - T & operator*() const { return *m_ptr; } - T * operator->() const { return m_ptr; } - bool isNull() const { return m_ptr == NULL; } - - private: - void cloneFrom(const refptr & orig); - void destroy(); - - T * m_ptr; - int * m_refCount; -}; - -template refptr::refptr() -{ - m_ptr = NULL; - m_refCount = NULL; -} - -template refptr::refptr(T * ptr) -{ - m_ptr = ptr; - m_refCount = new int; - *m_refCount = 1; -} - -template refptr::refptr(const refptr & orig) -{ - cloneFrom(orig); -} - -template refptr & refptr::operator=(const refptr & orig) -{ - destroy(); - cloneFrom(orig); - return *this; -} - -template refptr & refptr::operator=(T * ptr) -{ - destroy(); - m_ptr = ptr; - m_refCount = new int; - *m_refCount = 1; - return *this; -} - -template void refptr::cloneFrom(const refptr & orig) -{ - this->m_ptr = orig.m_ptr; - this->m_refCount = orig.m_refCount; - if (m_refCount != NULL) - (*m_refCount)++; -} - -template refptr::~refptr() -{ - destroy(); -} - -template void refptr::destroy() -{ - if (m_refCount != NULL) - { - if (*m_refCount <= 1) - { - delete m_ptr; - delete m_refCount; - } - else - { - (*m_refCount)--; - } - } -} - -#endif - - -class Matches -{ - public: - Matches(pcre * re, const char * data, int * ovector, int ovec_size); - std::string operator[](int index) const; - std::string operator[](const std::string & index) const; - - protected: - pcre * m_re; - const char * m_data; - int * m_ovector; - int m_ovec_size; -}; - -class Node -{ - public: - refptr operator[](int index); - refptr operator[](const std::string & index); - - protected: - std::map< std::string, refptr > m_named_children; - std::vector< refptr > m_indexed_children; -}; -typedef refptr NodeRef; - -class Token : public Node -{ - public: - virtual void process(const Matches & matches); - void setType(int type) { m_type = type; } - int getType() const { return m_type; } - - protected: - int m_type; - - {%token_data%} -}; -typedef refptr TokenRef; - -{%token_classes%} - -class I_CLASSNAME -{ - public: - I_CLASSNAME(); - bool parse(std::istream & in); - const char * getError() { return m_errstr; } - - protected: - const char * m_errstr; - std::list m_tokens; -}; - -#ifdef I_NAMESPACE -}; -#endif - -#endif /* IMBECILE_PARSER_HEADER */