From bc217e7ddbc27a687a33491db04f04c9d6b78a13 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Thu, 29 Apr 2021 23:18:22 -0400 Subject: [PATCH] Start on ruby branch --- .gitignore | 9 - .gitmodules | 3 - Makefile | 61 ------- Parser.cc | 423 ------------------------------------------- Parser.h | 61 ------- README | 5 - RuleDefinition.cc | 9 - RuleDefinition.h | 16 -- TokenDefinition.cc | 125 ------------- TokenDefinition.h | 37 ---- imbecile.cc | 101 ----------- refptr | 1 - tests/Makefile | 14 -- tests/build/Makefile | 15 -- tests/build/itest.I | 37 ---- tests/build/main.cc | 17 -- tmpl/parser.cc | 202 --------------------- tmpl/parser.h | 181 ------------------ 18 files changed, 1317 deletions(-) delete mode 100644 .gitignore delete mode 100644 .gitmodules delete mode 100644 Makefile delete mode 100644 Parser.cc delete mode 100644 Parser.h delete mode 100644 README delete mode 100644 RuleDefinition.cc delete mode 100644 RuleDefinition.h delete mode 100644 TokenDefinition.cc delete mode 100644 TokenDefinition.h delete mode 100644 imbecile.cc delete mode 160000 refptr delete mode 100644 tests/Makefile delete mode 100644 tests/build/Makefile delete mode 100644 tests/build/itest.I delete mode 100644 tests/build/main.cc delete mode 100644 tmpl/parser.cc delete mode 100644 tmpl/parser.h diff --git a/.gitignore b/.gitignore deleted file mode 100644 index bcf881b..0000000 --- a/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -imbecile -tags -*.o -.*.swp -*.dep -tmpl.* -tests/*/itest.cc -tests/*/itest.h -tests/*/test diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 91fd005..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "refptr"] - path = refptr - url = http://github.com/holtrop/refptr.git diff --git a/Makefile b/Makefile deleted file mode 100644 index d9e6007..0000000 --- a/Makefile +++ /dev/null @@ -1,61 +0,0 @@ - -TARGET := imbecile -CXXOBJS := $(patsubst %.cc,%.o,$(wildcard *.cc)) tmpl.o -CXXDEPS := $(patsubst %.o,.%.dep,$(CXXOBJS)) -CXXFLAGS := -O2 -DEPS := $(CXXDEPS) -OBJS := $(CXXOBJS) -LDFLAGS := -lpcre -CPPFLAGS := -I$(shell pwd)/refptr - -all: submodule_check tmpl.h $(TARGET) - -.PHONY: submodule_check -submodule_check: - @if [ ! 
-e refptr/refptr.h ]; then \ - echo Error: \"refptr\" folder is not populated.; \ - echo Perhaps you forgot to do \"git checkout --recursive\"?; \ - echo You can remedy the situation with \"git submodule update --init\".; \ - exit 1; \ - fi - -$(TARGET): $(OBJS) - $(CXX) -o $@ $^ $(LDFLAGS) - -# Object file rules -%.o: %.cc - $(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $< - -# Make dependency files -.%.dep: %.c - @set -e; rm -f $@; \ - $(CC) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@ - -.%.dep: %.cc tmpl.h - @set -e; rm -f $@; \ - $(CXX) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@ - -tmpl.cc: $(wildcard tmpl/*) - echo -n > $@ - for f in $*/*; \ - do xxd -i $$f >> $@; \ - done - -tmpl.h: tmpl.cc - echo '#ifndef $*_h' > $@ - echo '#define $*_h' >> $@ - grep '$*_' $^ | sed -e 's/^/extern /' -e 's/ =.*/;/' >> $@ - echo '#endif' >> $@ - -.PHONY: tests -tests: PATH := $(shell pwd):$(PATH) -tests: all - $(MAKE) -C $@ - -tests-clean: - $(MAKE) -C tests clean - -clean: tests-clean - -rm -f $(TARGET) *.o .*.dep tmpl.cc tmpl.h - --include $(CXXDEPS) diff --git a/Parser.cc b/Parser.cc deleted file mode 100644 index d50aa15..0000000 --- a/Parser.cc +++ /dev/null @@ -1,423 +0,0 @@ - -#include -#include -#include -#include /* toupper() */ - -#include -#include -#include -#include - -#include "Parser.h" -#include "TokenDefinition.h" -#include "RuleDefinition.h" -#include "tmpl.h" - -using namespace std; - -#define DEBUG - -Parser::Parser() - : m_classname("Parser"), m_namespace(""), m_extension("cc"), - m_token_data(new string()), m_token_code(new string()), - m_defines(new string()) -{ -} - -void Parser::makeDefine(const string & defname, const string & definition) -{ - *m_defines += string("#define ") + defname + " " + definition + "\n"; -} - -bool Parser::write(const string & fname) -{ - if (m_tokens.size() < 1 || m_rules.size() < 1) - return false; - - string header_fname = fname + ".h"; - string body_fname = fname + "." 
+ m_extension; - - ofstream header(header_fname.c_str()); - ofstream body(body_fname.c_str()); - - /* process data */ - refptr token_classes = new string(); - refptr token_classes_code = new string(); - int i = 0; - for (list::const_iterator it = m_tokens.begin(); - it != m_tokens.end(); - it++) - { - char buff[20]; - sprintf(buff, "%d", i++); - makeDefine((*it)->getIdentifier(), buff); - *token_classes += (*it)->getClassDefinition(); - *token_classes_code += (*it)->getProcessMethod(); - } - if (m_namespace != "") - { - makeDefine("I_NAMESPACE", m_namespace); - } - makeDefine("I_CLASSNAME", m_classname); - - /* set up replacements */ - setReplacement("token_list", buildTokenList()); - setReplacement("buildToken", buildBuildToken()); - setReplacement("header_name", - new string(string("\"") + header_fname + "\"")); - setReplacement("token_code", m_token_code); - setReplacement("token_data", m_token_data); - setReplacement("defines", m_defines); - setReplacement("token_classes", token_classes); - setReplacement("token_classes_code", token_classes_code); - - /* write the header */ - writeTmpl(header, (char *) tmpl_parser_h, tmpl_parser_h_len); - - /* write the body */ - writeTmpl(body, (char *) tmpl_parser_cc, tmpl_parser_cc_len); - - header.close(); - body.close(); - - return true; -} - -bool Parser::writeTmpl(std::ostream & out, char * dat, int len) -{ - char * newline; - char * data = dat; - const char * errptr; - int erroffset; - data[len-1] = '\n'; - const int ovec_size = 6; - int ovector[ovec_size]; - pcre * replace = pcre_compile("{%(\\w+)%}", 0, &errptr, &erroffset, NULL); - while (data < (dat + len) && (newline = strstr(data, "\n")) != NULL) - { - if (pcre_exec(replace, NULL, data, newline - data, - 0, 0, ovector, ovec_size) >= 0) - { - if (ovector[0] > 0) - { - out.write(data, ovector[0]); - } - out << *getReplacement(string(data, ovector[2], - ovector[3] - ovector[2])); - if (ovector[1] < newline - data) - { - out.write(data + ovector[1], newline - data - ovector[1]); - } - } - else - { - out.write(data, newline - data); - } - out << '\n'; - data = newline + 1; - } -} - -refptr Parser::getReplacement(const std::string & name) -{ - if (m_replacements.find(name) != m_replacements.end()) - { - return m_replacements[name]; - } -#ifdef DEBUG - cerr << "No replacement found for \"" << name << "\"" << endl; -#endif - return new string(""); -} - -refptr Parser::buildTokenList() -{ - refptr tokenlist = new string(); - for (list::const_iterator t = m_tokens.begin(); - t != m_tokens.end(); - t++) - { - if (t != m_tokens.begin()) - *tokenlist += " "; - *tokenlist += "{ \"" + (*t)->getName() + "\", \"" - + (*t)->getCString() + "\", " - + ((*t)->getProcessFlag() ? 
"true" : "false") + " }"; - if (({typeof(t) tmp = t; ++tmp;}) != m_tokens.end()) - *tokenlist += ",\n"; - } - return tokenlist; -} - -refptr Parser::buildBuildToken() -{ - refptr buildToken = new string(); - for (list::const_iterator t = m_tokens.begin(); - t != m_tokens.end(); - t++) - { - *buildToken += "case " + (*t)->getIdentifier() + ":\n"; - *buildToken += " token = new " + (*t)->getClassName() + "();\n"; - *buildToken += " break;\n"; - } - return buildToken; -} - -bool Parser::parseInputFile(char * buff, int size) -{ - typedef pcre * pcre_ptr; - enum { none, tokens, rules }; - pcre_ptr empty, comment, section_name, token, rule, - data_begin, data_end, code_begin, code_end; - struct { pcre_ptr * re; const char * pattern; } exprs[] = { - {&empty, "^\\s*$"}, - {&comment, "^\\s*#"}, - {§ion_name, "^\\s*\\[([^\\]]+?)\\]\\s*$"}, - {&token, "^\\s*" /* possible leading ws */ - "([a-zA-Z_][a-zA-Z_0-9]*)" /* 1: token name */ - "\\s+" /* required whitespace */ - "((?:[^\\\\\\s]|\\\\.)+)"}, /* 2: token RE */ - {&rule, "^\\s*(\\S+)\\s*:=(.*)$"}, - {&data_begin, "^\\s*\\${"}, - {&data_end, "\\$}"}, - {&code_begin, "^\\s*%{"}, - {&code_end, "%}"} - }; - const int ovec_size = 3 * 10; - int ovector[ovec_size]; - int lineno = 0; - char * newline; - char * input = buff; - string current_section_name; - map sections; - sections["none"] = none; - sections["tokens"] = tokens; - sections["rules"] = rules; - int section = none; - string line; - bool append_line = false; - bool gathering_data = false; - bool gathering_code = false; - string gather; - bool continue_line = false; - TokenDefinitionRef current_token; - - for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++) - { - const char * errptr; - int erroffset; - *exprs[i].re = pcre_compile(exprs[i].pattern, 0, - &errptr, &erroffset, NULL); - if (*exprs[i].re == NULL) - { - cerr << "Error compiling regex '" << exprs[i].pattern << - "': " << errptr << " at position " << erroffset << endl; - return false; - } - } - - for (;;) - { - if (continue_line) - { - continue_line = false; - } - else - { - if ((newline = strstr(input, "\n")) == NULL) - break; - int line_length = newline - input; - if (line_length >= 1 && newline[-1] == '\r') - { - newline[-1] = '\n'; - line_length--; - } - lineno++; - - if (append_line) - { - line += string(input, line_length); - } - else - { - line = string(input, line_length); - } - input = newline + 1; /* set up for next loop iteration */ - } - - if ( (pcre_exec(empty, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - || (pcre_exec(comment, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - ) - { - /* skip empty or comment lines */; - continue; - } - - if (! (gathering_code || gathering_data) ) - { - if (line.size() > 0 && line[line.size()-1] == '\\') - { - line[line.size()-1] = ' '; - append_line = true; - continue; - } - else - { - append_line = false; - } - - if (pcre_exec(section_name, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - current_section_name - = string(line, ovector[2], ovector[3] - ovector[2]); - if (sections.find(current_section_name) != sections.end()) - { - section = sections[current_section_name]; - } - else - { - cerr << "Unknown section name '" << current_section_name - << "'!" 
<< endl; - return false; - } - continue; - } - } - - switch (section) - { - case none: - cerr << "Unrecognized input on line " << lineno << endl; - return false; - case tokens: - if (gathering_data) - { - if (pcre_exec(data_end, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - gather += string(line, 0, ovector[0]) + "\n"; - gathering_data = false; - line = string(line, ovector[1]); - continue_line = true; - if (current_token.isNull()) - { - *m_token_data += gather; - } - else - { - current_token->addData(gather); - } - } - else - { - gather += line + "\n"; - } - continue; - } - else if (gathering_code) - { - if (pcre_exec(code_end, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - gather += string(line, 0, ovector[0]) + "\n"; - gathering_code = false; - line = string(line, ovector[1]); - continue_line = true; - if (current_token.isNull()) - { - *m_token_code += gather; - } - else - { - current_token->addCode(gather); - } - } - else - { - gather += line + "\n"; - } - continue; - } - else if (pcre_exec(data_begin, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - gathering_data = true; - gather = ""; - line = string(line, ovector[1]); - continue_line = true; - continue; - } - else if (pcre_exec(code_begin, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - gathering_code = true; - gather = ""; - line = string(line, ovector[1]); - continue_line = true; - continue; - } - else if (pcre_exec(token, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - string name(line, ovector[2], ovector[3] - ovector[2]); - string definition(line, - ovector[4], ovector[5] - ovector[4]); - current_token = new TokenDefinition(); - if (current_token->create(name, definition)) - { - addTokenDefinition(current_token); - } - else - { - cerr << "Error in token definition ending on line " - << lineno << endl; - return false; - } - line = string(line, ovector[1]); - continue_line = true; - continue; - } - else - { - cerr << "Unrecognized input on line " << lineno << endl; - return false; - } - break; - case rules: - if (pcre_exec(rule, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - string name(line, ovector[2], ovector[3] - ovector[2]); - string definition(line, - ovector[4], ovector[5] - ovector[4]); - refptr rd = new RuleDefinition(); - if (rd->create(name, definition)) - { - addRuleDefinition(rd); - } - else - { - cerr << "Error in rule definition ending on line " - << lineno << endl; - return false; - } - } - else - { - cerr << "Unrecognized input on line " << lineno << endl; - return false; - } - break; - } - } - - for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++) - { - pcre_free(*exprs[i].re); - } - - return true; -} diff --git a/Parser.h b/Parser.h deleted file mode 100644 index 2ac794a..0000000 --- a/Parser.h +++ /dev/null @@ -1,61 +0,0 @@ - -#ifndef PARSER_H -#define PARSER_H - -#include -#include -#include -#include - -#include "refptr.h" -#include "TokenDefinition.h" -#include "RuleDefinition.h" - -class Parser -{ - public: - Parser(); - void addTokenDefinition(refptr td) - { - m_tokens.push_back(td); - } - void addRuleDefinition(refptr rd) - { - m_rules.push_back(rd); - } - bool write(const std::string & fname); - bool parseInputFile(char * buff, int size); - - void setClassName(const std::string & cn) { m_classname = cn; } - std::string getClassName() { return m_classname; } - - void setNamespace(const std::string & ns) { m_namespace = ns; } - std::string 
getNamespace() { return m_namespace; } - - void setExtension(const std::string & e) { m_extension = e; } - std::string getExtension() { return m_extension; } - - protected: - refptr buildTokenList(); - refptr buildBuildToken(); - bool writeTmpl(std::ostream & out, char * dat, int len); - refptr getReplacement(const std::string & name); - void setReplacement(const std::string & name, refptr val) - { - m_replacements[name] = val; - } - void makeDefine(const std::string & defname, - const std::string & definition); - - std::list m_tokens; - std::vector< refptr< RuleDefinition > > m_rules; - std::string m_classname; - std::string m_namespace; - std::string m_extension; - std::map< std::string, refptr > m_replacements; - refptr m_token_data; - refptr m_token_code; - refptr m_defines; -}; - -#endif diff --git a/README b/README deleted file mode 100644 index 45a38c7..0000000 --- a/README +++ /dev/null @@ -1,5 +0,0 @@ -Imbecile is a bottom-up parser generator. It targets C++ and automatically -generates a class heirarchy for interacting with the parser. - -Imbecile generates both a lexer and a parser based on the rules given to -it in the input file. diff --git a/RuleDefinition.cc b/RuleDefinition.cc deleted file mode 100644 index aac4d2c..0000000 --- a/RuleDefinition.cc +++ /dev/null @@ -1,9 +0,0 @@ - -#include "RuleDefinition.h" - -using namespace std; - -bool RuleDefinition::create(const string & name, const string & definition) -{ - m_name = name; -} diff --git a/RuleDefinition.h b/RuleDefinition.h deleted file mode 100644 index b9c82c0..0000000 --- a/RuleDefinition.h +++ /dev/null @@ -1,16 +0,0 @@ - -#ifndef RULEDEFINITION_H -#define RULEDEFINITION_H - -#include - -class RuleDefinition -{ - public: - bool create(const std::string & name, const std::string & definition); - - protected: - std::string m_name; -}; - -#endif diff --git a/TokenDefinition.cc b/TokenDefinition.cc deleted file mode 100644 index 0abc529..0000000 --- a/TokenDefinition.cc +++ /dev/null @@ -1,125 +0,0 @@ - -#include - -#include -#include -#include - -#include "TokenDefinition.h" -#include "refptr.h" - -using namespace std; - -#define WHITESPACE " \n\r\t\v" - -static string trim(string s) -{ - size_t lastpos = s.find_last_not_of(WHITESPACE); - if (lastpos == string::npos) - return ""; - s.erase(lastpos + 1); - s.erase(0, s.find_first_not_of(WHITESPACE)); - return s; -} - -static refptr< vector > split(const string & delim, string str) -{ - refptr< vector > ret = new vector(); - size_t pos; - while ( (pos = str.find(delim)) != string::npos ) - { - string t = str.substr(0, pos); - ret->push_back(t); - str.erase(0, pos + 1); - } - if (str != "") - ret->push_back(str); - return ret; -} - -static string c_escape(const string & orig) -{ - string result; - for (string::const_iterator it = orig.begin(); it != orig.end(); it++) - { - if (*it == '\\' || *it == '"') - result += '\\'; - result += *it; - } - return result; -} - - -TokenDefinition::TokenDefinition() - : m_process(false) -{ -} - -bool TokenDefinition::create(const string & name, - const string & definition) -{ - const char * errptr; - int erroffset; - pcre * re = pcre_compile(definition.c_str(), 0, &errptr, &erroffset, NULL); - if (re == NULL) - { - cerr << "Error compiling regular expression '" << definition - << "' at position " << erroffset << ": " << errptr << endl; - return false; - } - m_name = name; - m_definition = definition; - pcre_free(re); - -#if 0 - refptr< vector< string > > parts = split(",", flags); - for (int i = 0, sz = parts->size(); i < sz; i++) - { - 
(*parts)[i] = trim((*parts)[i]); - string & s = (*parts)[i]; - if (s == "p") - { - m_process = true; - } - else - { - cerr << "Unknown token flag \"" << s << "\"" << endl; - return false; - } - } -#endif - - return true; -} - -string TokenDefinition::getCString() const -{ - return c_escape(m_definition); -} - -string TokenDefinition::getClassDefinition() const -{ - string ret = "class "+ getClassName() + " : public Token {\n"; - ret += "public:\n"; - if (m_process) - { - ret += " virtual void process(const Matches & matches);\n"; - } - ret += "\n"; - ret += "protected:\n"; - ret += m_data + "\n"; - ret += "};\n"; - return ret; -} - -string TokenDefinition::getProcessMethod() const -{ - string ret; - if (m_code != "") - { - ret += "void " + getClassName() + "::process(const Matches & matches) {\n"; - ret += m_code + "\n"; - ret += "}\n"; - } - return ret; -} diff --git a/TokenDefinition.h b/TokenDefinition.h deleted file mode 100644 index 0c0b489..0000000 --- a/TokenDefinition.h +++ /dev/null @@ -1,37 +0,0 @@ - -#ifndef TOKENDEFINITION_H -#define TOKENDEFINITION_H - -#include -#include "refptr.h" - -class TokenDefinition -{ - public: - TokenDefinition(); - bool create(const std::string & name, - const std::string & definition); - std::string getCString() const; - std::string getName() const { return m_name; } - bool getProcessFlag() const { return m_process; } - void setProcessFlag(bool p) { m_process = p; } - void addData(const std::string & d) { m_data += d; } - std::string getData() const { return m_data; } - void addCode(const std::string & c) { m_code += c; m_process = true; } - std::string getCode() const { return m_code; } - std::string getClassDefinition() const; - std::string getProcessMethod() const; - std::string getIdentifier() const { return "TK_" + m_name; } - std::string getClassName() const { return "Tk" + m_name; } - - protected: - std::string m_name; - std::string m_definition; - bool m_process; - std::string m_data; - std::string m_code; -}; - -typedef refptr TokenDefinitionRef; - -#endif diff --git a/imbecile.cc b/imbecile.cc deleted file mode 100644 index 52e15b9..0000000 --- a/imbecile.cc +++ /dev/null @@ -1,101 +0,0 @@ - -#include - -#include -#include - -#include "refptr.h" -#include "Parser.h" - -using namespace std; - -string buildOutputFilename(string & input_fname); - -int main(int argc, char * argv[]) -{ - int longind = 1; - int opt; - Parser p; - string outfile; - - static struct option longopts[] = { - /* name, has_arg, flag, val */ - { "classname", required_argument, NULL, 'c' }, - { "extension", required_argument, NULL, 'e' }, - { "namespace", required_argument, NULL, 'n' }, - { "outfile", required_argument, NULL, 'o' }, - { NULL, 0, NULL, 0 } - }; - - while ((opt = getopt_long(argc, argv, "", longopts, &longind)) != -1) - { - switch (opt) - { - case 'c': /* classname */ - p.setClassName(optarg); - break; - case 'e': /* extension */ - p.setExtension(optarg); - break; - case 'n': /* namespace */ - p.setNamespace(optarg); - break; - case 'o': /* outfile */ - outfile = optarg; - break; - } - } - - if (optind >= argc) - { - cerr << "Usage: imbecile [options] " << endl; - return 1; - } - - string input_fname = argv[optind]; - ifstream ifs; - ifs.open(input_fname.c_str(), ios::binary); - if (!ifs.is_open()) - { - cerr << "Error opening input file: '" << input_fname << "'"; - return 2; - } - ifs.seekg(0, ios_base::end); - int size = ifs.tellg(); - ifs.seekg(0, ios_base::beg); - char * buff = new char[size]; - ifs.read(buff, size); - ifs.close(); - - if (outfile == 
"") - outfile = buildOutputFilename(input_fname); - - if (!p.parseInputFile(buff, size)) - { - cerr << "Error parsing " << input_fname << endl; - return 3; - } - if (!p.write(outfile)) - { - cerr << "Error processing " << input_fname << endl; - return 4; - } - - delete[] buff; - return 0; -} - -string buildOutputFilename(string & input_fname) -{ - string outfile; - size_t len = input_fname.length(); - if (len > 2 && input_fname.substr(len - 2) == ".I") - { - outfile = input_fname.substr(0, len - 2); - } - else - { - outfile = input_fname; - } - return outfile; -} diff --git a/refptr b/refptr deleted file mode 160000 index e2c7e88..0000000 --- a/refptr +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e2c7e88824c18eb3b218f6308db0194edb422eef diff --git a/tests/Makefile b/tests/Makefile deleted file mode 100644 index decd03a..0000000 --- a/tests/Makefile +++ /dev/null @@ -1,14 +0,0 @@ - -all: - for d in *; do \ - if [ -d $$d ]; then \ - make -C $$d; \ - fi; \ - done - -clean: - for d in *; do \ - if [ -d $$d ]; then \ - make -C $$d clean; \ - fi; \ - done diff --git a/tests/build/Makefile b/tests/build/Makefile deleted file mode 100644 index 6028b98..0000000 --- a/tests/build/Makefile +++ /dev/null @@ -1,15 +0,0 @@ - -TARGET := test -I_SOURCE := itest -CXXFLAGS := -O2 -LDFLAGS := -lpcre - -all: $(TARGET) - ./$(TARGET) - -$(TARGET): $(shell which imbecile) $(I_SOURCE).I $(wildcard *.cc) - imbecile $(I_SOURCE).I - $(CXX) -o $@ *.cc $(LDFLAGS) - -clean: - -rm -f $(TARGET) *.o $(I_SOURCE).cc $(I_SOURCE).h diff --git a/tests/build/itest.I b/tests/build/itest.I deleted file mode 100644 index aacf3e6..0000000 --- a/tests/build/itest.I +++ /dev/null @@ -1,37 +0,0 @@ - -[tokens] - -AND and -OR or -NOT not -LPAREN \( -RPAREN \) -WS \s+ -EQUALS = %{ cout << "Saw '='" << endl; %} -IDENTIFIER [a-zA-Z_][a-zA-Z_0-9]* %{ - cout << "Identify: '" << matches[0] << "'" << endl; -%} - -DEC_INT [1-9]\d*\b -${ - uint64_t value; -$} -%{ - sscanf(matches[0].c_str(), "%lld", &value); - cout << "value: " << value << endl; -%} - -HEX_INT 0x([0-9a-fA-F]+)\b ${ uint64_t value; $} %{ - sscanf(matches[1].c_str(), "%llx", &value); - cout << "value: " << value << endl; -%} - -OCT_INT 0([0-7]*)\b -BIN_INT 0b([01]+)\b - -[rules] - -Assignment := IDENTIFIER ASSIGN Expression - -Expression := IDENTIFIER \ - | Assignment diff --git a/tests/build/main.cc b/tests/build/main.cc deleted file mode 100644 index 029104e..0000000 --- a/tests/build/main.cc +++ /dev/null @@ -1,17 +0,0 @@ - -#include -#include - -#include "itest.h" - -using namespace std; - -int main(int argc, char * argv[]) -{ - Parser p; - stringstream t(string( - "hi there (one and two and three and four) or (two = nine)\n" - "0x42 12345 0 011 0b0011\n" - )); - p.parse(t); -} diff --git a/tmpl/parser.cc b/tmpl/parser.cc deleted file mode 100644 index 76cb238..0000000 --- a/tmpl/parser.cc +++ /dev/null @@ -1,202 +0,0 @@ - -#include /* memcpy() */ -#include - -#include -#include - -#include {%header_name%} - -using namespace std; - -#ifdef I_NAMESPACE -namespace I_NAMESPACE { -#endif - -I_CLASSNAME::I_CLASSNAME() - : m_errstr(NULL) -{ -} - -static TokenRef buildToken(int typeindex) -{ - TokenRef token; - switch (typeindex) - { - {%buildToken%} - } - if (!token.isNull()) - { - token->setType(typeindex); - } - return token; -} - -static void read_istream(istream & i, vector & buff, int & size) -{ - size = 0; - int bytes_read; - char read_buff[1000]; - while (!i.eof()) - { - i.read(&read_buff[0], sizeof(read_buff)); - bytes_read = i.gcount(); - size += bytes_read; - for (int j = 
0; j < bytes_read; j++) - buff.push_back(read_buff[j]); - } -} - -bool I_CLASSNAME::parse(istream & i) -{ - struct { - const char * name; - const char * definition; - bool process; - pcre * re; - pcre_extra * re_extra; - } tokens[] = { - {%token_list%} - }; - - if (sizeof(tokens)/sizeof(tokens[0]) == 0) - { - m_errstr = "No tokens defined"; - return false; - } - - vector buff; - int buff_size; - read_istream(i, buff, buff_size); - - if (buff_size <= 0) - { - m_errstr = "0-length input string"; - return false; - } - - /* append trailing NUL byte for pcre functions */ - buff.push_back('\0'); - - /* compile all token regular expressions */ - for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++) - { - const char * errptr; - int erroffset; - tokens[i].re = pcre_compile(tokens[i].definition, 0, - &errptr, &erroffset, NULL); - if (tokens[i].re == NULL) - { - cerr << "Error compiling token '" << tokens[i].name - << "' regular expression at position " << erroffset - << ": " << errptr << endl; - m_errstr = "Error in token regular expression"; - return false; - } - tokens[i].re_extra = pcre_study(tokens[i].re, 0, &errptr); - } - - int buff_pos = 0; - const int ovector_num_matches = 16; - const int ovector_size = 3 * (ovector_num_matches + 1); - int ovector[ovector_size]; - while (buff_pos < buff_size) - { - int longest_match_length = 0; - int longest_match_index = -1; - int longest_match_ovector[ovector_size]; - for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++) - { - int rc = pcre_exec(tokens[i].re, tokens[i].re_extra, - &buff[0], buff_size, buff_pos, - PCRE_ANCHORED | PCRE_NOTEMPTY, - ovector, ovector_size); - if (rc > 0) - { - /* this pattern matched some of the input */ - int len = ovector[1] - ovector[0]; - if (len > longest_match_length) - { - longest_match_length = len; - longest_match_index = i; - memcpy(longest_match_ovector, ovector, sizeof(ovector)); - } - } - } - if (longest_match_index < 0) - { - /* no pattern matched the input at the current position */ - cerr << "Parse error" << endl; - return false; - } - Matches matches(tokens[longest_match_index].re, - &buff[0], longest_match_ovector, ovector_size); - TokenRef token = buildToken(longest_match_index); - if (token.isNull()) - { - cerr << "Internal Error: null token" << endl; - return false; - } - token->process(matches); - m_tokens.push_back(token); - buff_pos += longest_match_length; - } -} - -refptr Node::operator[](int index) -{ - return (0 <= index && index < m_indexed_children.size()) - ? m_indexed_children[index] - : NULL; -} - -refptr Node::operator[](const std::string & index) -{ - return (m_named_children.find(index) != m_named_children.end()) - ? 
m_named_children[index] - : NULL; -} - -void Token::process(const Matches & matches) -{ - {%token_code%} -} - -Matches::Matches(pcre * re, const char * data, int * ovector, int ovec_size) - : m_re(re), m_data(data), m_ovector(ovector), m_ovec_size(ovec_size) -{ -} - -std::string Matches::operator[](int index) const -{ - if (0 <= index && index < (m_ovec_size / 3)) - { - int idx = 2 * index; - if (m_ovector[idx] >= 0 && m_ovector[idx + 1] >= 0) - { - return string(m_data, m_ovector[idx], - m_ovector[idx + 1] - m_ovector[idx]); - } - } - return ""; -} - -std::string Matches::operator[](const std::string & index) const -{ - int idx = pcre_get_stringnumber(m_re, index.c_str()); - if (idx > 0 && idx < (m_ovec_size / 3)) - { - if (m_ovector[idx] >= 0 && m_ovector[idx + 1] >= 0) - { - return string(m_data, m_ovector[idx], - m_ovector[idx + 1] - m_ovector[idx]); - } - } - return ""; -} - -{%token_classes_code%} - -#ifdef I_NAMESPACE -}; -#endif diff --git a/tmpl/parser.h b/tmpl/parser.h deleted file mode 100644 index 5908acc..0000000 --- a/tmpl/parser.h +++ /dev/null @@ -1,181 +0,0 @@ - -#ifndef IMBECILE_PARSER_HEADER -#define IMBECILE_PARSER_HEADER - -#include -#include -#include -#include - -#include -#include -#include -#include - -{%user_includes%} - -{%defines%} - -#ifdef I_NAMESPACE -namespace I_NAMESPACE { -#endif - -#ifndef REFPTR_H -#define REFPTR_H REFPTR_H - -/* Author: Josh Holtrop - * Purpose: Provide a reference-counting pointer-like first order - * C++ object that will free the object it is pointing to when - * all references to it have been destroyed. - * This implementation does not solve the circular reference problem. - * I was not concerned with that when developing this class. - */ -#include /* NULL */ - -template -class refptr -{ - public: - refptr(); - refptr(T * ptr); - refptr(const refptr & orig); - refptr & operator=(const refptr & orig); - refptr & operator=(T * ptr); - ~refptr(); - T & operator*() const { return *m_ptr; } - T * operator->() const { return m_ptr; } - bool isNull() const { return m_ptr == NULL; } - - private: - void cloneFrom(const refptr & orig); - void destroy(); - - T * m_ptr; - int * m_refCount; -}; - -template refptr::refptr() -{ - m_ptr = NULL; - m_refCount = NULL; -} - -template refptr::refptr(T * ptr) -{ - m_ptr = ptr; - m_refCount = new int; - *m_refCount = 1; -} - -template refptr::refptr(const refptr & orig) -{ - cloneFrom(orig); -} - -template refptr & refptr::operator=(const refptr & orig) -{ - destroy(); - cloneFrom(orig); - return *this; -} - -template refptr & refptr::operator=(T * ptr) -{ - destroy(); - m_ptr = ptr; - m_refCount = new int; - *m_refCount = 1; - return *this; -} - -template void refptr::cloneFrom(const refptr & orig) -{ - this->m_ptr = orig.m_ptr; - this->m_refCount = orig.m_refCount; - if (m_refCount != NULL) - (*m_refCount)++; -} - -template refptr::~refptr() -{ - destroy(); -} - -template void refptr::destroy() -{ - if (m_refCount != NULL) - { - if (*m_refCount <= 1) - { - delete m_ptr; - delete m_refCount; - } - else - { - (*m_refCount)--; - } - } -} - -#endif - - -class Matches -{ - public: - Matches(pcre * re, const char * data, int * ovector, int ovec_size); - std::string operator[](int index) const; - std::string operator[](const std::string & index) const; - - protected: - pcre * m_re; - const char * m_data; - int * m_ovector; - int m_ovec_size; -}; - -class Node -{ - public: - refptr operator[](int index); - refptr operator[](const std::string & index); - - protected: - std::map< std::string, refptr > 
m_named_children; - std::vector< refptr > m_indexed_children; -}; -typedef refptr NodeRef; - -class Token : public Node -{ - public: - virtual void process(const Matches & matches); - void setType(int type) { m_type = type; } - int getType() const { return m_type; } - - protected: - int m_type; - - {%token_data%} -}; -typedef refptr TokenRef; - -{%token_classes%} - -class I_CLASSNAME -{ - public: - I_CLASSNAME(); - bool parse(std::istream & in); - const char * getError() { return m_errstr; } - - protected: - const char * m_errstr; - std::list m_tokens; -}; - -#ifdef I_NAMESPACE -}; -#endif - -#endif /* IMBECILE_PARSER_HEADER */
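
The removed tmpl/parser.cc template drives its lexer with a longest-match loop: every token's regular expression is tried anchored at the current input offset, the longest successful match wins, a token object is built for that token class, and the offset advances by the match length. The sketch below illustrates just that scanning idea in isolation; it substitutes std::regex for the PCRE calls the original used, the token table, token names, and sample input are hypothetical, and the Token/process() machinery and Matches wrapper are omitted.

// Minimal sketch of the longest-match scan from the deleted parser template.
// Assumptions: std::regex stands in for PCRE; the token table below is a
// made-up example, not the generated {%token_list%} contents.
#include <iostream>
#include <regex>
#include <string>
#include <vector>

struct TokenSpec {
    std::string name;
    std::regex  re;   // pattern matched anchored at the current offset
};

int main() {
    std::vector<TokenSpec> tokens = {
        {"IDENTIFIER", std::regex("[a-zA-Z_][a-zA-Z_0-9]*")},
        {"DEC_INT",    std::regex("[1-9][0-9]*")},
        {"WS",         std::regex("\\s+")},
    };

    std::string input = "foo 123 bar";
    std::size_t pos = 0;

    while (pos < input.size()) {
        std::size_t best_len = 0;
        const TokenSpec * best = nullptr;

        // Try every token pattern anchored at the current position and keep
        // the longest match, mirroring the loop in the generated parse().
        for (const TokenSpec & spec : tokens) {
            std::smatch m;
            if (std::regex_search(input.cbegin() + pos, input.cend(), m, spec.re,
                                  std::regex_constants::match_continuous)
                && static_cast<std::size_t>(m.length(0)) > best_len) {
                best_len = static_cast<std::size_t>(m.length(0));
                best = &spec;
            }
        }

        if (best == nullptr) {   // no pattern matched here: lexical error
            std::cerr << "Parse error at offset " << pos << "\n";
            return 1;
        }

        std::cout << best->name << ": '" << input.substr(pos, best_len) << "'\n";
        pos += best_len;         // consume the matched text
    }
    return 0;
}

Note that ties go to the earlier entry in the table, matching the original's strict greater-than comparison on match length, so a keyword token listed before IDENTIFIER would win when both match the same span.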