diff --git a/Parser.cc b/Parser.cc index 08a652d..70de09e 100644 --- a/Parser.cc +++ b/Parser.cc @@ -1,7 +1,16 @@ +#include +#include +#include + +#include +#include +#include +#include + #include "Parser.h" -#include -#include +#include "TokenDefinition.h" +#include "RuleDefinition.h" using namespace std; @@ -15,3 +24,171 @@ void Parser::write(const string & fname) ofs << "Content goes here" << endl; ofs.close(); } + +bool Parser::parseInputFile(char * buff, int size) +{ + typedef pcre * pcre_ptr; + enum { none, tokens, rules }; + pcre_ptr empty, comment, section_name, token, rule; + struct { pcre_ptr * re; const char * pattern; } exprs[] = { + {&empty, "^\\s*$"}, + {&comment, "^\\s*#"}, + {&section_name, "^\\s*\\[([^\\]]+?)\\]\\s*$"}, + {&token, "^\\s*" /* possible leading ws */ + "([a-zA-Z_][a-zA-Z_0-9]*)" /* token name */ + "\\s+" /* required whitespace */ + "((?:[^\\\\\\s]|\\\\.)+)" /* token RE */ + "(?:\\s+\\[([^\\]]+)\\])?" /* optional token flags */ + "\\s*$"}, /* possible trailing ws */ + {&rule, "^\\s*(\\S+)\\s*:=(.*)$"} + }; + const int ovec_size = 3 * 10; + int ovector[ovec_size]; + int lineno = 1; + char * newline; + char * input = buff; + string sn; + map sections; + sections["none"] = none; + sections["tokens"] = tokens; + sections["rules"] = rules; + int section = none; + string line; + bool append_line = false; + + for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++) + { + const char * errptr; + int erroffset; + *exprs[i].re = pcre_compile(exprs[i].pattern, 0, + &errptr, &erroffset, NULL); + if (*exprs[i].re == NULL) + { + cerr << "Error compiling regex '" << exprs[i].pattern << + "': " << errptr << " at position " << erroffset << endl; + } + } + + while ((newline = strstr(input, "\n")) != NULL) + { + int line_length = newline - input; + if (newline[-1] == '\r') + { + line_length--; + } + if (append_line) + { + line += string(input, line_length); + } + else + { + line = string(input, line_length); + } + if (line.size() > 0 && line[line.size()-1] == 
'\\') + { + line[line.size()-1] = ' '; + append_line = true; + } + else + { + append_line = false; + } + if ( append_line + || (pcre_exec(empty, NULL, line.c_str(), line.size(), + 0, 0, ovector, ovec_size) >= 0) + || (pcre_exec(comment, NULL, line.c_str(), line.size(), + 0, 0, ovector, ovec_size) >= 0) + ) + { + /* nothing */; + } + else if (pcre_exec(section_name, NULL, line.c_str(), line.size(), + 0, 0, ovector, ovec_size) >= 0) + { + sn = string(input, ovector[2], ovector[3] - ovector[2]); + if (sections.find(sn) != sections.end()) + { + section = sections[sn]; + } + else + { + cerr << "Unknown section name '" << sn << "'!" << endl; + return false; + } + } + else + { + switch (section) + { + case none: + cerr << "Unrecognized input on line " << lineno << endl; + return false; + case tokens: + if (pcre_exec(token, NULL, line.c_str(), line.size(), + 0, 0, ovector, ovec_size) >= 0) + { + string name(line, ovector[2], ovector[3] - ovector[2]); + string definition(line, + ovector[4], ovector[5] - ovector[4]); + string flags; + if (ovector[6] >= 0 && ovector[7] >= 0) + { + flags = string(line, + ovector[6], ovector[7] - ovector[6]); + } + refptr td = new TokenDefinition(); + if (td->create(name, definition, flags)) + { + addTokenDefinition(td); + } + else + { + cerr << "Error in token definition ending on line " + << lineno << endl; + return false; + } + } + else + { + cerr << "Unrecognized input on line " << lineno << endl; + return false; + } + break; + case rules: + if (pcre_exec(rule, NULL, line.c_str(), line.size(), + 0, 0, ovector, ovec_size) >= 0) + { + string name(line, ovector[2], ovector[3] - ovector[2]); + string definition(line, + ovector[4], ovector[5] - ovector[4]); + refptr rd = new RuleDefinition(); + if (rd->create(name, definition)) + { + addRuleDefinition(rd); + } + else + { + cerr << "Error in rule definition ending on line " + << lineno << endl; + return false; + } + } + else + { + cerr << "Unrecognized input on line " << lineno << endl; + return 
false; + } + break; + } + } + input = newline + 1; + lineno++; + } + + pcre_free(empty); + pcre_free(comment); + pcre_free(section_name); + pcre_free(token); + pcre_free(rule); + return true; +} diff --git a/Parser_parseInputFile.cc b/Parser_parseInputFile.cc deleted file mode 100644 index 17edcb4..0000000 --- a/Parser_parseInputFile.cc +++ /dev/null @@ -1,186 +0,0 @@ - -#include -#include - -#include -#include -#include -#include - -#include "Parser.h" -#include "TokenDefinition.h" -#include "RuleDefinition.h" - -using namespace std; - -enum Section{ none, tokens, rules }; - -bool Parser::parseInputFile(char * buff, int size) -{ - pcre * empty; - pcre * comment; - pcre * section_name; - pcre * token; - pcre * rule; - struct { pcre ** re; const char * pattern; } exprs[] = { - {&empty, "^\\s*$"}, - {&comment, "^\\s*#"}, - {&section_name, "^\\s*\\[([^\\]]+?)\\]\\s*$"}, - {&token, "^\\s*" /* possible leading ws */ - "([a-zA-Z_][a-zA-Z_0-9]*)" /* token name */ - "\\s+" /* required whitespace */ - "((?:[^\\\\\\s]|\\\\.)+)" /* token RE */ - "(?:\\s+\\[([^\\]]+)\\])?" 
/* optional token flags */ - "\\s*$"}, /* possible trailing ws */ - {&rule, "^\\s*(\\S+)\\s*:=(.*)$"} - }; - const int ovec_size = 3 * 10; - int ovector[ovec_size]; - int lineno = 1; - char * newline; - char * input = buff; - string sn; - map sections; - sections["none"] = none; - sections["tokens"] = tokens; - sections["rules"] = rules; - Section section = none; - string line; - bool append_line = false; - - for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++) - { - const char * errptr; - int erroffset; - *exprs[i].re = pcre_compile(exprs[i].pattern, 0, - &errptr, &erroffset, NULL); - if (*exprs[i].re == NULL) - { - cerr << "Error compiling regex '" << exprs[i].pattern << - "': " << errptr << " at position " << erroffset << endl; - } - } - - while ((newline = strstr(input, "\n")) != NULL) - { - int line_length = newline - input; - if (newline[-1] == '\r') - { - line_length--; - } - if (append_line) - { - line += string(input, line_length); - } - else - { - line = string(input, line_length); - } - if (line.size() > 0 && line[line.size()-1] == '\\') - { - line[line.size()-1] = ' '; - append_line = true; - } - else - { - append_line = false; - } - if ( append_line - || (pcre_exec(empty, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - || (pcre_exec(comment, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - ) - { - /* nothing */; - } - else if (pcre_exec(section_name, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - sn = string(input, ovector[2], ovector[3] - ovector[2]); - if (sections.find(sn) != sections.end()) - { - section = sections[sn]; - } - else - { - cerr << "Unknown section name '" << sn << "'!" 
<< endl; - return false; - } - } - else - { - switch (section) - { - case none: - cerr << "Unrecognized input on line " << lineno << endl; - return false; - case tokens: - if (pcre_exec(token, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - string name(line, ovector[2], ovector[3] - ovector[2]); - string definition(line, - ovector[4], ovector[5] - ovector[4]); - string flags; - if (ovector[6] >= 0 && ovector[7] >= 0) - { - flags = string(line, - ovector[6], ovector[7] - ovector[6]); - } - refptr td = new TokenDefinition(); - if (td->create(name, definition, flags)) - { - addTokenDefinition(td); - } - else - { - cerr << "Error in token definition ending on line " - << lineno << endl; - return false; - } - } - else - { - cerr << "Unrecognized input on line " << lineno << endl; - return false; - } - break; - case rules: - if (pcre_exec(rule, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - string name(line, ovector[2], ovector[3] - ovector[2]); - string definition(line, - ovector[4], ovector[5] - ovector[4]); - refptr rd = new RuleDefinition(); - if (rd->create(name, definition)) - { - addRuleDefinition(rd); - } - else - { - cerr << "Error in rule definition ending on line " - << lineno << endl; - return false; - } - } - else - { - cerr << "Unrecognized input on line " << lineno << endl; - return false; - } - break; - } - } - input = newline + 1; - lineno++; - } - - pcre_free(empty); - pcre_free(comment); - pcre_free(section_name); - pcre_free(token); - pcre_free(rule); - return true; -} diff --git a/imbecile.cc b/imbecile.cc index 7d90efe..3b85d00 100644 --- a/imbecile.cc +++ b/imbecile.cc @@ -38,7 +38,7 @@ int main(int argc, char * argv[]) ifs.open(input_fname.c_str(), ios::binary); if (!ifs.is_open()) { - cerr << "Error opening input file: '" << argv[optind] << "'"; + cerr << "Error opening input file: '" << input_fname << "'"; return 2; } ifs.seekg(0, ios_base::end);