From 762ff3d561f1c87598e260ad1ea0c23c9315e358 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Wed, 19 May 2010 12:00:06 -0400 Subject: [PATCH] added data gathering; changed the way gathering was done (now ${...$} and %{...%}); added continue_line in parseInputFile() --- Parser.cc | 198 +++++++++++++++++++++++++++++--------------- TokenDefinition.cc | 4 +- TokenDefinition.h | 11 ++- tests/build/itest.I | 18 +++- 4 files changed, 159 insertions(+), 72 deletions(-) diff --git a/Parser.cc b/Parser.cc index f9a8473..0ac0841 100644 --- a/Parser.cc +++ b/Parser.cc @@ -82,7 +82,8 @@ bool Parser::parseInputFile(char * buff, int size) { typedef pcre * pcre_ptr; enum { none, tokens, rules }; - pcre_ptr empty, comment, section_name, token, rule, code_end_sec; + pcre_ptr empty, comment, section_name, token, rule, + data_begin, data_end, code_begin, code_end; struct { pcre_ptr * re; const char * pattern; } exprs[] = { {&empty, "^\\s*$"}, {&comment, "^\\s*#"}, @@ -90,19 +91,19 @@ bool Parser::parseInputFile(char * buff, int size) {&token, "^\\s*" /* possible leading ws */ "([a-zA-Z_][a-zA-Z_0-9]*)" /* 1: token name */ "\\s+" /* required whitespace */ - "((?:[^\\\\\\s]|\\\\.)+)" /* 2: token RE */ - "(?:\\s+\\[([^\\]]+)\\])?" /* 3: token flags */ - "\\s*({{)?" /* 4: code section opener */ - "\\s*$"}, /* possible trailing ws */ + "((?:[^\\\\\\s]|\\\\.)+)"}, /* 2: token RE */ {&rule, "^\\s*(\\S+)\\s*:=(.*)$"}, - {&code_end_sec, "^\\s*}}\\s*$"} + {&data_begin, "^\\s*\\${"}, + {&data_end, "\\$}"}, + {&code_begin, "^\\s*%{"}, + {&code_end, "%}"} }; const int ovec_size = 3 * 10; int ovector[ovec_size]; int lineno = 0; char * newline; char * input = buff; - string sn; + string current_section_name; map sections; sections["none"] = none; sections["tokens"] = tokens; @@ -110,8 +111,11 @@ bool Parser::parseInputFile(char * buff, int size) int section = none; string line; bool append_line = false; + bool gathering_data = false; bool gathering_code = false; - string code; + string gather; + bool continue_line = false; + TokenDefinitionRef current_token; for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++) { @@ -127,40 +131,33 @@ bool Parser::parseInputFile(char * buff, int size) } } - while ((newline = strstr(input, "\n")) != NULL) + for (;;) { - int line_length = newline - input; - if (line_length >= 1 && newline[-1] == '\r') + if (continue_line) { - newline[-1] = '\n'; - line_length--; - } - lineno++; - - if (append_line) - { - line += string(input, line_length); + continue_line = false; } else { - line = string(input, line_length); - } - input = newline + 1; /* set up for next loop iteration */ - - if (gathering_code) - { - if (pcre_exec(code_end_sec, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) + if ((newline = strstr(input, "\n")) == NULL) + break; + int line_length = newline - input; + if (line_length >= 1 && newline[-1] == '\r') { - gathering_code = false; - code += "}\n"; - /* TODO: do something with gathered code */ + newline[-1] = '\n'; + line_length--; + } + lineno++; + + if (append_line) + { + line += string(input, line_length); } else { - code += line; + line = string(input, line_length); } - continue; + input = newline + 1; /* set up for next loop iteration */ } if ( (pcre_exec(empty, NULL, line.c_str(), line.size(), @@ -173,31 +170,36 @@ bool Parser::parseInputFile(char * buff, int size) continue; } - if (line.size() > 0 && line[line.size()-1] == '\\') + if (! (gathering_code || gathering_data) ) { - line[line.size()-1] = ' '; - append_line = true; - continue; - } - else - { - append_line = false; - } - - if (pcre_exec(section_name, NULL, line.c_str(), line.size(), - 0, 0, ovector, ovec_size) >= 0) - { - sn = string(line, ovector[2], ovector[3] - ovector[2]); - if (sections.find(sn) != sections.end()) + if (line.size() > 0 && line[line.size()-1] == '\\') { - section = sections[sn]; + line[line.size()-1] = ' '; + append_line = true; + continue; } else { - cerr << "Unknown section name '" << sn << "'!" << endl; - return false; + append_line = false; + } + + if (pcre_exec(section_name, NULL, line.c_str(), line.size(), + 0, 0, ovector, ovec_size) >= 0) + { + current_section_name + = string(line, ovector[2], ovector[3] - ovector[2]); + if (sections.find(current_section_name) != sections.end()) + { + section = sections[current_section_name]; + } + else + { + cerr << "Unknown section name '" << current_section_name + << "'!" << endl; + return false; + } + continue; } - continue; } switch (section) @@ -206,22 +208,86 @@ bool Parser::parseInputFile(char * buff, int size) cerr << "Unrecognized input on line " << lineno << endl; return false; case tokens: - if (pcre_exec(token, NULL, line.c_str(), line.size(), + if (gathering_data) + { + if (pcre_exec(data_end, NULL, line.c_str(), line.size(), + 0, 0, ovector, ovec_size) >= 0) + { + gather += string(line, 0, ovector[0]) + "\n"; + gathering_data = false; + line = string(line, ovector[1]); + continue_line = true; + if (current_token.isNull()) + { + cerr << "Data section with no corresponding " + "token definition on line " << lineno << endl; + return false; + } + else + { + current_token->addData(gather); + } + } + else + { + gather += line + "\n"; + } + continue; + } + else if (gathering_code) + { + if (pcre_exec(code_end, NULL, line.c_str(), line.size(), + 0, 0, ovector, ovec_size) >= 0) + { + gather += string(line, 0, ovector[0]) + "\n"; + gathering_code = false; + line = string(line, ovector[1]); + continue_line = true; + if (current_token.isNull()) + { + cerr << "Code section with no corresponding " + "token definition on line " << lineno << endl; + return false; + } + else + { + current_token->addCode(gather); + } + } + else + { + gather += line + "\n"; + } + continue; + } + else if (pcre_exec(data_begin, NULL, line.c_str(), line.size(), + 0, 0, ovector, ovec_size) >= 0) + { + gathering_data = true; + gather = ""; + line = string(line, ovector[1]); + continue_line = true; + continue; + } + else if (pcre_exec(code_begin, NULL, line.c_str(), line.size(), + 0, 0, ovector, ovec_size) >= 0) + { + gathering_code = true; + gather = ""; + line = string(line, ovector[1]); + continue_line = true; + continue; + } + else if (pcre_exec(token, NULL, line.c_str(), line.size(), 0, 0, ovector, ovec_size) >= 0) { string name(line, ovector[2], ovector[3] - ovector[2]); string definition(line, ovector[4], ovector[5] - ovector[4]); - string flags; - if (ovector[6] >= 0 && ovector[7] >= 0) + current_token = new TokenDefinition(); + if (current_token->create(name, definition)) { - flags = string(line, - ovector[6], ovector[7] - ovector[6]); - } - refptr td = new TokenDefinition(); - if (td->create(name, definition, flags)) - { - addTokenDefinition(td); + addTokenDefinition(current_token); } else { @@ -229,13 +295,9 @@ bool Parser::parseInputFile(char * buff, int size) << lineno << endl; return false; } - if (ovector[8] >= 0 && ovector[9] >= 0 - && ovector[9] - ovector[8] > 0) - { - td->setProcessFlag(true); - code = ""; /* FIXME: function definition */ - gathering_code = true; - } + line = string(line, ovector[1]); + continue_line = true; + continue; } else { diff --git a/TokenDefinition.cc b/TokenDefinition.cc index 6d320f2..5e2201d 100644 --- a/TokenDefinition.cc +++ b/TokenDefinition.cc @@ -56,7 +56,7 @@ TokenDefinition::TokenDefinition() } bool TokenDefinition::create(const string & name, - const string & definition, const string & flags) + const string & definition) { const char * errptr; int erroffset; @@ -71,6 +71,7 @@ bool TokenDefinition::create(const string & name, m_definition = definition; pcre_free(re); +#if 0 refptr< vector< string > > parts = split(",", flags); for (int i = 0, sz = parts->size(); i < sz; i++) { @@ -86,6 +87,7 @@ bool TokenDefinition::create(const string & name, return false; } } +#endif return true; } diff --git a/TokenDefinition.h b/TokenDefinition.h index a953214..d13f8fc 100644 --- a/TokenDefinition.h +++ b/TokenDefinition.h @@ -3,22 +3,31 @@ #define TOKENDEFINITION_H #include +#include "refptr.h" class TokenDefinition { public: TokenDefinition(); bool create(const std::string & name, - const std::string & definition, const std::string & flags); + const std::string & definition); std::string getCString() const; std::string getName() const { return m_name; } bool getProcessFlag() const { return m_process; } void setProcessFlag(bool p) { m_process = p; } + void addData(const std::string & d) { m_data += d; } + std::string getData() const { return m_data; } + void addCode(const std::string & c) { m_code += c; m_process = true; } + std::string getCode() const { return m_code; } protected: std::string m_name; std::string m_definition; bool m_process; + std::string m_data; + std::string m_code; }; +typedef refptr TokenDefinitionRef; + #endif diff --git a/tests/build/itest.I b/tests/build/itest.I index 7cc7320..9f877ea 100644 --- a/tests/build/itest.I +++ b/tests/build/itest.I @@ -6,12 +6,26 @@ OR or NOT not LPAREN \( RPAREN \) -WS \s+ {{ +WS \s+ %{ cout << "Hi there WS!!!!!" << endl; -}} +%} EQUALS = IDENTIFIER [a-zA-Z_][a-zA-Z_0-9]* +DEC_INT [1-9]\d*\b +${ + uint64_t value; +$} +%{ + sscanf("%lld", matches[1].c_str(), &value); +%} + +HEX_INT 0x([0-9a-fA-F]+)\b ${ uint64_t value; $} %{ + sscanf("%llx", matches[1].c_str(), &value); +%} + +OCT_INT 0([0-7]*)\b + [rules] Assignment := IDENTIFIER ASSIGN Expression