added data gathering; changed the way gathering was done (now ${...$} and %{...%}); added continue_line in parseInputFile()
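
The new syntax replaces the old {{ ... }} code delimiters: a ${ ... $} block now gathers per-token data and a %{ ... %} block gathers processing code, and either block may start on the token's definition line or on a line of its own. Two examples taken from the updated test input at the end of this commit (WS and DEC_INT are token definitions from that file):

    WS \s+ %{
        cout << "Hi there WS!!!!!" << endl;
    %}
    DEC_INT [1-9]\d*\b
    ${
        uint64_t value;
    $}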

Josh Holtrop 2010-05-19 12:00:06 -04:00
parent b90b83ab0f
commit 762ff3d561
4 changed files with 159 additions and 72 deletions

Parser.cc

@@ -82,7 +82,8 @@ bool Parser::parseInputFile(char * buff, int size)
 {
     typedef pcre * pcre_ptr;
     enum { none, tokens, rules };
-    pcre_ptr empty, comment, section_name, token, rule, code_end_sec;
+    pcre_ptr empty, comment, section_name, token, rule,
+             data_begin, data_end, code_begin, code_end;
     struct { pcre_ptr * re; const char * pattern; } exprs[] = {
         {&empty, "^\\s*$"},
         {&comment, "^\\s*#"},
@@ -90,19 +91,19 @@ bool Parser::parseInputFile(char * buff, int size)
         {&token, "^\\s*" /* possible leading ws */
             "([a-zA-Z_][a-zA-Z_0-9]*)" /* 1: token name */
             "\\s+" /* required whitespace */
-            "((?:[^\\\\\\s]|\\\\.)+)" /* 2: token RE */
-            "(?:\\s+\\[([^\\]]+)\\])?" /* 3: token flags */
-            "\\s*({{)?" /* 4: code section opener */
-            "\\s*$"}, /* possible trailing ws */
+            "((?:[^\\\\\\s]|\\\\.)+)"}, /* 2: token RE */
         {&rule, "^\\s*(\\S+)\\s*:=(.*)$"},
-        {&code_end_sec, "^\\s*}}\\s*$"}
+        {&data_begin, "^\\s*\\${"},
+        {&data_end, "\\$}"},
+        {&code_begin, "^\\s*%{"},
+        {&code_end, "%}"}
     };
     const int ovec_size = 3 * 10;
     int ovector[ovec_size];
     int lineno = 0;
     char * newline;
     char * input = buff;
-    string sn;
+    string current_section_name;
     map<string, int> sections;
     sections["none"] = none;
     sections["tokens"] = tokens;
@@ -110,8 +111,11 @@ bool Parser::parseInputFile(char * buff, int size)
     int section = none;
     string line;
     bool append_line = false;
+    bool gathering_data = false;
     bool gathering_code = false;
-    string code;
+    string gather;
+    bool continue_line = false;
+    TokenDefinitionRef current_token;
     for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
     {
@@ -127,40 +131,33 @@ bool Parser::parseInputFile(char * buff, int size)
         }
     }
-    while ((newline = strstr(input, "\n")) != NULL)
+    for (;;)
     {
-        int line_length = newline - input;
-        if (line_length >= 1 && newline[-1] == '\r')
+        if (continue_line)
         {
-            newline[-1] = '\n';
-            line_length--;
-        }
-        lineno++;
-        if (append_line)
-        {
-            line += string(input, line_length);
+            continue_line = false;
         }
         else
         {
-            line = string(input, line_length);
-        }
-        input = newline + 1; /* set up for next loop iteration */
-
-        if (gathering_code)
-        {
-            if (pcre_exec(code_end_sec, NULL, line.c_str(), line.size(),
-                        0, 0, ovector, ovec_size) >= 0)
+            if ((newline = strstr(input, "\n")) == NULL)
+                break;
+            int line_length = newline - input;
+            if (line_length >= 1 && newline[-1] == '\r')
             {
-                gathering_code = false;
-                code += "}\n";
-                /* TODO: do something with gathered code */
+                newline[-1] = '\n';
+                line_length--;
+            }
+            lineno++;
+            if (append_line)
+            {
+                line += string(input, line_length);
             }
             else
             {
-                code += line;
+                line = string(input, line_length);
             }
-            continue;
+            input = newline + 1; /* set up for next loop iteration */
         }
         if ( (pcre_exec(empty, NULL, line.c_str(), line.size(),
@@ -173,31 +170,36 @@ bool Parser::parseInputFile(char * buff, int size)
             continue;
         }
-        if (line.size() > 0 && line[line.size()-1] == '\\')
-        {
-            line[line.size()-1] = ' ';
-            append_line = true;
-            continue;
-        }
-        else
-        {
-            append_line = false;
-        }
-        if (pcre_exec(section_name, NULL, line.c_str(), line.size(),
-                    0, 0, ovector, ovec_size) >= 0)
-        {
-            sn = string(line, ovector[2], ovector[3] - ovector[2]);
-            if (sections.find(sn) != sections.end())
-            {
-                section = sections[sn];
-            }
-            else
-            {
-                cerr << "Unknown section name '" << sn << "'!" << endl;
-                return false;
-            }
-            continue;
-        }
+        if (! (gathering_code || gathering_data) )
+        {
+            if (line.size() > 0 && line[line.size()-1] == '\\')
+            {
+                line[line.size()-1] = ' ';
+                append_line = true;
+                continue;
+            }
+            else
+            {
+                append_line = false;
+            }
+            if (pcre_exec(section_name, NULL, line.c_str(), line.size(),
+                        0, 0, ovector, ovec_size) >= 0)
+            {
+                current_section_name
+                    = string(line, ovector[2], ovector[3] - ovector[2]);
+                if (sections.find(current_section_name) != sections.end())
+                {
+                    section = sections[current_section_name];
+                }
+                else
+                {
+                    cerr << "Unknown section name '" << current_section_name
+                        << "'!" << endl;
+                    return false;
+                }
+                continue;
+            }
+        }
         switch (section)
@@ -206,22 +208,86 @@ bool Parser::parseInputFile(char * buff, int size)
             cerr << "Unrecognized input on line " << lineno << endl;
             return false;
         case tokens:
-            if (pcre_exec(token, NULL, line.c_str(), line.size(),
+            if (gathering_data)
+            {
+                if (pcre_exec(data_end, NULL, line.c_str(), line.size(),
+                            0, 0, ovector, ovec_size) >= 0)
+                {
+                    gather += string(line, 0, ovector[0]) + "\n";
+                    gathering_data = false;
+                    line = string(line, ovector[1]);
+                    continue_line = true;
+                    if (current_token.isNull())
+                    {
+                        cerr << "Data section with no corresponding "
+                            "token definition on line " << lineno << endl;
+                        return false;
+                    }
+                    else
+                    {
+                        current_token->addData(gather);
+                    }
+                }
+                else
+                {
+                    gather += line + "\n";
+                }
+                continue;
+            }
+            else if (gathering_code)
+            {
+                if (pcre_exec(code_end, NULL, line.c_str(), line.size(),
+                            0, 0, ovector, ovec_size) >= 0)
+                {
+                    gather += string(line, 0, ovector[0]) + "\n";
+                    gathering_code = false;
+                    line = string(line, ovector[1]);
+                    continue_line = true;
+                    if (current_token.isNull())
+                    {
+                        cerr << "Code section with no corresponding "
+                            "token definition on line " << lineno << endl;
+                        return false;
+                    }
+                    else
+                    {
+                        current_token->addCode(gather);
+                    }
+                }
+                else
+                {
+                    gather += line + "\n";
+                }
+                continue;
+            }
+            else if (pcre_exec(data_begin, NULL, line.c_str(), line.size(),
+                        0, 0, ovector, ovec_size) >= 0)
+            {
+                gathering_data = true;
+                gather = "";
+                line = string(line, ovector[1]);
+                continue_line = true;
+                continue;
+            }
+            else if (pcre_exec(code_begin, NULL, line.c_str(), line.size(),
+                        0, 0, ovector, ovec_size) >= 0)
+            {
+                gathering_code = true;
+                gather = "";
+                line = string(line, ovector[1]);
+                continue_line = true;
+                continue;
+            }
+            else if (pcre_exec(token, NULL, line.c_str(), line.size(),
                         0, 0, ovector, ovec_size) >= 0)
             {
                 string name(line, ovector[2], ovector[3] - ovector[2]);
                 string definition(line,
                         ovector[4], ovector[5] - ovector[4]);
-                string flags;
-                if (ovector[6] >= 0 && ovector[7] >= 0)
-                {
-                    flags = string(line,
-                            ovector[6], ovector[7] - ovector[6]);
-                }
-                refptr<TokenDefinition> td = new TokenDefinition();
-                if (td->create(name, definition, flags))
+                current_token = new TokenDefinition();
+                if (current_token->create(name, definition))
                 {
-                    addTokenDefinition(td);
+                    addTokenDefinition(current_token);
                 }
                 else
                 {
@@ -229,13 +295,9 @@ bool Parser::parseInputFile(char * buff, int size)
                             << lineno << endl;
                     return false;
                 }
-                if (ovector[8] >= 0 && ovector[9] >= 0
-                        && ovector[9] - ovector[8] > 0)
-                {
-                    td->setProcessFlag(true);
-                    code = ""; /* FIXME: function definition */
-                    gathering_code = true;
-                }
+                line = string(line, ovector[1]);
+                continue_line = true;
+                continue;
             }
             else
             {
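
A minimal standalone sketch of the continue_line mechanism introduced above, assuming block terminators appear on the same line and using plain string searches in place of the pcre_exec() calls (illustration only, not the project's code):

    // Illustration only -- not the project's code.  Plain string searches
    // stand in for pcre_exec(); blocks are assumed to close on the same line.
    #include <iostream>
    #include <string>
    #include <vector>
    using namespace std;

    int main()
    {
        vector<string> input;
        input.push_back("HEX_INT 0x([0-9a-fA-F]+)\\b ${ uint64_t value; $} %{ /* action */ %}");
        input.push_back("OCT_INT 0([0-7]*)\\b");

        size_t idx = 0;
        string line;
        bool continue_line = false;
        for (;;)
        {
            if (continue_line)
            {
                continue_line = false;          /* re-scan leftover text in 'line' */
            }
            else
            {
                if (idx >= input.size())
                    break;                      /* out of input lines */
                line = input[idx++];
            }
            size_t open = line.find("${");      /* data block opener */
            string closer = "$}";
            size_t alt = line.find("%{");       /* code block opener */
            if (alt != string::npos && (open == string::npos || alt < open))
            {
                open = alt;
                closer = "%}";
            }
            if (open != string::npos)
            {
                size_t close = line.find(closer, open + 2);
                cout << "text : " << line.substr(0, open) << "\n";
                cout << "block: " << line.substr(open, close + 2 - open) << "\n";
                line = line.substr(close + 2);  /* keep what follows the block */
                continue_line = true;           /* and re-process it next pass */
                continue;
            }
            cout << "text : " << line << "\n";
        }
        return 0;
    }

The parser itself does the equivalent split with the match offsets returned by pcre_exec: everything before ovector[0] is appended to gather, and the text from ovector[1] onward becomes the new line to re-scan.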

TokenDefinition.cc

@@ -56,7 +56,7 @@ TokenDefinition::TokenDefinition()
 }

 bool TokenDefinition::create(const string & name,
-        const string & definition, const string & flags)
+        const string & definition)
 {
     const char * errptr;
     int erroffset;
@@ -71,6 +71,7 @@ bool TokenDefinition::create(const string & name,
     m_definition = definition;
     pcre_free(re);

+#if 0
     refptr< vector< string > > parts = split(",", flags);
     for (int i = 0, sz = parts->size(); i < sz; i++)
     {
@@ -86,6 +87,7 @@ bool TokenDefinition::create(const string & name,
             return false;
         }
     }
+#endif

     return true;
 }

TokenDefinition.h

@@ -3,22 +3,31 @@
 #define TOKENDEFINITION_H

 #include <string>
+#include "refptr.h"

 class TokenDefinition
 {
     public:
         TokenDefinition();
         bool create(const std::string & name,
-                const std::string & definition, const std::string & flags);
+                const std::string & definition);
         std::string getCString() const;
         std::string getName() const { return m_name; }
         bool getProcessFlag() const { return m_process; }
         void setProcessFlag(bool p) { m_process = p; }
+        void addData(const std::string & d) { m_data += d; }
+        std::string getData() const { return m_data; }
+        void addCode(const std::string & c) { m_code += c; m_process = true; }
+        std::string getCode() const { return m_code; }

     protected:
         std::string m_name;
         std::string m_definition;
        bool m_process;
+        std::string m_data;
+        std::string m_code;
 };

+typedef refptr<TokenDefinition> TokenDefinitionRef;
+
 #endif

(test grammar input; filename not shown in this view)

@@ -6,12 +6,26 @@ OR or
 NOT not
 LPAREN \(
 RPAREN \)
-WS \s+ {{
+WS \s+ %{
     cout << "Hi there WS!!!!!" << endl;
-}}
+%}
 EQUALS =
 IDENTIFIER [a-zA-Z_][a-zA-Z_0-9]*
+DEC_INT [1-9]\d*\b
+${
+    uint64_t value;
+$}
+%{
+    sscanf("%lld", matches[1].c_str(), &value);
+%}
+HEX_INT 0x([0-9a-fA-F]+)\b ${ uint64_t value; $} %{
+    sscanf("%llx", matches[1].c_str(), &value);
+%}
+OCT_INT 0([0-7]*)\b

 [rules]
 Assignment := IDENTIFIER ASSIGN Expression
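
In the test input above, the ${ ... $} block declares storage (uint64_t value) and the %{ ... %} block is the action that fills it; matches[1] presumably refers to the first capture group of the token's regular expression, though the scanner interface that supplies it is not part of this commit. A standalone sketch of what the HEX_INT action amounts to, with the capture container stood in by a plain vector:

    // Illustration only: 'matches' is a plain vector standing in for whatever
    // capture container the generated scanner provides to %{ ... %} actions.
    #include <cinttypes>
    #include <cstdio>
    #include <iostream>
    #include <string>
    #include <vector>

    int main()
    {
        std::vector<std::string> matches;
        matches.push_back("0x1f");   /* matches[0]: whole HEX_INT match */
        matches.push_back("1f");     /* matches[1]: the captured hex digits */

        std::uint64_t value = 0;
        std::sscanf(matches[1].c_str(), "%" SCNx64, &value);  /* parse hex digits */
        std::cout << value << std::endl;                      /* prints 31 */
        return 0;
    }

Note that sscanf takes the source string first and the format string second.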