added data gathering; changed the way gathering was done (now ${...$} and %{...%}); added continue_line in parseInputFile()
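
The new input syntax pairs a token definition with an optional data section delimited by ${ ... $} and a code section delimited by %{ ... %} (replacing the earlier {{ ... }} code sections); the gathered text is attached to the current token through addData()/addCode(). A minimal sketch of the syntax, modeled on the example input file at the end of this diff (the token name, regular expression, and the C++ statements inside the blocks are illustrative only):

HEX_INT 0x([0-9a-fA-F]+)\b ${
    uint64_t value;     /* gathered into the token's data via addData() */
$} %{
    /* gathered into the token's code via addCode(), which also marks the token for processing */
    sscanf(matches[1].c_str(), "%llx", &value);
%}

Because parseInputFile() now re-scans the remainder of a line after a match via continue_line, the ${ / $} and %{ / %} delimiters may share a line with the token definition, as above, or appear on lines of their own, as in the DEC_INT example.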

Josh Holtrop 2010-05-19 12:00:06 -04:00
parent b90b83ab0f
commit 762ff3d561
4 changed files with 159 additions and 72 deletions

Parser.cc

@@ -82,7 +82,8 @@ bool Parser::parseInputFile(char * buff, int size)
{
typedef pcre * pcre_ptr;
enum { none, tokens, rules };
pcre_ptr empty, comment, section_name, token, rule, code_end_sec;
pcre_ptr empty, comment, section_name, token, rule,
data_begin, data_end, code_begin, code_end;
struct { pcre_ptr * re; const char * pattern; } exprs[] = {
{&empty, "^\\s*$"},
{&comment, "^\\s*#"},
@@ -90,19 +91,19 @@ bool Parser::parseInputFile(char * buff, int size)
{&token, "^\\s*" /* possible leading ws */
"([a-zA-Z_][a-zA-Z_0-9]*)" /* 1: token name */
"\\s+" /* required whitespace */
"((?:[^\\\\\\s]|\\\\.)+)" /* 2: token RE */
"(?:\\s+\\[([^\\]]+)\\])?" /* 3: token flags */
"\\s*({{)?" /* 4: code section opener */
"\\s*$"}, /* possible trailing ws */
"((?:[^\\\\\\s]|\\\\.)+)"}, /* 2: token RE */
{&rule, "^\\s*(\\S+)\\s*:=(.*)$"},
{&code_end_sec, "^\\s*}}\\s*$"}
{&data_begin, "^\\s*\\${"},
{&data_end, "\\$}"},
{&code_begin, "^\\s*%{"},
{&code_end, "%}"}
};
const int ovec_size = 3 * 10;
int ovector[ovec_size];
int lineno = 0;
char * newline;
char * input = buff;
string sn;
string current_section_name;
map<string, int> sections;
sections["none"] = none;
sections["tokens"] = tokens;
@@ -110,8 +111,11 @@ bool Parser::parseInputFile(char * buff, int size)
int section = none;
string line;
bool append_line = false;
bool gathering_data = false;
bool gathering_code = false;
string code;
string gather;
bool continue_line = false;
TokenDefinitionRef current_token;
for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
{
@@ -127,8 +131,16 @@ bool Parser::parseInputFile(char * buff, int size)
}
}
while ((newline = strstr(input, "\n")) != NULL)
for (;;)
{
if (continue_line)
{
continue_line = false;
}
else
{
if ((newline = strstr(input, "\n")) == NULL)
break;
int line_length = newline - input;
if (line_length >= 1 && newline[-1] == '\r')
{
@@ -146,21 +158,6 @@ bool Parser::parseInputFile(char * buff, int size)
line = string(input, line_length);
}
input = newline + 1; /* set up for next loop iteration */
if (gathering_code)
{
if (pcre_exec(code_end_sec, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gathering_code = false;
code += "}\n";
/* TODO: do something with gathered code */
}
else
{
code += line;
}
continue;
}
if ( (pcre_exec(empty, NULL, line.c_str(), line.size(),
@@ -173,6 +170,8 @@ bool Parser::parseInputFile(char * buff, int size)
continue;
}
if (! (gathering_code || gathering_data) )
{
if (line.size() > 0 && line[line.size()-1] == '\\')
{
line[line.size()-1] = ' ';
@@ -187,18 +186,21 @@ bool Parser::parseInputFile(char * buff, int size)
if (pcre_exec(section_name, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
sn = string(line, ovector[2], ovector[3] - ovector[2]);
if (sections.find(sn) != sections.end())
current_section_name
= string(line, ovector[2], ovector[3] - ovector[2]);
if (sections.find(current_section_name) != sections.end())
{
section = sections[sn];
section = sections[current_section_name];
}
else
{
cerr << "Unknown section name '" << sn << "'!" << endl;
cerr << "Unknown section name '" << current_section_name
<< "'!" << endl;
return false;
}
continue;
}
}
switch (section)
{
@@ -206,22 +208,86 @@ bool Parser::parseInputFile(char * buff, int size)
cerr << "Unrecognized input on line " << lineno << endl;
return false;
case tokens:
if (pcre_exec(token, NULL, line.c_str(), line.size(),
if (gathering_data)
{
if (pcre_exec(data_end, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gather += string(line, 0, ovector[0]) + "\n";
gathering_data = false;
line = string(line, ovector[1]);
continue_line = true;
if (current_token.isNull())
{
cerr << "Data section with no corresponding "
"token definition on line " << lineno << endl;
return false;
}
else
{
current_token->addData(gather);
}
}
else
{
gather += line + "\n";
}
continue;
}
else if (gathering_code)
{
if (pcre_exec(code_end, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gather += string(line, 0, ovector[0]) + "\n";
gathering_code = false;
line = string(line, ovector[1]);
continue_line = true;
if (current_token.isNull())
{
cerr << "Code section with no corresponding "
"token definition on line " << lineno << endl;
return false;
}
else
{
current_token->addCode(gather);
}
}
else
{
gather += line + "\n";
}
continue;
}
else if (pcre_exec(data_begin, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gathering_data = true;
gather = "";
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else if (pcre_exec(code_begin, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gathering_code = true;
gather = "";
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else if (pcre_exec(token, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
string name(line, ovector[2], ovector[3] - ovector[2]);
string definition(line,
ovector[4], ovector[5] - ovector[4]);
string flags;
if (ovector[6] >= 0 && ovector[7] >= 0)
current_token = new TokenDefinition();
if (current_token->create(name, definition))
{
flags = string(line,
ovector[6], ovector[7] - ovector[6]);
}
refptr<TokenDefinition> td = new TokenDefinition();
if (td->create(name, definition, flags))
{
addTokenDefinition(td);
addTokenDefinition(current_token);
}
else
{
@@ -229,13 +295,9 @@ bool Parser::parseInputFile(char * buff, int size)
<< lineno << endl;
return false;
}
if (ovector[8] >= 0 && ovector[9] >= 0
&& ovector[9] - ovector[8] > 0)
{
td->setProcessFlag(true);
code = ""; /* FIXME: function definition */
gathering_code = true;
}
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else
{

TokenDefinition.cc

@@ -56,7 +56,7 @@ TokenDefinition::TokenDefinition()
}
bool TokenDefinition::create(const string & name,
const string & definition, const string & flags)
const string & definition)
{
const char * errptr;
int erroffset;
@@ -71,6 +71,7 @@ bool TokenDefinition::create(const string & name,
m_definition = definition;
pcre_free(re);
#if 0
refptr< vector< string > > parts = split(",", flags);
for (int i = 0, sz = parts->size(); i < sz; i++)
{
@@ -86,6 +87,7 @@ bool TokenDefinition::create(const string & name,
return false;
}
}
#endif
return true;
}

TokenDefinition.h

@@ -3,22 +3,31 @@
#define TOKENDEFINITION_H
#include <string>
#include "refptr.h"
class TokenDefinition
{
public:
TokenDefinition();
bool create(const std::string & name,
const std::string & definition, const std::string & flags);
const std::string & definition);
std::string getCString() const;
std::string getName() const { return m_name; }
bool getProcessFlag() const { return m_process; }
void setProcessFlag(bool p) { m_process = p; }
void addData(const std::string & d) { m_data += d; }
std::string getData() const { return m_data; }
void addCode(const std::string & c) { m_code += c; m_process = true; }
std::string getCode() const { return m_code; }
protected:
std::string m_name;
std::string m_definition;
bool m_process;
std::string m_data;
std::string m_code;
};
typedef refptr<TokenDefinition> TokenDefinitionRef;
#endif


@@ -6,12 +6,26 @@ OR or
NOT not
LPAREN \(
RPAREN \)
WS \s+ {{
WS \s+ %{
cout << "Hi there WS!!!!!" << endl;
}}
%}
EQUALS =
IDENTIFIER [a-zA-Z_][a-zA-Z_0-9]*
DEC_INT [1-9]\d*\b
${
uint64_t value;
$}
%{
sscanf("%lld", matches[1].c_str(), &value);
%}
HEX_INT 0x([0-9a-fA-F]+)\b ${ uint64_t value; $} %{
sscanf("%llx", matches[1].c_str(), &value);
%}
OCT_INT 0([0-7]*)\b
[rules]
Assignment := IDENTIFIER ASSIGN Expression