added data gathering; changed the way gathering was done (now ${...$} and %{...%}); added continue_line in parseInputFile()
commit 762ff3d561 (parent b90b83ab0f)
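For context, the new input syntax wraps a token's data block in ${ ... $} and its code block in %{ ... %}, replacing the old {{ ... }} form, and continue_line lets the parser re-scan the remainder of a line after a closing marker. A token entry in the updated format, patterned on the DEC_INT entry added to the test input in this commit (the matches array comes from that same test input), looks roughly like:

    DEC_INT [1-9]\d*\b
    ${
    uint64_t value;                                /* gathered via addData() */
    $}
    %{
    sscanf("%lld", matches[1].c_str(), &value);    /* gathered via addCode() */
    %}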
Parser.cc (198 lines changed)
@@ -82,7 +82,8 @@ bool Parser::parseInputFile(char * buff, int size)
 {
     typedef pcre * pcre_ptr;
     enum { none, tokens, rules };
-    pcre_ptr empty, comment, section_name, token, rule, code_end_sec;
+    pcre_ptr empty, comment, section_name, token, rule,
+             data_begin, data_end, code_begin, code_end;
     struct { pcre_ptr * re; const char * pattern; } exprs[] = {
         {&empty, "^\\s*$"},
         {&comment, "^\\s*#"},
@@ -90,19 +91,19 @@ bool Parser::parseInputFile(char * buff, int size)
         {&token, "^\\s*"                    /* possible leading ws */
                  "([a-zA-Z_][a-zA-Z_0-9]*)" /* 1: token name */
                  "\\s+"                     /* required whitespace */
-                 "((?:[^\\\\\\s]|\\\\.)+)"  /* 2: token RE */
-                 "(?:\\s+\\[([^\\]]+)\\])?" /* 3: token flags */
-                 "\\s*({{)?"                /* 4: code section opener */
-                 "\\s*$"},                  /* possible trailing ws */
+                 "((?:[^\\\\\\s]|\\\\.)+)"}, /* 2: token RE */
         {&rule, "^\\s*(\\S+)\\s*:=(.*)$"},
-        {&code_end_sec, "^\\s*}}\\s*$"}
+        {&data_begin, "^\\s*\\${"},
+        {&data_end, "\\$}"},
+        {&code_begin, "^\\s*%{"},
+        {&code_end, "%}"}
     };
     const int ovec_size = 3 * 10;
     int ovector[ovec_size];
     int lineno = 0;
     char * newline;
     char * input = buff;
-    string sn;
+    string current_section_name;
     map<string, int> sections;
     sections["none"] = none;
     sections["tokens"] = tokens;
@@ -110,8 +111,11 @@ bool Parser::parseInputFile(char * buff, int size)
     int section = none;
     string line;
     bool append_line = false;
+    bool gathering_data = false;
     bool gathering_code = false;
-    string code;
+    string gather;
+    bool continue_line = false;
+    TokenDefinitionRef current_token;

     for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
     {
@@ -127,40 +131,33 @@ bool Parser::parseInputFile(char * buff, int size)
         }
     }

-    while ((newline = strstr(input, "\n")) != NULL)
-    {
-        int line_length = newline - input;
-        if (line_length >= 1 && newline[-1] == '\r')
-        {
-            newline[-1] = '\n';
-            line_length--;
-        }
-        lineno++;
-
-        if (append_line)
-        {
-            line += string(input, line_length);
-        }
-        else
-        {
-            line = string(input, line_length);
-        }
-        input = newline + 1; /* set up for next loop iteration */
-
-        if (gathering_code)
-        {
-            if (pcre_exec(code_end_sec, NULL, line.c_str(), line.size(),
-                          0, 0, ovector, ovec_size) >= 0)
-            {
-                gathering_code = false;
-                code += "}\n";
-                /* TODO: do something with gathered code */
-            }
-            else
-            {
-                code += line;
-            }
-            continue;
-        }
+    for (;;)
+    {
+        if (continue_line)
+        {
+            continue_line = false;
+        }
+        else
+        {
+            if ((newline = strstr(input, "\n")) == NULL)
+                break;
+            int line_length = newline - input;
+            if (line_length >= 1 && newline[-1] == '\r')
+            {
+                newline[-1] = '\n';
+                line_length--;
+            }
+            lineno++;
+
+            if (append_line)
+            {
+                line += string(input, line_length);
+            }
+            else
+            {
+                line = string(input, line_length);
+            }
+            input = newline + 1; /* set up for next loop iteration */
+        }

         if ( (pcre_exec(empty, NULL, line.c_str(), line.size(),
@@ -173,31 +170,36 @@ bool Parser::parseInputFile(char * buff, int size)
             continue;
         }

-        if (line.size() > 0 && line[line.size()-1] == '\\')
-        {
-            line[line.size()-1] = ' ';
-            append_line = true;
-            continue;
-        }
-        else
-        {
-            append_line = false;
-        }
-
-        if (pcre_exec(section_name, NULL, line.c_str(), line.size(),
-                      0, 0, ovector, ovec_size) >= 0)
-        {
-            sn = string(line, ovector[2], ovector[3] - ovector[2]);
-            if (sections.find(sn) != sections.end())
-            {
-                section = sections[sn];
-            }
-            else
-            {
-                cerr << "Unknown section name '" << sn << "'!" << endl;
-                return false;
-            }
-            continue;
-        }
+        if (! (gathering_code || gathering_data) )
+        {
+            if (line.size() > 0 && line[line.size()-1] == '\\')
+            {
+                line[line.size()-1] = ' ';
+                append_line = true;
+                continue;
+            }
+            else
+            {
+                append_line = false;
+            }
+
+            if (pcre_exec(section_name, NULL, line.c_str(), line.size(),
+                          0, 0, ovector, ovec_size) >= 0)
+            {
+                current_section_name
+                    = string(line, ovector[2], ovector[3] - ovector[2]);
+                if (sections.find(current_section_name) != sections.end())
+                {
+                    section = sections[current_section_name];
+                }
+                else
+                {
+                    cerr << "Unknown section name '" << current_section_name
+                         << "'!" << endl;
+                    return false;
+                }
+                continue;
+            }
+        }

         switch (section)
@@ -206,22 +208,86 @@ bool Parser::parseInputFile(char * buff, int size)
             cerr << "Unrecognized input on line " << lineno << endl;
             return false;
         case tokens:
-            if (pcre_exec(token, NULL, line.c_str(), line.size(),
+            if (gathering_data)
+            {
+                if (pcre_exec(data_end, NULL, line.c_str(), line.size(),
+                              0, 0, ovector, ovec_size) >= 0)
+                {
+                    gather += string(line, 0, ovector[0]) + "\n";
+                    gathering_data = false;
+                    line = string(line, ovector[1]);
+                    continue_line = true;
+                    if (current_token.isNull())
+                    {
+                        cerr << "Data section with no corresponding "
+                                "token definition on line " << lineno << endl;
+                        return false;
+                    }
+                    else
+                    {
+                        current_token->addData(gather);
+                    }
+                }
+                else
+                {
+                    gather += line + "\n";
+                }
+                continue;
+            }
+            else if (gathering_code)
+            {
+                if (pcre_exec(code_end, NULL, line.c_str(), line.size(),
+                              0, 0, ovector, ovec_size) >= 0)
+                {
+                    gather += string(line, 0, ovector[0]) + "\n";
+                    gathering_code = false;
+                    line = string(line, ovector[1]);
+                    continue_line = true;
+                    if (current_token.isNull())
+                    {
+                        cerr << "Code section with no corresponding "
+                                "token definition on line " << lineno << endl;
+                        return false;
+                    }
+                    else
+                    {
+                        current_token->addCode(gather);
+                    }
+                }
+                else
+                {
+                    gather += line + "\n";
+                }
+                continue;
+            }
+            else if (pcre_exec(data_begin, NULL, line.c_str(), line.size(),
+                               0, 0, ovector, ovec_size) >= 0)
+            {
+                gathering_data = true;
+                gather = "";
+                line = string(line, ovector[1]);
+                continue_line = true;
+                continue;
+            }
+            else if (pcre_exec(code_begin, NULL, line.c_str(), line.size(),
+                               0, 0, ovector, ovec_size) >= 0)
+            {
+                gathering_code = true;
+                gather = "";
+                line = string(line, ovector[1]);
+                continue_line = true;
+                continue;
+            }
+            else if (pcre_exec(token, NULL, line.c_str(), line.size(),
                           0, 0, ovector, ovec_size) >= 0)
             {
                 string name(line, ovector[2], ovector[3] - ovector[2]);
                 string definition(line,
                                   ovector[4], ovector[5] - ovector[4]);
-                string flags;
-                if (ovector[6] >= 0 && ovector[7] >= 0)
+                current_token = new TokenDefinition();
+                if (current_token->create(name, definition))
                 {
-                    flags = string(line,
-                                   ovector[6], ovector[7] - ovector[6]);
-                }
-                refptr<TokenDefinition> td = new TokenDefinition();
-                if (td->create(name, definition, flags))
-                {
-                    addTokenDefinition(td);
+                    addTokenDefinition(current_token);
                 }
                 else
                 {
@@ -229,13 +295,9 @@ bool Parser::parseInputFile(char * buff, int size)
                         << lineno << endl;
                     return false;
                 }
-                if (ovector[8] >= 0 && ovector[9] >= 0
-                    && ovector[9] - ovector[8] > 0)
-                {
-                    td->setProcessFlag(true);
-                    code = ""; /* FIXME: function definition */
-                    gathering_code = true;
-                }
+                line = string(line, ovector[1]);
+                continue_line = true;
+                continue;
             }
             else
             {

@@ -56,7 +56,7 @@ TokenDefinition::TokenDefinition()
 }

 bool TokenDefinition::create(const string & name,
-                             const string & definition, const string & flags)
+                             const string & definition)
 {
     const char * errptr;
     int erroffset;
@@ -71,6 +71,7 @@ bool TokenDefinition::create(const string & name,
     m_definition = definition;
     pcre_free(re);

+#if 0
     refptr< vector< string > > parts = split(",", flags);
     for (int i = 0, sz = parts->size(); i < sz; i++)
     {
@@ -86,6 +87,7 @@ bool TokenDefinition::create(const string & name,
             return false;
         }
     }
+#endif

     return true;
 }

@@ -3,22 +3,31 @@
 #define TOKENDEFINITION_H

 #include <string>
+#include "refptr.h"

 class TokenDefinition
 {
 public:
     TokenDefinition();
     bool create(const std::string & name,
-                const std::string & definition, const std::string & flags);
+                const std::string & definition);
     std::string getCString() const;
     std::string getName() const { return m_name; }
     bool getProcessFlag() const { return m_process; }
     void setProcessFlag(bool p) { m_process = p; }
+    void addData(const std::string & d) { m_data += d; }
+    std::string getData() const { return m_data; }
+    void addCode(const std::string & c) { m_code += c; m_process = true; }
+    std::string getCode() const { return m_code; }

 protected:
     std::string m_name;
     std::string m_definition;
     bool m_process;
+    std::string m_data;
+    std::string m_code;
 };

+typedef refptr<TokenDefinition> TokenDefinitionRef;
+
 #endif

@@ -6,12 +6,26 @@ OR or
 NOT not
 LPAREN \(
 RPAREN \)
-WS \s+ {{
+WS \s+ %{
 cout << "Hi there WS!!!!!" << endl;
-}}
+%}
 EQUALS =
 IDENTIFIER [a-zA-Z_][a-zA-Z_0-9]*

+DEC_INT [1-9]\d*\b
+${
+uint64_t value;
+$}
+%{
+sscanf("%lld", matches[1].c_str(), &value);
+%}
+
+HEX_INT 0x([0-9a-fA-F]+)\b ${ uint64_t value; $} %{
+sscanf("%llx", matches[1].c_str(), &value);
+%}
+
+OCT_INT 0([0-7]*)\b
+
 [rules]

 Assignment := IDENTIFIER ASSIGN Expression