added data gathering; changed the way gathering was done (now ${...$} and %{...%}); added continue_line in parseInputFile()
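
The new input syntax pairs a token definition with an optional data section delimited by ${ ... $} and a code section delimited by %{ ... %} (replacing the earlier {{ ... }} code sections); the gathered text is attached to the current token through addData()/addCode(). A minimal sketch of the syntax, modeled on the example input file at the end of this diff (the token name, regular expression, and the C++ statements inside the blocks are illustrative only):

HEX_INT 0x([0-9a-fA-F]+)\b ${
    uint64_t value;     /* gathered into the token's data via addData() */
$} %{
    /* gathered into the token's code via addCode(), which also marks the token for processing */
    sscanf(matches[1].c_str(), "%llx", &value);
%}

Because parseInputFile() now re-scans the remainder of a line after a match via continue_line, the ${ / $} and %{ / %} delimiters may share a line with the token definition, as above, or appear on lines of their own, as in the DEC_INT example.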

Josh Holtrop 2010-05-19 12:00:06 -04:00
parent b90b83ab0f
commit 762ff3d561
4 changed files with 159 additions and 72 deletions

Parser.cc

@@ -82,7 +82,8 @@ bool Parser::parseInputFile(char * buff, int size)
{
typedef pcre * pcre_ptr;
enum { none, tokens, rules };
pcre_ptr empty, comment, section_name, token, rule, code_end_sec;
pcre_ptr empty, comment, section_name, token, rule,
data_begin, data_end, code_begin, code_end;
struct { pcre_ptr * re; const char * pattern; } exprs[] = {
{&empty, "^\\s*$"},
{&comment, "^\\s*#"},
@@ -90,19 +91,19 @@ bool Parser::parseInputFile(char * buff, int size)
{&token, "^\\s*" /* possible leading ws */
"([a-zA-Z_][a-zA-Z_0-9]*)" /* 1: token name */
"\\s+" /* required whitespace */
"((?:[^\\\\\\s]|\\\\.)+)" /* 2: token RE */
"(?:\\s+\\[([^\\]]+)\\])?" /* 3: token flags */
"\\s*({{)?" /* 4: code section opener */
"\\s*$"}, /* possible trailing ws */
"((?:[^\\\\\\s]|\\\\.)+)"}, /* 2: token RE */
{&rule, "^\\s*(\\S+)\\s*:=(.*)$"},
{&code_end_sec, "^\\s*}}\\s*$"}
{&data_begin, "^\\s*\\${"},
{&data_end, "\\$}"},
{&code_begin, "^\\s*%{"},
{&code_end, "%}"}
};
const int ovec_size = 3 * 10;
int ovector[ovec_size];
int lineno = 0;
char * newline;
char * input = buff;
string sn;
string current_section_name;
map<string, int> sections;
sections["none"] = none;
sections["tokens"] = tokens;
@@ -110,8 +111,11 @@ bool Parser::parseInputFile(char * buff, int size)
int section = none;
string line;
bool append_line = false;
bool gathering_data = false;
bool gathering_code = false;
string code;
string gather;
bool continue_line = false;
TokenDefinitionRef current_token;
for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
{
@@ -127,8 +131,16 @@ bool Parser::parseInputFile(char * buff, int size)
}
}
while ((newline = strstr(input, "\n")) != NULL)
for (;;)
{
if (continue_line)
{
continue_line = false;
}
else
{
if ((newline = strstr(input, "\n")) == NULL)
break;
int line_length = newline - input;
if (line_length >= 1 && newline[-1] == '\r')
{
@@ -146,21 +158,6 @@ bool Parser::parseInputFile(char * buff, int size)
line = string(input, line_length);
}
input = newline + 1; /* set up for next loop iteration */
if (gathering_code)
{
if (pcre_exec(code_end_sec, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gathering_code = false;
code += "}\n";
/* TODO: do something with gathered code */
}
else
{
code += line;
}
continue;
}
if ( (pcre_exec(empty, NULL, line.c_str(), line.size(),
@@ -173,6 +170,8 @@ bool Parser::parseInputFile(char * buff, int size)
continue;
}
if (! (gathering_code || gathering_data) )
{
if (line.size() > 0 && line[line.size()-1] == '\\')
{
line[line.size()-1] = ' ';
@@ -187,18 +186,21 @@ bool Parser::parseInputFile(char * buff, int size)
if (pcre_exec(section_name, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
sn = string(line, ovector[2], ovector[3] - ovector[2]);
if (sections.find(sn) != sections.end())
current_section_name
= string(line, ovector[2], ovector[3] - ovector[2]);
if (sections.find(current_section_name) != sections.end())
{
section = sections[sn];
section = sections[current_section_name];
}
else
{
cerr << "Unknown section name '" << sn << "'!" << endl;
cerr << "Unknown section name '" << current_section_name
<< "'!" << endl;
return false;
}
continue;
}
}
switch (section)
{
@@ -206,22 +208,86 @@ bool Parser::parseInputFile(char * buff, int size)
cerr << "Unrecognized input on line " << lineno << endl;
return false;
case tokens:
if (pcre_exec(token, NULL, line.c_str(), line.size(),
if (gathering_data)
{
if (pcre_exec(data_end, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gather += string(line, 0, ovector[0]) + "\n";
gathering_data = false;
line = string(line, ovector[1]);
continue_line = true;
if (current_token.isNull())
{
cerr << "Data section with no corresponding "
"token definition on line " << lineno << endl;
return false;
}
else
{
current_token->addData(gather);
}
}
else
{
gather += line + "\n";
}
continue;
}
else if (gathering_code)
{
if (pcre_exec(code_end, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gather += string(line, 0, ovector[0]) + "\n";
gathering_code = false;
line = string(line, ovector[1]);
continue_line = true;
if (current_token.isNull())
{
cerr << "Code section with no corresponding "
"token definition on line " << lineno << endl;
return false;
}
else
{
current_token->addCode(gather);
}
}
else
{
gather += line + "\n";
}
continue;
}
else if (pcre_exec(data_begin, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gathering_data = true;
gather = "";
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else if (pcre_exec(code_begin, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gathering_code = true;
gather = "";
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else if (pcre_exec(token, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
string name(line, ovector[2], ovector[3] - ovector[2]);
string definition(line,
ovector[4], ovector[5] - ovector[4]);
string flags;
if (ovector[6] >= 0 && ovector[7] >= 0)
current_token = new TokenDefinition();
if (current_token->create(name, definition))
{
flags = string(line,
ovector[6], ovector[7] - ovector[6]);
}
refptr<TokenDefinition> td = new TokenDefinition();
if (td->create(name, definition, flags))
{
addTokenDefinition(td);
addTokenDefinition(current_token);
}
else
{
@@ -229,13 +295,9 @@ bool Parser::parseInputFile(char * buff, int size)
<< lineno << endl;
return false;
}
if (ovector[8] >= 0 && ovector[9] >= 0
&& ovector[9] - ovector[8] > 0)
{
td->setProcessFlag(true);
code = ""; /* FIXME: function definition */
gathering_code = true;
}
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else
{

TokenDefinition.cc

@@ -56,7 +56,7 @@ TokenDefinition::TokenDefinition()
}
bool TokenDefinition::create(const string & name,
const string & definition, const string & flags)
const string & definition)
{
const char * errptr;
int erroffset;
@@ -71,6 +71,7 @@ bool TokenDefinition::create(const string & name,
m_definition = definition;
pcre_free(re);
#if 0
refptr< vector< string > > parts = split(",", flags);
for (int i = 0, sz = parts->size(); i < sz; i++)
{
@@ -86,6 +87,7 @@ bool TokenDefinition::create(const string & name,
return false;
}
}
#endif
return true;
}

TokenDefinition.h

@@ -3,22 +3,31 @@
#define TOKENDEFINITION_H
#include <string>
#include "refptr.h"
class TokenDefinition
{
public:
TokenDefinition();
bool create(const std::string & name,
const std::string & definition, const std::string & flags);
const std::string & definition);
std::string getCString() const;
std::string getName() const { return m_name; }
bool getProcessFlag() const { return m_process; }
void setProcessFlag(bool p) { m_process = p; }
void addData(const std::string & d) { m_data += d; }
std::string getData() const { return m_data; }
void addCode(const std::string & c) { m_code += c; m_process = true; }
std::string getCode() const { return m_code; }
protected:
std::string m_name;
std::string m_definition;
bool m_process;
std::string m_data;
std::string m_code;
};
typedef refptr<TokenDefinition> TokenDefinitionRef;
#endif


@@ -6,12 +6,26 @@ OR or
NOT not
LPAREN \(
RPAREN \)
WS \s+ {{
WS \s+ %{
cout << "Hi there WS!!!!!" << endl;
}}
%}
EQUALS =
IDENTIFIER [a-zA-Z_][a-zA-Z_0-9]*
DEC_INT [1-9]\d*\b
${
uint64_t value;
$}
%{
sscanf("%lld", matches[1].c_str(), &value);
%}
HEX_INT 0x([0-9a-fA-F]+)\b ${ uint64_t value; $} %{
sscanf("%llx", matches[1].c_str(), &value);
%}
OCT_INT 0([0-7]*)\b
[rules]
Assignment := IDENTIFIER ASSIGN Expression