restructured Parser::parseInputFile() a bit and added code-gathering functionality

2010-05-11 11:04:23 -04:00 · 2010-05-11 11:04:23 -04:00 · 15c5751632
commit 15c5751632
parent ec8af293e1
1 changed files with 114 additions and 81 deletions
--- a/Parser.cc
+++ b/Parser.cc
@ -12,6 +12,7 @@
 #include "Parser.h"
 #include "TokenDefinition.h"
 #include "RuleDefinition.h"
 #include "tmpl.h"
 using namespace std;
@ -76,22 +77,24 @@ bool Parser::parseInputFile(char * buff, int size)
 {
    typedef pcre * pcre_ptr;
    enum { none, tokens, rules };
-    pcre_ptr empty, comment, section_name, token, rule;
+    pcre_ptr empty, comment, section_name, token, rule, code_end_sec;
    struct { pcre_ptr * re; const char * pattern; } exprs[] = {
        {&empty,        "^\\s*$"},
        {&comment,      "^\\s*#"},
        {&section_name, "^\\s*\\[([^\\]]+?)\\]\\s*$"},
        {&token,        "^\\s*"                     /* possible leading ws */
-                        "([a-zA-Z_][a-zA-Z_0-9]*)"  /* token name */
+                        "([a-zA-Z_][a-zA-Z_0-9]*)"  /* 1: token name */
                        "\\s+"                      /* required whitespace */
-                        "((?:[^\\\\\\s]|\\\\.)+)"   /* token RE */
+                        "((?:[^\\\\\\s]|\\\\.)+)"   /* 2: token RE */
-                        "(?:\\s+\\[([^\\]]+)\\])?"  /* optional token flags */
+                        "(?:\\s+\\[([^\\]]+)\\])?"  /* 3: token flags */
                        "\\s*({{)?"                 /* 4: code section opener */
                        "\\s*$"},                   /* possible trailing ws */
-        {&rule,         "^\\s*(\\S+)\\s*:=(.*)$"}
+        {&rule,         "^\\s*(\\S+)\\s*:=(.*)$"},
        {&code_end_sec, "^\\s*}}\\s*$"}
    };
    const int ovec_size = 3 * 10;
    int ovector[ovec_size];
-    int lineno = 1;
+    int lineno = 0;
    char * newline;
    char * input = buff;
    string sn;
@ -102,6 +105,8 @@ bool Parser::parseInputFile(char * buff, int size)
    int section = none;
    string line;
    bool append_line = false;
    bool gathering_code = false;
    string code;
    for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
    {
@ -113,16 +118,20 @@ bool Parser::parseInputFile(char * buff, int size)
        {
            cerr << "Error compiling regex '" << exprs[i].pattern <<
                "': " << errptr << " at position " << erroffset << endl;
            return false;
        }
    }
    while ((newline = strstr(input, "\n")) != NULL)
    {
        int line_length = newline - input;
-        if (newline[-1] == '\r')
+        if (line_length >= 1 && newline[-1] == '\r')
        {
            newline[-1] = '\n';
            line_length--;
        }
        lineno++;
        if (append_line)
        {
            line += string(input, line_length);
@ -131,28 +140,49 @@ bool Parser::parseInputFile(char * buff, int size)
        {
            line = string(input, line_length);
        }
-        if (line.size() > 0 && line[line.size()-1] == '\\')
+        input = newline + 1;        /* set up for next loop iteration */
        if (gathering_code)
        {
-            line[line.size()-1] = ' ';
+            if (pcre_exec(code_end_sec, NULL, line.c_str(), line.size(),
-            append_line = true;
+                        0, 0, ovector, ovec_size) >= 0)
            {
                gathering_code = false;
                code += "}\n";
                /* TODO: do something with gathered code */
            }
            else
            {
-            append_line = false;
+                code += line;
            }
-        if ( append_line
+            continue;
-          || (pcre_exec(empty, NULL, line.c_str(), line.size(),
+        }
        if ( (pcre_exec(empty, NULL, line.c_str(), line.size(),
                  0, 0, ovector, ovec_size) >= 0)
          || (pcre_exec(comment, NULL, line.c_str(), line.size(),
                  0, 0, ovector, ovec_size) >= 0)
           )
        {
-            /* nothing */;
+            /* skip empty or comment lines */;
            continue;
        }
-        else if (pcre_exec(section_name, NULL, line.c_str(), line.size(),
+
        if (line.size() > 0 && line[line.size()-1] == '\\')
        {
            line[line.size()-1] = ' ';
            append_line = true;
            continue;
        }
        else
        {
            append_line = false;
        }
        if (pcre_exec(section_name, NULL, line.c_str(), line.size(),
                    0, 0, ovector, ovec_size) >= 0)
        {
-            sn = string(input, ovector[2], ovector[3] - ovector[2]);
+            sn = string(line, ovector[2], ovector[3] - ovector[2]);
            if (sections.find(sn) != sections.end())
            {
                section = sections[sn];
@ -162,9 +192,9 @@ bool Parser::parseInputFile(char * buff, int size)
                cerr << "Unknown section name '" << sn << "'!" << endl;
                return false;
            }
            continue;
        }
-        else
+
        {
        switch (section)
        {
            case none:
@ -194,6 +224,12 @@ bool Parser::parseInputFile(char * buff, int size)
                            << lineno << endl;
                        return false;
                    }
                    if (ovector[8] >= 0 && ovector[9] >= 0
                            && ovector[9] - ovector[8] > 0)
                    {
                        code = ""; /* FIXME: function definition */
                        gathering_code = true;
                    }
                }
                else
                {
@ -228,14 +264,11 @@ bool Parser::parseInputFile(char * buff, int size)
                break;
        }
    }
-        input = newline + 1;
+
-        lineno++;
+    for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
    {
        pcre_free(*exprs[i].re);
    }
    pcre_free(empty);
    pcre_free(comment);
    pcre_free(section_name);
    pcre_free(token);
    pcre_free(rule);
    return true;
 }