updated parse_input() to parse "rules" and "tokens" sections

This commit is contained in:
Josh Holtrop 2010-04-03 23:30:17 -04:00
parent 2f06a95257
commit d16cf33d5c
2 changed files with 88 additions and 43 deletions

View File

@ -1,6 +1,5 @@
#include <iostream> #include <iostream>
#include <string>
#include <stdio.h> #include <stdio.h>
#include <ctype.h> /* isspace() */ #include <ctype.h> /* isspace() */
#include "parse-input.h" #include "parse-input.h"
@ -14,27 +13,20 @@ using namespace std;
void parse_input(refptr< vector<unichar_t> > ucs) void parse_input(refptr< vector<unichar_t> > ucs)
{ {
enum State { INITIAL, SECTION_NAME, RULES, RULE_NAME, RULE_COLON, enum State { INITIAL, SECTION_NAME, RULES, RULE_NAME,
RULE_EQUALS, RULE_RHS }; RULE_EQUALS, RULE_RHS, TOKENS, TOKEN_NAME, TOKEN_EQUALS, TOKEN_RHS };
State state = INITIAL; State state = INITIAL;
int lineno = 1; int lineno = 1;
int colno = 1; int colno = 1;
bool error = false; bool error = false;
char errstr[200]; char errstr[200];
unistring build_str; unistring build_str;
struct { unistring name; unistring rhs; } rule;
struct { unistring name; unistring rhs; } token;
for (int i = 0, sz = ucs->size(); i < sz; i++) for (int i = 0, sz = ucs->size(); i < sz; i++)
{ {
unichar_t c = (*ucs)[i]; unichar_t c = (*ucs)[i];
if (c == '\n')
{
lineno++;
colno = 1;
}
else
{
colno++;
}
switch (state) switch (state)
{ {
case INITIAL: case INITIAL:
@ -60,6 +52,10 @@ void parse_input(refptr< vector<unichar_t> > ucs)
{ {
state = RULES; state = RULES;
} }
else if (build_str == "tokens")
{
state = TOKENS;
}
else else
{ {
SET_ERROR("Unknown section name"); SET_ERROR("Unknown section name");
@ -74,50 +70,31 @@ void parse_input(refptr< vector<unichar_t> > ucs)
} }
break; break;
case RULES: case RULES:
if (isspace(c)) if (c == '[')
{
state = SECTION_NAME;
build_str = "";
}
else if (isspace(c))
{ {
} }
else if ( ('a' <= c && c <= 'z') else
|| ('A' <= c && c <= 'Z')
|| (c == '_') )
{ {
build_str = ""; build_str = "";
build_str += c; build_str += c;
state = RULE_NAME; state = RULE_NAME;
} }
else
{
SET_ERROR("Unexpected character");
}
break; break;
case RULE_NAME: case RULE_NAME:
if ( ('a' <= c && c <= 'z') if (c == ':')
|| ('A' <= c && c <= 'Z')
|| ('0' <= c && c <= '9')
|| (c == '_') )
{
build_str += c;
}
else if (isspace(c))
{
state = RULE_COLON;
}
else
{
SET_ERROR("Expected ':='");
}
break;
case RULE_COLON:
if (isspace(c))
{
}
else if (c == ':')
{ {
rule.name = build_str;
build_str = "";
state = RULE_EQUALS; state = RULE_EQUALS;
} }
else else
{ {
SET_ERROR("Expected ':='"); build_str += c;
} }
break; break;
case RULE_EQUALS: case RULE_EQUALS:
@ -131,7 +108,73 @@ void parse_input(refptr< vector<unichar_t> > ucs)
} }
break; break;
case RULE_RHS: case RULE_RHS:
if (c == '\n')
{
rule.rhs = build_str;
state = RULES;
}
else
{
build_str += c;
}
break; break;
case TOKENS:
if (c == '[')
{
state = SECTION_NAME;
build_str = "";
}
else
{
build_str = "";
build_str += c;
state = TOKEN_NAME;
}
break;
case TOKEN_NAME:
if (c == ':')
{
state = TOKEN_EQUALS;
}
else
{
build_str += c;
}
break;
case TOKEN_EQUALS:
if (c == '=')
{
token.name = build_str;
build_str = "";
state = TOKEN_RHS;
}
else
{
SET_ERROR("Expected '='");
}
break;
case TOKEN_RHS:
if (c == '\n')
{
token.rhs = build_str;
state = RULES;
}
else
{
build_str += c;
}
break;
}
/* update line and column position information */
if (c == '\n')
{
lineno++;
colno = 1;
}
else
{
colno++;
} }
if (error) if (error)

View File

@ -1,10 +1,12 @@
[rules] [tokens]
ASSIGN := ":=" ASSIGN := ":="
DASSIGN := ":==" DASSIGN := ":=="
IDENTIFIER := "[a-zA-Z_][a-zA-Z_0-9]*" IDENTIFIER := "[a-zA-Z_][a-zA-Z_0-9]*"
[rules]
Assignment := IDENTIFIER ASSIGN Expression Assignment := IDENTIFIER ASSIGN Expression
Expression := IDENTIFIER Expression := IDENTIFIER