From d16cf33d5c01baf2c856b51aa602e36a8f6275c7 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Sat, 3 Apr 2010 23:30:17 -0400 Subject: [PATCH] updated parse_input() to parse "rules" and "tokens" sections --- parse-input.cc | 127 +++++++++++++++++++++++++++++++++---------------- testinput.i | 4 +- 2 files changed, 88 insertions(+), 43 deletions(-) diff --git a/parse-input.cc b/parse-input.cc index ca4fde2..30bf38c 100644 --- a/parse-input.cc +++ b/parse-input.cc @@ -1,6 +1,5 @@ #include -#include #include #include /* isspace() */ #include "parse-input.h" @@ -14,27 +13,20 @@ using namespace std; void parse_input(refptr< vector > ucs) { - enum State { INITIAL, SECTION_NAME, RULES, RULE_NAME, RULE_COLON, - RULE_EQUALS, RULE_RHS }; + enum State { INITIAL, SECTION_NAME, RULES, RULE_NAME, + RULE_EQUALS, RULE_RHS, TOKENS, TOKEN_NAME, TOKEN_EQUALS, TOKEN_RHS }; State state = INITIAL; int lineno = 1; int colno = 1; bool error = false; char errstr[200]; unistring build_str; + struct { unistring name; unistring rhs; } rule; + struct { unistring name; unistring rhs; } token; for (int i = 0, sz = ucs->size(); i < sz; i++) { unichar_t c = (*ucs)[i]; - if (c == '\n') - { - lineno++; - colno = 1; - } - else - { - colno++; - } switch (state) { case INITIAL: @@ -60,6 +52,10 @@ void parse_input(refptr< vector > ucs) { state = RULES; } + else if (build_str == "tokens") + { + state = TOKENS; + } else { SET_ERROR("Unknown section name"); @@ -74,50 +70,31 @@ void parse_input(refptr< vector > ucs) } break; case RULES: - if (isspace(c)) + if (c == '[') + { + state = SECTION_NAME; + build_str = ""; + } + else if (isspace(c)) { } - else if ( ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z') - || (c == '_') ) + else { build_str = ""; build_str += c; state = RULE_NAME; } - else - { - SET_ERROR("Unexpected character"); - } break; case RULE_NAME: - if ( ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z') - || ('0' <= c && c <= '9') - || (c == '_') ) - { - build_str += c; - } - else if (isspace(c)) - { - state = RULE_COLON; - } - else - { - SET_ERROR("Expected ':='"); - } - break; - case RULE_COLON: - if (isspace(c)) - { - } - else if (c == ':') + if (c == ':') { + rule.name = build_str; + build_str = ""; state = RULE_EQUALS; } else { - SET_ERROR("Expected ':='"); + build_str += c; } break; case RULE_EQUALS: @@ -131,7 +108,73 @@ void parse_input(refptr< vector > ucs) } break; case RULE_RHS: + if (c == '\n') + { + rule.rhs = build_str; + state = RULES; + } + else + { + build_str += c; + } break; + case TOKENS: + if (c == '[') + { + state = SECTION_NAME; + build_str = ""; + } + else + { + build_str = ""; + build_str += c; + state = TOKEN_NAME; + } + break; + case TOKEN_NAME: + if (c == ':') + { + state = TOKEN_EQUALS; + } + else + { + build_str += c; + } + break; + case TOKEN_EQUALS: + if (c == '=') + { + token.name = build_str; + build_str = ""; + state = TOKEN_RHS; + } + else + { + SET_ERROR("Expected '='"); + } + break; + case TOKEN_RHS: + if (c == '\n') + { + token.rhs = build_str; + state = RULES; + } + else + { + build_str += c; + } + break; + } + + /* update line and column position information */ + if (c == '\n') + { + lineno++; + colno = 1; + } + else + { + colno++; } if (error) diff --git a/testinput.i b/testinput.i index c13d963..7225678 100644 --- a/testinput.i +++ b/testinput.i @@ -1,10 +1,12 @@ -[rules] +[tokens] ASSIGN := ":=" DASSIGN := ":==" IDENTIFIER := "[a-zA-Z_][a-zA-Z_0-9]*" +[rules] + Assignment := IDENTIFIER ASSIGN Expression Expression := IDENTIFIER