Compare commits


No commits in common. "164a4854fbb2162dd2614e028496829713e871f6" and "064bb94108ae57cb261c192bafc7734edce7cdb3" have entirely different histories.

51 changed files with 1317 additions and 2313 deletions

.gitignore vendored (19 lines changed)

@@ -1,10 +1,9 @@
/.bundle/
/.yardoc
/_yardoc/
/coverage/
/doc/
/pkg/
/spec/reports/
/tmp/
/.rspec_status
/spec/run/
imbecile
tags
*.o
.*.swp
*.dep
tmpl.*
tests/*/itest.cc
tests/*/itest.h
tests/*/test

.gitmodules vendored (new file, 3 lines)

@@ -0,0 +1,3 @@
[submodule "refptr"]
path = refptr
url = http://github.com/holtrop/refptr.git

.rspec (deleted, 3 lines)

@@ -1,3 +0,0 @@
--format documentation
--color
--require spec_helper

Gemfile (deleted)

@@ -1,4 +0,0 @@
source "https://rubygems.org"
gem "rake"
gem "rspec"

Gemfile.lock (deleted)

@@ -1,28 +0,0 @@
GEM
remote: https://rubygems.org/
specs:
diff-lcs (1.5.0)
rake (13.0.6)
rspec (3.11.0)
rspec-core (~> 3.11.0)
rspec-expectations (~> 3.11.0)
rspec-mocks (~> 3.11.0)
rspec-core (3.11.0)
rspec-support (~> 3.11.0)
rspec-expectations (3.11.0)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.11.0)
rspec-mocks (3.11.1)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.11.0)
rspec-support (3.11.0)
PLATFORMS
ruby
DEPENDENCIES
rake
rspec
BUNDLED WITH
2.4.0.dev

LICENSE (MIT license, deleted)

@@ -1,21 +0,0 @@
The MIT License (MIT)
Copyright (c) 2010-2022 Josh Holtrop
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

Makefile (new file, 61 lines)

@@ -0,0 +1,61 @@
TARGET := imbecile
CXXOBJS := $(patsubst %.cc,%.o,$(wildcard *.cc)) tmpl.o
CXXDEPS := $(patsubst %.o,.%.dep,$(CXXOBJS))
CXXFLAGS := -O2
DEPS := $(CXXDEPS)
OBJS := $(CXXOBJS)
LDFLAGS := -lpcre
CPPFLAGS := -I$(shell pwd)/refptr
all: submodule_check tmpl.h $(TARGET)
.PHONY: submodule_check
submodule_check:
@if [ ! -e refptr/refptr.h ]; then \
echo Error: \"refptr\" folder is not populated.; \
echo Perhaps you forgot to do \"git checkout --recursive\"?; \
echo You can remedy the situation with \"git submodule update --init\".; \
exit 1; \
fi
$(TARGET): $(OBJS)
$(CXX) -o $@ $^ $(LDFLAGS)
# Object file rules
%.o: %.cc
$(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $<
# Make dependency files
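# (gcc -MM prints "foo.o: deps..."; the sed below rewrites that to
# "foo.o .foo.dep : deps..." so the dependency file itself is also
# regenerated whenever any of those prerequisites change)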
.%.dep: %.c
@set -e; rm -f $@; \
$(CC) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@
.%.dep: %.cc tmpl.h
@set -e; rm -f $@; \
$(CXX) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@
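# Embed every file under tmpl/ as C byte arrays; xxd -i also emits a
# matching *_len length variable for each array.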
tmpl.cc: $(wildcard tmpl/*)
echo -n > $@
for f in $*/*; \
do xxd -i $$f >> $@; \
done
tmpl.h: tmpl.cc
echo '#ifndef $*_h' > $@
echo '#define $*_h' >> $@
grep '$*_' $^ | sed -e 's/^/extern /' -e 's/ =.*/;/' >> $@
echo '#endif' >> $@
.PHONY: tests
tests: PATH := $(shell pwd):$(PATH)
tests: all
$(MAKE) -C $@
tests-clean:
$(MAKE) -C tests clean
clean: tests-clean
-rm -f $(TARGET) *.o .*.dep tmpl.cc tmpl.h
-include $(CXXDEPS)

Parser.cc (new file, 423 lines)

@@ -0,0 +1,423 @@
#include <stdio.h>
#include <string.h>
#include <pcre.h>
#include <ctype.h> /* toupper() */
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include "Parser.h"
#include "TokenDefinition.h"
#include "RuleDefinition.h"
#include "tmpl.h"
using namespace std;
#define DEBUG
Parser::Parser()
: m_classname("Parser"), m_namespace(""), m_extension("cc"),
m_token_data(new string()), m_token_code(new string()),
m_defines(new string())
{
}
void Parser::makeDefine(const string & defname, const string & definition)
{
*m_defines += string("#define ") + defname + " " + definition + "\n";
}
bool Parser::write(const string & fname)
{
if (m_tokens.size() < 1 || m_rules.size() < 1)
return false;
string header_fname = fname + ".h";
string body_fname = fname + "." + m_extension;
ofstream header(header_fname.c_str());
ofstream body(body_fname.c_str());
/* process data */
refptr<string> token_classes = new string();
refptr<string> token_classes_code = new string();
int i = 0;
for (list<TokenDefinitionRef>::const_iterator it = m_tokens.begin();
it != m_tokens.end();
it++)
{
char buff[20];
sprintf(buff, "%d", i++);
makeDefine((*it)->getIdentifier(), buff);
*token_classes += (*it)->getClassDefinition();
*token_classes_code += (*it)->getProcessMethod();
}
if (m_namespace != "")
{
makeDefine("I_NAMESPACE", m_namespace);
}
makeDefine("I_CLASSNAME", m_classname);
/* set up replacements */
setReplacement("token_list", buildTokenList());
setReplacement("buildToken", buildBuildToken());
setReplacement("header_name",
new string(string("\"") + header_fname + "\""));
setReplacement("token_code", m_token_code);
setReplacement("token_data", m_token_data);
setReplacement("defines", m_defines);
setReplacement("token_classes", token_classes);
setReplacement("token_classes_code", token_classes_code);
/* write the header */
writeTmpl(header, (char *) tmpl_parser_h, tmpl_parser_h_len);
/* write the body */
writeTmpl(body, (char *) tmpl_parser_cc, tmpl_parser_cc_len);
header.close();
body.close();
return true;
}
bool Parser::writeTmpl(std::ostream & out, char * dat, int len)
{
char * newline;
char * data = dat;
const char * errptr;
int erroffset;
data[len-1] = '\n';
const int ovec_size = 6;
int ovector[ovec_size];
pcre * replace = pcre_compile("{%(\\w+)%}", 0, &errptr, &erroffset, NULL);
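/* Process the template line by line: a {%name%} placeholder is spliced
* with its registered replacement text; other lines pass through. */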
while (data < (dat + len) && (newline = strstr(data, "\n")) != NULL)
{
if (pcre_exec(replace, NULL, data, newline - data,
0, 0, ovector, ovec_size) >= 0)
{
if (ovector[0] > 0)
{
out.write(data, ovector[0]);
}
out << *getReplacement(string(data, ovector[2],
ovector[3] - ovector[2]));
if (ovector[1] < newline - data)
{
out.write(data + ovector[1], newline - data - ovector[1]);
}
}
else
{
out.write(data, newline - data);
}
out << '\n';
data = newline + 1;
}
pcre_free(replace);
return true;
}
refptr<std::string> Parser::getReplacement(const std::string & name)
{
if (m_replacements.find(name) != m_replacements.end())
{
return m_replacements[name];
}
#ifdef DEBUG
cerr << "No replacement found for \"" << name << "\"" << endl;
#endif
return new string("");
}
refptr<string> Parser::buildTokenList()
{
refptr<string> tokenlist = new string();
for (list<TokenDefinitionRef>::const_iterator t = m_tokens.begin();
t != m_tokens.end();
t++)
{
if (t != m_tokens.begin())
*tokenlist += " ";
*tokenlist += "{ \"" + (*t)->getName() + "\", \""
+ (*t)->getCString() + "\", "
+ ((*t)->getProcessFlag() ? "true" : "false") + " }";
if (({typeof(t) tmp = t; ++tmp;}) != m_tokens.end())
*tokenlist += ",\n";
}
return tokenlist;
}
refptr<string> Parser::buildBuildToken()
{
refptr<string> buildToken = new string();
for (list<TokenDefinitionRef>::const_iterator t = m_tokens.begin();
t != m_tokens.end();
t++)
{
*buildToken += "case " + (*t)->getIdentifier() + ":\n";
*buildToken += " token = new " + (*t)->getClassName() + "();\n";
*buildToken += " break;\n";
}
return buildToken;
}
bool Parser::parseInputFile(char * buff, int size)
{
typedef pcre * pcre_ptr;
enum { none, tokens, rules };
pcre_ptr empty, comment, section_name, token, rule,
data_begin, data_end, code_begin, code_end;
struct { pcre_ptr * re; const char * pattern; } exprs[] = {
{&empty, "^\\s*$"},
{&comment, "^\\s*#"},
{&section_name, "^\\s*\\[([^\\]]+?)\\]\\s*$"},
{&token, "^\\s*" /* possible leading ws */
"([a-zA-Z_][a-zA-Z_0-9]*)" /* 1: token name */
"\\s+" /* required whitespace */
"((?:[^\\\\\\s]|\\\\.)+)"}, /* 2: token RE */
{&rule, "^\\s*(\\S+)\\s*:=(.*)$"},
{&data_begin, "^\\s*\\${"},
{&data_end, "\\$}"},
{&code_begin, "^\\s*%{"},
{&code_end, "%}"}
};
const int ovec_size = 3 * 10;
int ovector[ovec_size];
int lineno = 0;
char * newline;
char * input = buff;
string current_section_name;
map<string, int> sections;
sections["none"] = none;
sections["tokens"] = tokens;
sections["rules"] = rules;
int section = none;
string line;
bool append_line = false;
bool gathering_data = false;
bool gathering_code = false;
string gather;
bool continue_line = false;
TokenDefinitionRef current_token;
for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
{
const char * errptr;
int erroffset;
*exprs[i].re = pcre_compile(exprs[i].pattern, 0,
&errptr, &erroffset, NULL);
if (*exprs[i].re == NULL)
{
cerr << "Error compiling regex '" << exprs[i].pattern <<
"': " << errptr << " at position " << erroffset << endl;
return false;
}
}
for (;;)
{
if (continue_line)
{
continue_line = false;
}
else
{
if ((newline = strstr(input, "\n")) == NULL)
break;
int line_length = newline - input;
if (line_length >= 1 && newline[-1] == '\r')
{
newline[-1] = '\n';
line_length--;
}
lineno++;
if (append_line)
{
line += string(input, line_length);
}
else
{
line = string(input, line_length);
}
input = newline + 1; /* set up for next loop iteration */
}
if ( (pcre_exec(empty, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
|| (pcre_exec(comment, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
)
{
/* skip empty or comment lines */;
continue;
}
if (! (gathering_code || gathering_data) )
{
if (line.size() > 0 && line[line.size()-1] == '\\')
{
line[line.size()-1] = ' ';
append_line = true;
continue;
}
else
{
append_line = false;
}
if (pcre_exec(section_name, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
current_section_name
= string(line, ovector[2], ovector[3] - ovector[2]);
if (sections.find(current_section_name) != sections.end())
{
section = sections[current_section_name];
}
else
{
cerr << "Unknown section name '" << current_section_name
<< "'!" << endl;
return false;
}
continue;
}
}
switch (section)
{
case none:
cerr << "Unrecognized input on line " << lineno << endl;
return false;
case tokens:
if (gathering_data)
{
if (pcre_exec(data_end, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gather += string(line, 0, ovector[0]) + "\n";
gathering_data = false;
line = string(line, ovector[1]);
continue_line = true;
if (current_token.isNull())
{
*m_token_data += gather;
}
else
{
current_token->addData(gather);
}
}
else
{
gather += line + "\n";
}
continue;
}
else if (gathering_code)
{
if (pcre_exec(code_end, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gather += string(line, 0, ovector[0]) + "\n";
gathering_code = false;
line = string(line, ovector[1]);
continue_line = true;
if (current_token.isNull())
{
*m_token_code += gather;
}
else
{
current_token->addCode(gather);
}
}
else
{
gather += line + "\n";
}
continue;
}
else if (pcre_exec(data_begin, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gathering_data = true;
gather = "";
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else if (pcre_exec(code_begin, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gathering_code = true;
gather = "";
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else if (pcre_exec(token, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
string name(line, ovector[2], ovector[3] - ovector[2]);
string definition(line,
ovector[4], ovector[5] - ovector[4]);
current_token = new TokenDefinition();
if (current_token->create(name, definition))
{
addTokenDefinition(current_token);
}
else
{
cerr << "Error in token definition ending on line "
<< lineno << endl;
return false;
}
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else
{
cerr << "Unrecognized input on line " << lineno << endl;
return false;
}
break;
case rules:
if (pcre_exec(rule, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
string name(line, ovector[2], ovector[3] - ovector[2]);
string definition(line,
ovector[4], ovector[5] - ovector[4]);
refptr<RuleDefinition> rd = new RuleDefinition();
if (rd->create(name, definition))
{
addRuleDefinition(rd);
}
else
{
cerr << "Error in rule definition ending on line "
<< lineno << endl;
return false;
}
}
else
{
cerr << "Unrecognized input on line " << lineno << endl;
return false;
}
break;
}
}
for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
{
pcre_free(*exprs[i].re);
}
return true;
}

Parser.h (new file, 61 lines)

@@ -0,0 +1,61 @@
#ifndef PARSER_H
#define PARSER_H
#include <vector>
#include <string>
#include <list>
#include <map>
#include "refptr.h"
#include "TokenDefinition.h"
#include "RuleDefinition.h"
class Parser
{
public:
Parser();
void addTokenDefinition(refptr<TokenDefinition> td)
{
m_tokens.push_back(td);
}
void addRuleDefinition(refptr<RuleDefinition> rd)
{
m_rules.push_back(rd);
}
bool write(const std::string & fname);
bool parseInputFile(char * buff, int size);
void setClassName(const std::string & cn) { m_classname = cn; }
std::string getClassName() { return m_classname; }
void setNamespace(const std::string & ns) { m_namespace = ns; }
std::string getNamespace() { return m_namespace; }
void setExtension(const std::string & e) { m_extension = e; }
std::string getExtension() { return m_extension; }
protected:
refptr<std::string> buildTokenList();
refptr<std::string> buildBuildToken();
bool writeTmpl(std::ostream & out, char * dat, int len);
refptr<std::string> getReplacement(const std::string & name);
void setReplacement(const std::string & name, refptr<std::string> val)
{
m_replacements[name] = val;
}
void makeDefine(const std::string & defname,
const std::string & definition);
std::list<TokenDefinitionRef> m_tokens;
std::vector< refptr< RuleDefinition > > m_rules;
std::string m_classname;
std::string m_namespace;
std::string m_extension;
std::map< std::string, refptr<std::string> > m_replacements;
refptr<std::string> m_token_data;
refptr<std::string> m_token_code;
refptr<std::string> m_defines;
};
#endif

README (new file, 5 lines)

@@ -0,0 +1,5 @@
Imbecile is a bottom-up parser generator. It targets C++ and automatically
generates a class hierarchy for interacting with the parser.
Imbecile generates both a lexer and a parser from the rules given in its
input file.
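
A hypothetical input file, inferred from the grammar Parser.cc accepts
([tokens] and [rules] sections, "name regex" token lines, and
"name := components" rules); this is a sketch, not a documented example:

  # tokens are named regular expressions
  [tokens]
  Number  [0-9]+
  Plus    \+
  [rules]
  Expr := Expr Plus Number
  Expr := Number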

README.md (deleted)

@@ -1,31 +0,0 @@
# The Propane Parser Generator
Propane is an LR Parser Generator (LPG) which:
* accepts LR(0), SLR, and LALR grammars
* generates a built-in lexer to tokenize input
* supports UTF-8 lexer inputs
* generates a table-driven parser to parse input in linear time
* is MIT-licensed
* is distributable as a standalone Ruby script
## Installation
TODO
## Usage
TODO: Write usage instructions here
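In the meantime, here is a hypothetical grammar sketch inferred from the forms
`lib/propane.rb` parses (`token`, `drop`, and `Name : [components] << code >>`);
it is illustrative, not documented usage:

    token plus \+
    token integer \d+
    drop \s+
    Start : [Expr] <<
    >>
    Expr : [integer plus integer] <<
    >>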
## Development
After checking out the repository, run `bundle install` to install dependencies.
Run `rake spec` to execute tests.
## Contributing
Bug reports and pull requests are welcome on GitHub at https://github.com/holtrop/propane.
## License
Propane is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).

Rakefile (deleted)

@@ -1,9 +0,0 @@
require "rspec/core/rake_task"
RSpec::Core::RakeTask.new(:spec, :example_pattern) do |task, args|
if args.example_pattern
task.rspec_opts = %W[-e "#{args.example_pattern}" -f documentation]
end
end
task :default => :spec

RuleDefinition.cc (new file, 9 lines)

@@ -0,0 +1,9 @@
#include "RuleDefinition.h"
using namespace std;
bool RuleDefinition::create(const string & name, const string & definition)
{
m_name = name;
/* The definition text is not parsed yet; record the name and succeed. */
return true;
}

RuleDefinition.h (new file, 16 lines)

@@ -0,0 +1,16 @@
#ifndef RULEDEFINITION_H
#define RULEDEFINITION_H
#include <string>
class RuleDefinition
{
public:
bool create(const std::string & name, const std::string & definition);
protected:
std::string m_name;
};
#endif

TokenDefinition.cc (new file, 125 lines)

@@ -0,0 +1,125 @@
#include <pcre.h>
#include <iostream>
#include <string>
#include <vector>
#include "TokenDefinition.h"
#include "refptr.h"
using namespace std;
#define WHITESPACE " \n\r\t\v"
static string trim(string s)
{
size_t lastpos = s.find_last_not_of(WHITESPACE);
if (lastpos == string::npos)
return "";
s.erase(lastpos + 1);
s.erase(0, s.find_first_not_of(WHITESPACE));
return s;
}
static refptr< vector<string> > split(const string & delim, string str)
{
refptr< vector<string> > ret = new vector<string>();
size_t pos;
while ( (pos = str.find(delim)) != string::npos )
{
string t = str.substr(0, pos);
ret->push_back(t);
str.erase(0, pos + 1);
}
if (str != "")
ret->push_back(str);
return ret;
}
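/* Escape backslashes and double quotes so a regex pattern can be
* emitted inside a C string literal. */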
static string c_escape(const string & orig)
{
string result;
for (string::const_iterator it = orig.begin(); it != orig.end(); it++)
{
if (*it == '\\' || *it == '"')
result += '\\';
result += *it;
}
return result;
}
TokenDefinition::TokenDefinition()
: m_process(false)
{
}
bool TokenDefinition::create(const string & name,
const string & definition)
{
const char * errptr;
int erroffset;
pcre * re = pcre_compile(definition.c_str(), 0, &errptr, &erroffset, NULL);
if (re == NULL)
{
cerr << "Error compiling regular expression '" << definition
<< "' at position " << erroffset << ": " << errptr << endl;
return false;
}
m_name = name;
m_definition = definition;
pcre_free(re);
#if 0
refptr< vector< string > > parts = split(",", flags);
for (int i = 0, sz = parts->size(); i < sz; i++)
{
(*parts)[i] = trim((*parts)[i]);
string & s = (*parts)[i];
if (s == "p")
{
m_process = true;
}
else
{
cerr << "Unknown token flag \"" << s << "\"" << endl;
return false;
}
}
#endif
return true;
}
string TokenDefinition::getCString() const
{
return c_escape(m_definition);
}
string TokenDefinition::getClassDefinition() const
{
string ret = "class "+ getClassName() + " : public Token {\n";
ret += "public:\n";
if (m_process)
{
ret += " virtual void process(const Matches & matches);\n";
}
ret += "\n";
ret += "protected:\n";
ret += m_data + "\n";
ret += "};\n";
return ret;
}
string TokenDefinition::getProcessMethod() const
{
string ret;
if (m_code != "")
{
ret += "void " + getClassName() + "::process(const Matches & matches) {\n";
ret += m_code + "\n";
ret += "}\n";
}
return ret;
}

TokenDefinition.h (new file, 37 lines)

@@ -0,0 +1,37 @@
#ifndef TOKENDEFINITION_H
#define TOKENDEFINITION_H
#include <string>
#include "refptr.h"
class TokenDefinition
{
public:
TokenDefinition();
bool create(const std::string & name,
const std::string & definition);
std::string getCString() const;
std::string getName() const { return m_name; }
bool getProcessFlag() const { return m_process; }
void setProcessFlag(bool p) { m_process = p; }
void addData(const std::string & d) { m_data += d; }
std::string getData() const { return m_data; }
void addCode(const std::string & c) { m_code += c; m_process = true; }
std::string getCode() const { return m_code; }
std::string getClassDefinition() const;
std::string getProcessMethod() const;
std::string getIdentifier() const { return "TK_" + m_name; }
std::string getClassName() const { return "Tk" + m_name; }
protected:
std::string m_name;
std::string m_definition;
bool m_process;
std::string m_data;
std::string m_code;
};
typedef refptr<TokenDefinition> TokenDefinitionRef;
#endif

assets/parser.d.erb (deleted)

@@ -1,252 +0,0 @@
<% if @modulename %>
module <%= @modulename %>;
<% end %>
class <%= classname %>
{
enum
{
<% @tokens.each_with_index do |(name, token), index| %>
<% if token.name %>
TOKEN_<%= token.c_name %> = <%= index %>,
<% end %>
<% end %>
TOKEN_EOF = <%= TOKEN_EOF %>,
TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>,
TOKEN_DROP = <%= TOKEN_DROP %>,
TOKEN_NONE = <%= TOKEN_NONE %>,
}
static immutable string TokenNames[] = [
<% @tokens.each_with_index do |(name, token), index| %>
<% if token.name %>
"<%= token.name %>",
<% else %>
null,
<% end %>
<% end %>
];
static class Decoder
{
enum
{
CODE_POINT_INVALID = 0xFFFFFFFE,
CODE_POINT_EOF = 0xFFFFFFFF,
}
struct DecodedCodePoint
{
uint code_point;
uint code_point_length;
}
static DecodedCodePoint decode_code_point(const(ubyte) * input, size_t input_length)
{
if (input_length == 0u)
{
return DecodedCodePoint(CODE_POINT_EOF, 0u);
}
ubyte c = *input;
uint code_point;
uint code_point_length;
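// UTF-8: the leading byte's high bits encode the sequence length, and
// each continuation byte contributes six more payload bits.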
if ((c & 0x80u) == 0u)
{
code_point = c;
code_point_length = 1u;
}
else
{
ubyte following_bytes;
if ((c & 0xE0u) == 0xC0u)
{
code_point = c & 0x1Fu;
following_bytes = 1u;
}
else if ((c & 0xF0u) == 0xE0u)
{
code_point = c & 0x0Fu;
following_bytes = 2u;
}
else if ((c & 0xF8u) == 0xF0u)
{
code_point = c & 0x07u;
following_bytes = 3u;
}
else if ((c & 0xFCu) == 0xF8u)
{
code_point = c & 0x03u;
following_bytes = 4u;
}
else if ((c & 0xFEu) == 0xFCu)
{
code_point = c & 0x01u;
following_bytes = 5u;
}
else
{
/* Invalid leading byte (0xFEu/0xFFu): not a UTF-8 sequence start. */
return DecodedCodePoint(CODE_POINT_INVALID, 0u);
}
if (input_length <= following_bytes)
{
return DecodedCodePoint(CODE_POINT_INVALID, 0u);
}
code_point_length = following_bytes + 1u;
while (following_bytes-- > 0u)
{
input++;
code_point <<= 6u;
code_point |= *input & 0x3Fu;
}
}
return DecodedCodePoint(code_point, code_point_length);
}
}
static class Lexer
{
private struct Transition
{
uint first;
uint last;
uint destination;
}
private struct State
{
uint transition_table_index;
uint n_transitions;
uint accepts;
}
<% transition_table, state_table = lexer.dfa.build_tables %>
private static const Transition transitions[] = [
<% transition_table.each do |transition_table_entry| %>
Transition(<%= transition_table_entry[:first] %>u, <%= transition_table_entry[:last] %>u, <%= transition_table_entry[:destination] %>u),
<% end %>
];
private static const State states[] = [
<% state_table.each do |state_table_entry| %>
State(<%= state_table_entry[:transition_table_index] %>u, <%= state_table_entry[:n_transitions] %>u, <%= state_table_entry[:accepts] %>u),
<% end %>
];
struct LexedToken
{
size_t row;
size_t col;
size_t length;
uint token;
}
private const(ubyte) * m_input;
private size_t m_input_length;
private size_t m_input_position;
private size_t m_input_row;
private size_t m_input_col;
this(const(ubyte) * input, size_t input_length)
{
m_input = input;
m_input_length = input_length;
}
LexedToken lex_token()
{
for (;;)
{
LexedToken lt = attempt_lex_token();
if (lt.token != TOKEN_DROP)
{
return lt;
}
}
}
private LexedToken attempt_lex_token()
{
LexedToken lt = LexedToken(m_input_row, m_input_col, 0, TOKEN_NONE);
struct LexedTokenState
{
size_t length;
size_t delta_row;
size_t delta_col;
uint token;
}
LexedTokenState last_accepts_info;
last_accepts_info.token = TOKEN_NONE;
LexedTokenState attempt_info;
uint current_state;
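// Maximal munch: advance the DFA as long as a transition exists,
// remembering the last accepting position, and emit the longest match.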
for (;;)
{
auto decoded = Decoder.decode_code_point(&m_input[m_input_position + attempt_info.length], m_input_length - m_input_position - attempt_info.length);
if (decoded.code_point == Decoder.CODE_POINT_INVALID)
{
lt.token = TOKEN_DECODE_ERROR;
return lt;
}
bool lex_continue = false;
if (decoded.code_point != Decoder.CODE_POINT_EOF)
{
uint dest = transition(current_state, decoded.code_point);
if (dest != cast(uint)-1)
{
lex_continue = true;
attempt_info.length += decoded.code_point_length;
if (decoded.code_point == '\n')
{
attempt_info.delta_row++;
attempt_info.delta_col = 0u;
}
else
{
attempt_info.delta_col++;
}
current_state = dest;
if (states[current_state].accepts != TOKEN_NONE)
{
attempt_info.token = states[current_state].accepts;
last_accepts_info = attempt_info;
}
}
}
else if (attempt_info.length == 0u)
{
lt.token = TOKEN_EOF;
break;
}
if (!lex_continue)
{
if (last_accepts_info.token != TOKEN_NONE)
{
lt.token = last_accepts_info.token;
lt.length = last_accepts_info.length;
m_input_position += last_accepts_info.length;
m_input_row += last_accepts_info.delta_row;
if (last_accepts_info.delta_row != 0u)
{
m_input_col = last_accepts_info.delta_col;
}
else
{
m_input_col += last_accepts_info.delta_col;
}
}
break;
}
}
return lt;
}
private uint transition(uint current_state, uint code_point)
{
uint transition_table_index = states[current_state].transition_table_index;
for (uint i = 0u; i < states[current_state].n_transitions; i++)
{
if ((transitions[transition_table_index + i].first <= code_point) &&
(code_point <= transitions[transition_table_index + i].last))
{
return transitions[transition_table_index + i].destination;
}
}
return cast(uint)-1;
}
}
}

bin/propane (deleted)

@@ -1,5 +0,0 @@
#!/usr/bin/env ruby
require "propane"
exit Propane::CLI.run(ARGV.dup)

imbecile.cc (new file, 101 lines)

@@ -0,0 +1,101 @@
#include <getopt.h>
#include <iostream>
#include <fstream>
#include "refptr.h"
#include "Parser.h"
using namespace std;
string buildOutputFilename(string & input_fname);
int main(int argc, char * argv[])
{
int longind = 1;
int opt;
Parser p;
string outfile;
static struct option longopts[] = {
/* name, has_arg, flag, val */
{ "classname", required_argument, NULL, 'c' },
{ "extension", required_argument, NULL, 'e' },
{ "namespace", required_argument, NULL, 'n' },
{ "outfile", required_argument, NULL, 'o' },
{ NULL, 0, NULL, 0 }
};
while ((opt = getopt_long(argc, argv, "", longopts, &longind)) != -1)
{
switch (opt)
{
case 'c': /* classname */
p.setClassName(optarg);
break;
case 'e': /* extension */
p.setExtension(optarg);
break;
case 'n': /* namespace */
p.setNamespace(optarg);
break;
case 'o': /* outfile */
outfile = optarg;
break;
}
}
if (optind >= argc)
{
cerr << "Usage: imbecile [options] <input-file>" << endl;
return 1;
}
string input_fname = argv[optind];
ifstream ifs;
ifs.open(input_fname.c_str(), ios::binary);
if (!ifs.is_open())
{
cerr << "Error opening input file: '" << input_fname << "'";
return 2;
}
ifs.seekg(0, ios_base::end);
int size = ifs.tellg();
ifs.seekg(0, ios_base::beg);
char * buff = new char[size + 1];
ifs.read(buff, size);
buff[size] = '\0'; /* terminate so parseInputFile()'s strstr() cannot overrun */
ifs.close();
if (outfile == "")
outfile = buildOutputFilename(input_fname);
if (!p.parseInputFile(buff, size))
{
cerr << "Error parsing " << input_fname << endl;
return 3;
}
if (!p.write(outfile))
{
cerr << "Error processing " << input_fname << endl;
return 4;
}
delete[] buff;
return 0;
}
string buildOutputFilename(string & input_fname)
{
string outfile;
size_t len = input_fname.length();
if (len > 2 && input_fname.substr(len - 2) == ".I")
{
outfile = input_fname.substr(0, len - 2);
}
else
{
outfile = input_fname;
}
return outfile;
}

lib/propane.rb (deleted)

@@ -1,137 +0,0 @@
require "erb"
require "set"
require_relative "propane/cli"
require_relative "propane/code_point_range"
require_relative "propane/fa"
require_relative "propane/fa/state"
require_relative "propane/fa/state/transition"
require_relative "propane/lexer"
require_relative "propane/lexer/dfa"
require_relative "propane/parser"
require_relative "propane/parser/item"
require_relative "propane/parser/item_set"
require_relative "propane/regex"
require_relative "propane/regex/nfa"
require_relative "propane/regex/unit"
require_relative "propane/rule"
require_relative "propane/token"
require_relative "propane/version"
class Propane
# EOF.
TOKEN_EOF = 0xFFFFFFFC
# Decoding error.
TOKEN_DECODE_ERROR = 0xFFFFFFFD
# Token ID for a "dropped" token.
TOKEN_DROP = 0xFFFFFFFE
# Invalid token ID.
TOKEN_NONE = 0xFFFFFFFF
class Error < RuntimeError
end
def initialize(input)
@tokens = {}
@rules = {}
input = input.gsub("\r\n", "\n")
while !input.empty?
parse_grammar(input)
end
end
def generate(output_file, log_file)
expand_rules
lexer = Lexer.new(@tokens)
parser = Parser.new(@tokens, @rules)
classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../assets/parser.d.erb")), trim_mode: "<>")
result = erb.result(binding.clone)
File.open(output_file, "wb") do |fh|
fh.write(result)
end
end
private
def parse_grammar(input)
if input.slice!(/\A\s+/)
# Skip white space.
elsif input.slice!(/\A#.*\n/)
# Skip comment lines.
elsif input.slice!(/\Amodule\s+(\S+)\n/)
@modulename = $1
elsif input.slice!(/\Aclass\s+(\S+)\n/)
@classname = $1
elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
name, pattern = $1, $2
if pattern.nil?
pattern = name
end
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
raise Error.new("Invalid token name #{name}")
end
if @tokens[name]
raise Error.new("Duplicate token name #{name}")
else
@tokens[name] = Token.new(name, pattern, @tokens.size)
end
elsif input.slice!(/\Adrop\s+(\S+)\n/)
pattern = $1
# Drop patterns have no name; store the token under the nil key.
@tokens[nil] = Token.new(nil, pattern, @tokens.size)
elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
rule_name, components, code = $1, $2, $3
components = components.strip.split(/\s+/)
@rules[rule_name] ||= Rule.new(rule_name, @rules.size)
@rules[rule_name].add_pattern(components, code)
else
if input.size > 25
input = input.slice(0..20) + "..."
end
raise Error.new("Unexpected grammar input: #{input}")
end
end
def expand_rules
@rules.each do |rule_name, rule|
if @tokens.include?(rule_name)
raise Error.new("Rule name collides with token name #{rule_name}")
end
end
unless @rules["Start"]
raise Error.new("Start rule not found")
end
@rules.each do |rule_name, rule|
rule.patterns.each do |pattern|
pattern.components.map! do |component|
if @tokens[component]
@tokens[component]
elsif @rules[component]
@rules[component]
else
raise Error.new("Symbol #{component} not found")
end
end
end
end
end
class << self
def run(input_file, output_file, log_file)
begin
propane = Propane.new(File.read(input_file))
propane.generate(output_file, log_file)
rescue Error => e
$stderr.puts e.message
return 2
end
return 0
end
end
end

lib/propane/cli.rb (deleted)

@@ -1,54 +0,0 @@
class Propane
module CLI
USAGE = <<EOF
Usage: #{$0} [options] <input-file> <output-file>
Options:
--log LOG Write log file
--version Show program version and exit
-h, --help Show this usage and exit
EOF
class << self
def run(args)
params = []
log_file = nil
i = 0
while i < args.size
arg = args[i]
case arg
when "--log"
if i + 1 < args.size
i += 1
log_file = args[i]
end
when "--version"
puts "propane v#{VERSION}"
return 0
when "-h", "--help"
puts USAGE
return 0
when /^-/
$stderr.puts "Error: unknown option #{arg}"
return 1
else
params << arg
end
i += 1
end
if params.size != 2
$stderr.puts "Error: specify input and output files"
return 1
end
unless File.readable?(params[0])
$stderr.puts "Error: cannot read #{params[0]}"
return 2
end
Propane.run(*params, log_file)
end
end
end
end

lib/propane/code_point_range.rb (deleted)

@@ -1,84 +0,0 @@
class Propane
class CodePointRange
MAX_CODE_POINT = 0xFFFFFFFF
attr_reader :first
attr_reader :last
include Comparable
# Build a CodePointRange
def initialize(first, last = nil)
@first = first.ord
if last
@last = last.ord
if @last < @first
raise "Invalid CodePointRange: last code point must be > first code point"
end
else
@last = @first
end
end
def <=>(other)
if self.first != other.first
@first <=> other.first
else
@last <=> other.last
end
end
def include?(v)
if v.is_a?(CodePointRange)
@first <= v.first && v.last <= @last
else
@first <= v && v <= @last
end
end
def size
@last - @first + 1
end
class << self
def invert_ranges(code_point_ranges)
new_ranges = []
last_cp = -1
code_point_ranges.sort.each do |code_point_range|
if code_point_range.first > (last_cp + 1)
new_ranges << CodePointRange.new(last_cp + 1, code_point_range.first - 1)
last_cp = code_point_range.last
else
last_cp = [last_cp, code_point_range.last].max
end
end
if last_cp < MAX_CODE_POINT
new_ranges << CodePointRange.new(last_cp + 1, MAX_CODE_POINT)
end
new_ranges
end
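# Return the first disjoint subrange: it begins at the lowest first code
# point and ends just before the boundary of any other range.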
def first_subrange(code_point_ranges)
code_point_ranges.sort.reduce do |result, code_point_range|
if code_point_range.include?(result.first)
if code_point_range.last < result.last
code_point_range
else
result
end
else
if code_point_range.first <= result.last
CodePointRange.new(result.first, code_point_range.first - 1)
else
result
end
end
end
end
end
end
end

lib/propane/fa.rb (deleted)

@@ -1,61 +0,0 @@
class Propane
class FA
attr_reader :start_state
def initialize
@start_state = State.new
end
def to_s
chr = lambda do |value|
if value < 32 || value > 127
"{#{value}}"
else
value.chr
end
end
rv = ""
states = enumerate
states.each do |state, id|
accepts_s = state.accepts ? " #{state.accepts}" : ""
rv += "#{id}#{accepts_s}:\n"
state.transitions.each do |transition|
if transition.nil?
range_s = "nil"
else
range_s = chr[transition.code_point_range.first]
if transition.code_point_range.size > 1
range_s += "-" + chr[transition.code_point_range.last]
end
end
accepts_s = transition.destination.accepts ? " #{transition.destination.accepts}" : ""
rv += " #{range_s} => #{states[transition.destination]}#{accepts_s}\n"
end
end
rv
end
def enumerate
@_enumerated ||=
begin
id = 0
states = {}
visit = lambda do |state|
unless states.include?(state)
states[state] = id
id += 1
state.transitions.each do |transition|
visit[transition.destination]
end
end
end
visit[@start_state]
states
end
end
end
end

lib/propane/fa/state.rb (deleted)

@@ -1,51 +0,0 @@
class Propane
class FA
class State
attr_accessor :accepts
attr_reader :transitions
def initialize
@transitions = []
end
def add_transition(code_point_range, destination)
@transitions << Transition.new(code_point_range, destination)
end
# Determine the set of states that can be reached by nil transitions
# from this state.
#
# @return [Set<NFA::State>]
# Set of states.
def nil_transition_states
states = Set[self]
analyze_state = lambda do |state|
state.nil_transitions.each do |transition|
unless states.include?(transition.destination)
states << transition.destination
analyze_state[transition.destination]
end
end
end
analyze_state[self]
states
end
def nil_transitions
@transitions.select do |transition|
transition.nil?
end
end
def cp_transitions
@transitions.reject do |transition|
transition.nil?
end
end
end
end
end

lib/propane/fa/state/transition.rb (deleted)

@@ -1,23 +0,0 @@
class Propane
class FA
class State
class Transition
attr_reader :code_point_range
attr_reader :destination
def initialize(code_point_range, destination)
@code_point_range = code_point_range
@destination = destination
end
def nil?
@code_point_range.nil?
end
end
end
end
end

lib/propane/lexer.rb (deleted)

@@ -1,13 +0,0 @@
class Propane
class Lexer
# @return [DFA]
# Lexer DFA.
attr_accessor :dfa
def initialize(tokens)
@dfa = DFA.new(tokens)
end
end
end

lib/propane/lexer/dfa.rb (deleted)

@@ -1,118 +0,0 @@
class Propane
class Lexer
class DFA < FA
def initialize(tokens)
super()
start_nfa = Regex::NFA.new
tokens.each do |name, token|
start_nfa.start_state.add_transition(nil, token.nfa.start_state)
end
@nfa_state_sets = {}
@states = []
@to_process = Set.new
nil_transition_states = start_nfa.start_state.nil_transition_states
register_nfa_state_set(nil_transition_states)
while @to_process.size > 0
state_set = @to_process.first
@to_process.delete(state_set)
process_nfa_state_set(state_set)
end
@start_state = @states[0]
end
def build_tables
transition_table = []
state_table = []
states = enumerate
states.each do |state, id|
accepts =
if state.accepts.nil?
TOKEN_NONE
elsif state.accepts.name
state.accepts.id
else
TOKEN_DROP
end
state_table << {
transition_table_index: transition_table.size,
n_transitions: state.transitions.size,
accepts: accepts,
}
state.transitions.each do |transition|
transition_table << {
first: transition.code_point_range.first,
last: transition.code_point_range.last,
destination: states[transition.destination],
}
end
end
[transition_table, state_table]
end
private
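# Classic subset construction: each DFA state stands for the set of NFA
# states reachable via nil transitions; when several NFA states accept,
# the token with the lowest id wins.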
def register_nfa_state_set(nfa_state_set)
unless @nfa_state_sets.include?(nfa_state_set)
state_id = @states.size
@nfa_state_sets[nfa_state_set] = state_id
@states << State.new
@to_process << nfa_state_set
end
end
def process_nfa_state_set(nfa_state_set)
state_id = @nfa_state_sets[nfa_state_set]
state = @states[state_id]
if state_id > 0
nfa_state_set.each do |nfa_state|
if nfa_state.accepts
if state.accepts
if nfa_state.accepts.id < state.accepts.id
state.accepts = nfa_state.accepts
end
else
state.accepts = nfa_state.accepts
end
end
end
end
transitions = transitions_for(nfa_state_set)
while transitions.size > 0
subrange = CodePointRange.first_subrange(transitions.map(&:code_point_range))
dest_nfa_states = transitions.reduce(Set.new) do |result, transition|
if transition.code_point_range.include?(subrange)
result << transition.destination
end
result
end
dest_nfa_states = dest_nfa_states.reduce(Set.new) do |result, dest_nfa_state|
result + dest_nfa_state.nil_transition_states
end
register_nfa_state_set(dest_nfa_states)
dest_state = @states[@nfa_state_sets[dest_nfa_states]]
state.add_transition(subrange, dest_state)
transitions.delete_if do |transition|
transition.code_point_range.last <= subrange.last
end
transitions.map! do |transition|
if transition.code_point_range.first <= subrange.last
Regex::NFA::State::Transition.new(CodePointRange.new(subrange.last + 1, transition.code_point_range.last), transition.destination)
else
transition
end
end
end
end
def transitions_for(nfa_state_set)
nfa_state_set.reduce([]) do |result, state|
result + state.cp_transitions
end
end
end
end
end

lib/propane/parser.rb (deleted)

@@ -1,84 +0,0 @@
class Propane
class Parser
def initialize(tokens, rules)
@token_eof = Token.new("$", nil, TOKEN_EOF)
@item_sets = []
@item_sets_set = {}
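# Build the canonical collection of LR(0) item sets, seeded from the
# Start rule's patterns with the end-of-file token appended.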
start_items = rules["Start"].patterns.map do |pattern|
pattern.components << @token_eof
Item.new(pattern, 0)
end
eval_item_sets = Set.new
eval_item_sets << ItemSet.new(start_items)
while eval_item_sets.size > 0
this_eval_item_sets = eval_item_sets
eval_item_sets = Set.new
this_eval_item_sets.each do |item_set|
unless @item_sets_set.include?(item_set)
item_set.id = @item_sets.size
@item_sets << item_set
@item_sets_set[item_set] = item_set
item_set.follow_symbols.each do |follow_symbol|
unless follow_symbol == @token_eof
follow_set = item_set.build_follow_set(follow_symbol)
eval_item_sets << follow_set
end
end
end
end
end
@item_sets.each do |item_set|
process_item_set(item_set)
puts "Item set #{item_set.id}:"
ids = item_set.in_sets.map(&:id)
if ids.size > 0
puts " (in from #{ids.join(", ")})"
end
puts item_set
item_set.follow_item_set.each do |follow_symbol, follow_item_set|
puts " #{follow_symbol.name} => #{follow_item_set.id}"
end
puts
end
end
def build_tables
shift_table = []
state_table = []
@item_sets.each do |item_set|
shift_entries = item_set.follow_symbols.select do |follow_symbol|
follow_symbol.is_a?(Token)
end.map do |follow_symbol|
{
token_id: follow_symbol.id,
state_id: item_set.follow_item_set[follow_symbol].id,
}
end
state_table << {
shift_index: shift_table.size,
n_shifts: shift_entries.size,
}
shift_table += shift_entries
end
[state_table, shift_table]
end
private
def process_item_set(item_set)
item_set.follow_symbols.each do |follow_symbol|
unless follow_symbol == @token_eof
follow_set = @item_sets_set[item_set.build_follow_set(follow_symbol)]
item_set.follow_item_set[follow_symbol] = follow_set
follow_set.in_sets << item_set
end
end
end
end
end

lib/propane/parser/item.rb (deleted)

@@ -1,69 +0,0 @@
class Propane
class Parser
class Item
attr_reader :pattern
attr_reader :position
def initialize(pattern, position)
@pattern = pattern
@position = position
end
def next_component
@pattern.components[@position]
end
def hash
[@pattern, @position].hash
end
def ==(other)
@pattern == other.pattern && @position == other.position
end
def eql?(other)
self == other
end
def closed_items
if @pattern.components[@position].is_a?(Rule)
@pattern.components[@position].patterns.map do |pattern|
Item.new(pattern, 0)
end
else
[]
end
end
def follow_symbol
@pattern.components[@position]
end
def followed_by?(symbol)
follow_symbol == symbol
end
def next_position
Item.new(@pattern, @position + 1)
end
def to_s
parts = []
@pattern.components.each_with_index do |symbol, index|
if @position == index
parts << "."
end
parts << symbol.name
end
if @position == @pattern.components.size
parts << "."
end
"#{@pattern.rule.name} -> #{parts.join(" ")}"
end
end
end
end

lib/propane/parser/item_set.rb (deleted)

@@ -1,76 +0,0 @@
class Propane
class Parser
class ItemSet
attr_reader :items
attr_accessor :id
# @return [Hash]
# Maps a follow symbol to its item set.
attr_reader :follow_item_set
# @return [Set]
# Item sets leading to this item set.
attr_reader :in_sets
def initialize(items)
@items = Set.new(items)
@follow_item_set = {}
@in_sets = Set.new
close!
end
def follow_symbols
Set.new(@items.map(&:follow_symbol).compact)
end
def build_follow_set(symbol)
ItemSet.new(items_followed_by(symbol).map(&:next_position))
end
def hash
@items.hash
end
def ==(other)
@items.eql?(other.items)
end
def eql?(other)
self == other
end
def to_s
@items.map(&:to_s).join("\n")
end
private
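# Closure: for every item whose dot sits before a rule, add that rule's
# patterns as fresh position-0 items until a fixed point is reached.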
def close!
eval_items = @items
while eval_items.size > 0
this_eval_items = eval_items
eval_items = Set.new
this_eval_items.each do |item|
item.closed_items.each do |new_item|
unless @items.include?(new_item)
eval_items << new_item
end
end
end
@items += eval_items
end
end
def items_followed_by(symbol)
@items.select do |item|
item.followed_by?(symbol)
end
end
end
end
end

lib/propane/regex.rb (deleted)

@@ -1,162 +0,0 @@
class Propane
class Regex
attr_reader :unit
attr_reader :nfa
def initialize(pattern)
@pattern = pattern.dup
@unit = parse_alternates
@nfa = @unit.to_nfa
if @pattern != ""
raise Error.new(%[Unexpected "#{@pattern}" in pattern])
end
end
private
def parse_alternates
au = AlternatesUnit.new
while @pattern != ""
c = @pattern[0]
return au if c == ")"
@pattern.slice!(0)
case c
when "["
au << parse_character_class
when "("
au << parse_group
when "*", "+", "?", "{"
if last_unit = au.last_unit
case c
when "*"
min_count, max_count = 0, nil
when "+"
min_count, max_count = 1, nil
when "?"
min_count, max_count = 0, 1
when "{"
min_count, max_count = parse_curly_count
end
mu = MultiplicityUnit.new(last_unit, min_count, max_count)
au.replace_last!(mu)
else
raise Error.new("#{c} follows nothing")
end
when "|"
au.new_alternate!
when "\\"
au << parse_backslash
when "."
au << period_character_class
else
au << CharacterRangeUnit.new(c)
end
end
au
end
def parse_group
au = parse_alternates
if @pattern[0] != ")"
raise Error.new("Unterminated group in pattern")
end
@pattern.slice!(0)
au
end
def parse_character_class
ccu = CharacterClassUnit.new
index = 0
loop do
if @pattern == ""
raise Error.new("Unterminated character class")
end
c = @pattern.slice!(0)
if c == "]"
break
elsif c == "^" && index == 0
ccu.negate = true
elsif c == "-" && (ccu.size == 0 || @pattern[0] == "]")
ccu << CharacterRangeUnit.new(c)
elsif c == "\\"
ccu << parse_backslash
elsif c == "-" && @pattern[0] != "]"
begin_cu = ccu.last_unit
unless begin_cu.is_a?(CharacterRangeUnit) && begin_cu.code_point_range.size == 1
raise Error.new("Character range must be between single characters")
end
if @pattern[0] == "\\"
@pattern.slice!(0)
end_cu = parse_backslash
unless end_cu.is_a?(CharacterRangeUnit) && end_cu.code_point_range.size == 1
raise Error.new("Character range must be between single characters")
end
max_code_point = end_cu.first
else
max_code_point = @pattern[0].ord
@pattern.slice!(0)
end
cru = CharacterRangeUnit.new(begin_cu.first, max_code_point)
ccu.replace_last!(cru)
else
ccu << CharacterRangeUnit.new(c)
end
index += 1
end
ccu
end
def parse_curly_count
if @pattern =~ /^(\d+)(?:(,)(\d*))?\}(.*)$/
min_count, comma, max_count, pattern = $1, $2, $3, $4
min_count = min_count.to_i
if comma.to_s == ""
max_count = min_count
elsif max_count.to_s != ""
max_count = max_count.to_i
if max_count < min_count
raise Error.new("Maximum repetition count cannot be less than minimum repetition count")
end
else
max_count = nil
end
@pattern = pattern
[min_count, max_count]
else
raise Error.new("Unexpected match count at #{@pattern}")
end
end
def parse_backslash
if @pattern == ""
raise Error.new("Error: unfollowed \\")
else
c = @pattern.slice!(0)
case c
when "d"
CharacterRangeUnit.new("0", "9")
when "s"
ccu = CharacterClassUnit.new
ccu << CharacterRangeUnit.new(" ")
ccu << CharacterRangeUnit.new("\t")
ccu << CharacterRangeUnit.new("\r")
ccu << CharacterRangeUnit.new("\n")
ccu << CharacterRangeUnit.new("\f")
ccu << CharacterRangeUnit.new("\v")
ccu
else
CharacterRangeUnit.new(c)
end
end
end
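# "." matches any code point except newline, built as the two ranges
# on either side of "\n".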
def period_character_class
ccu = CharacterClassUnit.new
ccu << CharacterRangeUnit.new(0, "\n".ord - 1)
ccu << CharacterRangeUnit.new("\n".ord + 1, 0xFFFFFFFF)
ccu
end
end
end

lib/propane/regex/nfa.rb (deleted)

@@ -1,26 +0,0 @@
class Propane
class Regex
class NFA < FA
attr_reader :end_state
def initialize
super()
@end_state = State.new
end
class << self
def empty
nfa = NFA.new
nfa.start_state.add_transition(nil, nfa.end_state)
nfa
end
end
end
end
end

lib/propane/regex/unit.rb (deleted)

@@ -1,172 +0,0 @@
class Propane
class Regex
class Unit
end
class SequenceUnit < Unit
attr_accessor :units
def initialize
@units = []
end
def method_missing(*args)
@units.__send__(*args)
end
def to_nfa
if @units.empty?
NFA.empty
else
nfa = NFA.new
unit_nfas = @units.map do |unit|
unit.to_nfa
end
nfa.start_state.add_transition(nil, unit_nfas[0].start_state)
unit_nfas.reduce do |prev_nfa, next_nfa|
prev_nfa.end_state.add_transition(nil, next_nfa.start_state)
next_nfa
end.end_state.add_transition(nil, nfa.end_state)
nfa
end
end
end
class AlternatesUnit < Unit
attr_accessor :alternates
def initialize
@alternates = []
new_alternate!
end
def new_alternate!
@alternates << SequenceUnit.new
end
def <<(unit)
@alternates[-1] << unit
end
def last_unit
@alternates[-1][-1]
end
def replace_last!(new_unit)
@alternates[-1][-1] = new_unit
end
def to_nfa
if @alternates.size == 0
NFA.empty
elsif @alternates.size == 1
@alternates[0].to_nfa
else
nfa = NFA.new
alternate_nfas = @alternates.map do |alternate|
alternate.to_nfa
end
alternate_nfas.each do |alternate_nfa|
nfa.start_state.add_transition(nil, alternate_nfa.start_state)
alternate_nfa.end_state.add_transition(nil, nfa.end_state)
end
nfa
end
end
end
class CharacterRangeUnit < Unit
attr_reader :code_point_range
def initialize(c1, c2 = nil)
@code_point_range = CodePointRange.new(c1, c2)
end
def first
@code_point_range.first
end
def last
@code_point_range.last
end
def to_nfa
nfa = NFA.new
nfa.start_state.add_transition(@code_point_range, nfa.end_state)
nfa
end
end
class CharacterClassUnit < Unit
attr_accessor :units
attr_accessor :negate
def initialize
@units = []
@negate = false
end
def method_missing(*args)
@units.__send__(*args)
end
def <<(thing)
if thing.is_a?(CharacterClassUnit)
thing.each do |ccu_unit|
@units << ccu_unit
end
else
@units << thing
end
end
def last_unit
@units[-1]
end
def replace_last!(new_unit)
@units[-1] = new_unit
end
def to_nfa
nfa = NFA.new
if @units.empty?
nfa.start_state.add_transition(nil, nfa.end_state)
else
code_point_ranges = @units.map(&:code_point_range)
if @negate
code_point_ranges = CodePointRange.invert_ranges(code_point_ranges)
end
code_point_ranges.each do |code_point_range|
nfa.start_state.add_transition(code_point_range, nfa.end_state)
end
end
nfa
end
end
class MultiplicityUnit < Unit
attr_accessor :unit
attr_accessor :min_count
attr_accessor :max_count
def initialize(unit, min_count, max_count)
@unit = unit
@min_count = min_count
@max_count = max_count
end
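# Thompson-style construction: chain min_count copies of the unit, then
# either loop one extra copy (unbounded max) or append optional copies
# up to max_count.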
def to_nfa
nfa = NFA.new
last_state = nfa.start_state
unit_nfa = nil
@min_count.times do
unit_nfa = @unit.to_nfa
last_state.add_transition(nil, unit_nfa.start_state)
last_state = unit_nfa.end_state
end
last_state.add_transition(nil, nfa.end_state)
if @max_count.nil?
if @min_count == 0
unit_nfa = @unit.to_nfa
last_state.add_transition(nil, unit_nfa.start_state)
end
unit_nfa.end_state.add_transition(nil, unit_nfa.start_state)
unit_nfa.end_state.add_transition(nil, nfa.end_state)
else
(@max_count - @min_count).times do
unit_nfa = @unit.to_nfa
last_state.add_transition(nil, unit_nfa.start_state)
unit_nfa.end_state.add_transition(nil, nfa.end_state)
last_state = unit_nfa.end_state
end
end
nfa
end
end
end
end

lib/propane/rule.rb (deleted)

@@ -1,39 +0,0 @@
class Propane
class Rule
class Pattern
attr_reader :rule
attr_reader :components
attr_reader :code
def initialize(rule, components, code)
@rule = rule
@components = components
@code = code
end
end
attr_reader :id
attr_reader :name
attr_reader :patterns
def initialize(name, id)
@name = name
@id = id
@patterns = []
end
def add_pattern(components, code)
@patterns << Pattern.new(self, components, code)
end
end
end

lib/propane/token.rb (deleted)

@@ -1,42 +0,0 @@
class Propane
class Token
# @return [String]
# Token name.
attr_reader :name
# @return [String]
# Token pattern.
attr_reader :pattern
# @return [Integer]
# Token ID.
attr_reader :id
# @return [Regex::NFA]
# Regex NFA for matching the token.
attr_reader :nfa
def initialize(name, pattern, id)
@name = name
@pattern = pattern
@id = id
unless pattern.nil?
regex = Regex.new(pattern)
regex.nfa.end_state.accepts = self
@nfa = regex.nfa
end
end
def c_name
@name.upcase
end
def to_s
@name
end
end
end

lib/propane/version.rb (deleted)

@@ -1,3 +0,0 @@
class Propane
VERSION = "0.1.0"
end

(deleted shell wrapper script; filename not shown in this view)

@@ -1,2 +0,0 @@
#!/bin/sh
exec bundle exec ruby -Ilib bin/propane "$@"

refptr (new submodule)

@@ -0,0 +1 @@
Subproject commit e2c7e88824c18eb3b218f6308db0194edb422eef

spec/propane/code_point_range_spec.rb (deleted)

@@ -1,87 +0,0 @@
class Propane
describe CodePointRange do
describe "#<=>" do
it "sorts ranges" do
arr = [
CodePointRange.new(100,102),
CodePointRange.new(65, 68),
CodePointRange.new(65, 65),
CodePointRange.new(100, 100),
CodePointRange.new(68, 70),
]
arr.sort!
expect(arr[0]).to eq CodePointRange.new(65, 65)
expect(arr[1]).to eq CodePointRange.new(65, 68)
expect(arr[2]).to eq CodePointRange.new(68, 70)
expect(arr[3]).to eq CodePointRange.new(100, 100)
expect(arr[4]).to eq CodePointRange.new(100, 102)
end
end
describe "#include?" do
it "returns whether the code point is included in the range" do
expect(CodePointRange.new(100).include?(100)).to be_truthy
expect(CodePointRange.new(100, 100).include?(99)).to be_falsey
expect(CodePointRange.new(100, 100).include?(101)).to be_falsey
expect(CodePointRange.new(100, 120).include?(99)).to be_falsey
expect(CodePointRange.new(100, 120).include?(100)).to be_truthy
expect(CodePointRange.new(100, 120).include?(110)).to be_truthy
expect(CodePointRange.new(100, 120).include?(120)).to be_truthy
expect(CodePointRange.new(100, 120).include?(121)).to be_falsey
end
it "returns whether the range is included in the range" do
expect(CodePointRange.new(100).include?(CodePointRange.new(100))).to be_truthy
expect(CodePointRange.new(100, 100).include?(CodePointRange.new(99))).to be_falsey
expect(CodePointRange.new(100, 100).include?(CodePointRange.new(99, 100))).to be_falsey
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(90, 110))).to be_falsey
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(110, 130))).to be_falsey
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(100, 120))).to be_truthy
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(100, 110))).to be_truthy
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(110, 120))).to be_truthy
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(102, 118))).to be_truthy
end
end
describe ".invert_ranges" do
it "inverts ranges" do
expect(CodePointRange.invert_ranges(
[CodePointRange.new(60, 90),
CodePointRange.new(80, 85),
CodePointRange.new(80, 100),
CodePointRange.new(101),
CodePointRange.new(200, 300)])).to eq [
CodePointRange.new(0, 59),
CodePointRange.new(102, 199),
CodePointRange.new(301, 0xFFFFFFFF)]
expect(CodePointRange.invert_ranges(
[CodePointRange.new(0, 500),
CodePointRange.new(7000, 0xFFFFFFFF)])).to eq [
CodePointRange.new(501, 6999)]
end
end
describe ".first_subrange" do
it "returns the first subrange to split" do
expect(CodePointRange.first_subrange(
[CodePointRange.new(65, 90),
CodePointRange.new(66, 66),
CodePointRange.new(80, 90)])).to eq CodePointRange.new(65)
expect(CodePointRange.first_subrange(
[CodePointRange.new(65, 90)])).to eq CodePointRange.new(65, 90)
expect(CodePointRange.first_subrange(
[CodePointRange.new(65, 90),
CodePointRange.new(80, 90)])).to eq CodePointRange.new(65, 79)
expect(CodePointRange.first_subrange(
[CodePointRange.new(65, 90),
CodePointRange.new(65, 100),
CodePointRange.new(65, 95)])).to eq CodePointRange.new(65, 90)
expect(CodePointRange.first_subrange(
[CodePointRange.new(100, 120),
CodePointRange.new(70, 90)])).to eq CodePointRange.new(70, 90)
end
end
end
end

spec/propane/lexer/dfa_spec.rb (deleted)

@@ -1,121 +0,0 @@
class TestLexer
def initialize(token_dfa)
@token_dfa = token_dfa
end
def lex(input)
input_chars = input.chars
output = []
while lexed_token = lex_token(input_chars)
output << lexed_token
input_chars.slice!(0, lexed_token[1].size)
end
unless input_chars.empty?
raise "Unmatched input #{input_chars.join(" ")}"
end
output
end
def lex_token(input_chars)
return nil if input_chars.empty?
s = ""
current_state = @token_dfa.start_state
last_accepts = nil
last_s = nil
input_chars.each_with_index do |input_char, index|
if next_state = transition(current_state, input_char)
s += input_char
current_state = next_state
if current_state.accepts
last_accepts = current_state.accepts
last_s = s
end
else
break
end
end
if last_accepts
[last_accepts.name, last_s]
end
end
def transition(state, input_char)
state.transitions.each do |transition|
if transition.code_point_range.include?(input_char.ord)
return transition.destination
end
end
nil
end
end
def run(grammar, input)
propane = Propane.new(grammar)
token_dfa = Propane::Lexer::DFA.new(propane.instance_variable_get(:@tokens))
test_lexer = TestLexer.new(token_dfa)
test_lexer.lex(input)
end
describe Propane::Lexer::DFA do
it "lexes a simple token" do
expect(run(<<EOF, "foo")).to eq [["foo", "foo"]]
token foo
EOF
end
it "lexes two tokens" do
expected = [
["foo", "foo"],
["bar", "bar"],
]
expect(run(<<EOF, "foobar")).to eq expected
token foo
token bar
EOF
end
it "lexes the longer of multiple options" do
expected = [
["identifier", "foobar"],
]
expect(run(<<EOF, "foobar")).to eq expected
token foo
token bar
token identifier [a-z]+
EOF
expected = [
["plusplus", "++"],
["plus", "+"],
]
expect(run(<<EOF, "+++")).to eq expected
token plus \\+
token plusplus \\+\\+
EOF
end
it "lexes whitespace" do
expected = [
["foo", "foo"],
["WS", " \t"],
["bar", "bar"],
]
expect(run(<<EOF, "foo \tbar")).to eq expected
token foo
token bar
token WS \\s+
EOF
end
it "allows dropping a matched pattern" do
expected = [
["foo", "foo"],
[nil, " \t"],
["bar", "bar"],
]
expect(run(<<EOF, "foo \tbar")).to eq expected
token foo
token bar
drop \\s+
EOF
end
end

spec/propane/parser/item_spec.rb (deleted)

@@ -1,19 +0,0 @@
class Propane
class Parser
describe Item do
it "operates properly with a set" do
rule = Object.new
item1 = Item.new(rule, 2)
item2 = Item.new(rule, 2)
expect(item1).to eq item2
expect(item1.eql?(item2)).to be_truthy
set = Set.new([item1, item2])
expect(set.size).to eq 1
end
end
end
end

spec/propane/regex_spec.rb (deleted)

@@ -1,333 +0,0 @@
class Propane
RSpec.describe Regex do
it "parses an empty expression" do
regex = Regex.new("")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0].size).to eq 0
end
it "parses a single character unit expression" do
regex = Regex.new("a")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
end
it "parses a group with a single character unit expression" do
regex = Regex.new("(a)")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::AlternatesUnit
alt_unit = seq_unit[0]
expect(alt_unit.alternates.size).to eq 1
expect(alt_unit.alternates[0]).to be_a Regex::SequenceUnit
expect(alt_unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit
end
it "parses a *" do
regex = Regex.new("a*")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 0
expect(m_unit.max_count).to be_nil
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
end
it "parses a +" do
regex = Regex.new("a+")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 1
expect(m_unit.max_count).to be_nil
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
end
it "parses a ?" do
regex = Regex.new("a?")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 0
expect(m_unit.max_count).to eq 1
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
end
it "parses a multiplicity count" do
regex = Regex.new("a{5}")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 5
expect(m_unit.max_count).to eq 5
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
end
it "parses a minimum-only multiplicity count" do
regex = Regex.new("a{5,}")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 5
expect(m_unit.max_count).to be_nil
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
end
it "parses a minimum and maximum multiplicity count" do
regex = Regex.new("a{5,8}")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 5
expect(m_unit.max_count).to eq 8
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
expect(m_unit.unit.first).to eq "a".ord
end
it "parses an escaped *" do
regex = Regex.new("a\\*")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 2
expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[0].first).to eq "a".ord
expect(seq_unit[1]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[1].first).to eq "*".ord
end
it "parses an escaped +" do
regex = Regex.new("a\\+")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 2
expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[0].first).to eq "a".ord
expect(seq_unit[1]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[1].first).to eq "+".ord
end
it "parses an escaped \\" do
regex = Regex.new("\\\\d")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 2
expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[0].first).to eq "\\".ord
expect(seq_unit[1]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[1].first).to eq "d".ord
end
it "parses a character class" do
regex = Regex.new("[a-z_]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.negate).to be_falsey
expect(ccu.size).to eq 2
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "a".ord
expect(ccu[0].last).to eq "z".ord
expect(ccu[1]).to be_a Regex::CharacterRangeUnit
expect(ccu[1].first).to eq "_".ord
end
it "parses a negated character class" do
regex = Regex.new("[^xyz]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.negate).to be_truthy
expect(ccu.size).to eq 3
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "x".ord
end
it "parses - as a plain character at beginning of a character class" do
regex = Regex.new("[-9]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.size).to eq 2
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "-".ord
end
it "parses - as a plain character at end of a character class" do
regex = Regex.new("[0-]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.size).to eq 2
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "0".ord
expect(ccu[1]).to be_a Regex::CharacterRangeUnit
expect(ccu[1].first).to eq "-".ord
end
it "parses - as a plain character at beginning of a negated character class" do
regex = Regex.new("[^-9]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.negate).to be_truthy
expect(ccu.size).to eq 2
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "-".ord
end
it "parses . as a plain character in a character class" do
regex = Regex.new("[.]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.negate).to be_falsey
expect(ccu.size).to eq 1
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq ".".ord
end
it "parses - as a plain character when escaped in middle of character class" do
regex = Regex.new("[0\\-9]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.negate).to be_falsey
expect(ccu.size).to eq 3
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "0".ord
expect(ccu[1]).to be_a Regex::CharacterRangeUnit
expect(ccu[1].first).to eq "-".ord
expect(ccu[2]).to be_a Regex::CharacterRangeUnit
expect(ccu[2].first).to eq "9".ord
end
it "parses alternates" do
regex = Regex.new("ab|c")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 2
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[1]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[0].size).to eq 2
expect(regex.unit.alternates[1].size).to eq 1
end
it "parses a ." do
regex = Regex.new("a.b")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[0][1]).to be_a Regex::CharacterClassUnit
expect(regex.unit.alternates[0][1].units.size).to eq 2
expect(regex.unit.alternates[0][2]).to be_a Regex::CharacterRangeUnit
end
it "parses something complex" do
regex = Regex.new("(a|)*|[^^]|\\|v|[x-y]+")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 4
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[0].size).to eq 1
expect(regex.unit.alternates[0][0]).to be_a Regex::MultiplicityUnit
expect(regex.unit.alternates[0][0].min_count).to eq 0
expect(regex.unit.alternates[0][0].max_count).to be_nil
expect(regex.unit.alternates[0][0].unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates[0][0].unit.alternates.size).to eq 2
expect(regex.unit.alternates[0][0].unit.alternates[0]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[0][0].unit.alternates[0].size).to eq 1
expect(regex.unit.alternates[0][0].unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[0][0].unit.alternates[1]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[0][0].unit.alternates[1].size).to eq 0
expect(regex.unit.alternates[1]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[1].size).to eq 1
expect(regex.unit.alternates[1][0]).to be_a Regex::CharacterClassUnit
expect(regex.unit.alternates[1][0].negate).to be_truthy
expect(regex.unit.alternates[1][0].size).to eq 1
expect(regex.unit.alternates[1][0][0]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[2]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[2].size).to eq 2
expect(regex.unit.alternates[2][0]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[2][0].first).to eq "|".ord
expect(regex.unit.alternates[2][1]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[2][1].first).to eq "v".ord
expect(regex.unit.alternates[3]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[3].size).to eq 1
expect(regex.unit.alternates[3][0]).to be_a Regex::MultiplicityUnit
expect(regex.unit.alternates[3][0].min_count).to eq 1
expect(regex.unit.alternates[3][0].max_count).to be_nil
expect(regex.unit.alternates[3][0].unit).to be_a Regex::CharacterClassUnit
expect(regex.unit.alternates[3][0].unit.size).to eq 1
expect(regex.unit.alternates[3][0].unit[0]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[3][0].unit[0].first).to eq "x".ord
expect(regex.unit.alternates[3][0].unit[0].last).to eq "y".ord
end
end
end

View File

@ -1,97 +0,0 @@
require "fileutils"
describe Propane do
def write_grammar(grammar)
File.write("spec/run/testparser.i", grammar)
end
def build_parser
result = system(*%w[./propane.sh spec/run/testparser.i spec/run/testparser.d])
expect(result).to be_truthy
end
def compile(test_file)
result = system(*%w[gdc -funittest -o spec/run/testparser spec/run/testparser.d], test_file)
expect(result).to be_truthy
end
def run
result = system("spec/run/testparser")
expect(result).to be_truthy
end
before(:each) do
FileUtils.rm_rf("spec/run")
FileUtils.mkdir_p("spec/run")
end
it "generates a D lexer" do
write_grammar <<EOF
token int \\d+
token plus \\+
token times \\*
drop \\s+
Start: [Foo] <<
>>
Foo: [int] <<
>>
Foo: [plus] <<
>>
EOF
build_parser
compile("spec/test_d_lexer.d")
run
end
it "generates a parser" do
write_grammar <<EOF
token plus \\+
token times \\*
token zero 0
token one 1
Start: [E] <<
>>
E: [E times B] <<
>>
E: [E plus B] <<
>>
E: [B] <<
>>
B: [zero] <<
>>
B: [one] <<
>>
EOF
build_parser
end
it "distinguishes between multiple identical rules with lookahead symbol" do
write_grammar <<EOF
token a
token b
Start: [R1 a] <<
>>
Start: [R2 b] <<
>>
R1: [a b] <<
>>
R2: [a b] <<
>>
EOF
build_parser
end
it "handles reducing a rule that could be arrived at from multiple states" do
write_grammar <<EOF
token a
token b
Start: [a R1] <<
>>
Start: [b R1] <<
>>
R1: [b] <<
>>
EOF
build_parser
end
end

View File

@ -1,11 +0,0 @@
require "bundler/setup"
require "propane"
RSpec.configure do |config|
# Enable flags like --only-failures and --next-failure
config.example_status_persistence_file_path = ".rspec_status"
config.expect_with :rspec do |c|
c.syntax = :expect
end
end

View File

@ -1,66 +0,0 @@
import testparser;
import std.stdio;
int main()
{
return 0;
}
unittest
{
alias DCP = Testparser.Decoder.DecodedCodePoint;
string inputstring = "5+\n 66";
const(ubyte) * input = cast(const(ubyte) *)inputstring.ptr;
size_t input_length = inputstring.length;
DCP dcp;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP('5', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP('+', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP('\n', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP(' ', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP('6', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP('6', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP(Testparser.Decoder.CODE_POINT_EOF, 0u));
inputstring = "\xf0\x9f\xa7\xa1";
input = cast(const(ubyte) *)inputstring.ptr;
input_length = inputstring.length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP(0x1F9E1, 4u));
}
unittest
{
alias LT = Testparser.Lexer.LexedToken;
string input = "5 + 4 * \n677 + 567";
Testparser.Lexer lexer = new Testparser.Lexer(cast(const(ubyte) *)input.ptr, input.length);
assert(lexer.lex_token() == LT(0, 0, 1, Testparser.TOKEN_INT));
assert(lexer.lex_token() == LT(0, 2, 1, Testparser.TOKEN_PLUS));
assert(lexer.lex_token() == LT(0, 4, 1, Testparser.TOKEN_INT));
assert(lexer.lex_token() == LT(0, 6, 1, Testparser.TOKEN_TIMES));
assert(lexer.lex_token() == LT(1, 0, 3, Testparser.TOKEN_INT));
assert(lexer.lex_token() == LT(1, 4, 1, Testparser.TOKEN_PLUS));
assert(lexer.lex_token() == LT(1, 6, 3, Testparser.TOKEN_INT));
assert(lexer.lex_token() == LT(1, 9, 0, Testparser.TOKEN_EOF));
lexer = new Testparser.Lexer(null, 0u);
assert(lexer.lex_token() == LT(0, 0, 0, Testparser.TOKEN_EOF));
}

14
tests/Makefile Normal file
View File

@ -0,0 +1,14 @@
all:
for d in *; do \
if [ -d $$d ]; then \
$(MAKE) -C $$d; \
fi; \
done
clean:
for d in *; do \
if [ -d $$d ]; then \
$(MAKE) -C $$d clean; \
fi; \
done

15
tests/build/Makefile Normal file
View File

@ -0,0 +1,15 @@
TARGET := test
I_SOURCE := itest
CXXFLAGS := -O2
LDFLAGS := -lpcre
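# Note: "imbecile itest.I" (in the rule below) generates itest.cc and itest.h,
# which are then compiled along with this test's own *.cc sources.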
all: $(TARGET)
./$(TARGET)
$(TARGET): $(shell which imbecile) $(I_SOURCE).I $(wildcard *.cc)
imbecile $(I_SOURCE).I
$(CXX) -o $@ *.cc $(LDFLAGS)
clean:
-rm -f $(TARGET) *.o $(I_SOURCE).cc $(I_SOURCE).h

37
tests/build/itest.I Normal file
View File

@ -0,0 +1,37 @@
[tokens]
AND and
OR or
NOT not
LPAREN \(
RPAREN \)
WS \s+
EQUALS = %{ cout << "Saw '='" << endl; %}
IDENTIFIER [a-zA-Z_][a-zA-Z_0-9]* %{
cout << "Identify: '" << matches[0] << "'" << endl;
%}
DEC_INT [1-9]\d*\b
${
unsigned long long value;
$}
%{
sscanf(matches[0].c_str(), "%llu", &value);
cout << "value: " << value << endl;
%}
HEX_INT 0x([0-9a-fA-F]+)\b ${ unsigned long long value; $} %{
sscanf(matches[1].c_str(), "%llx", &value);
cout << "value: " << value << endl;
%}
OCT_INT 0([0-7]*)\b
BIN_INT 0b([01]+)\b
[rules]
Assignment := IDENTIFIER EQUALS Expression
Expression := IDENTIFIER \
| Assignment

17
tests/build/main.cc Normal file
View File

@ -0,0 +1,17 @@
#include <sstream>
#include <string>
#include "itest.h"
using namespace std;
int main(int argc, char * argv[])
{
Parser p;
stringstream t(string(
"hi there (one and two and three and four) or (two = nine)\n"
"0x42 12345 0 011 0b0011\n"
));
p.parse(t);
}

202
tmpl/parser.cc Normal file
View File

@ -0,0 +1,202 @@
#include <string.h> /* memcpy() */
#include <pcre.h>
#include <iostream>
#include <vector>
#include {%header_name%}
using namespace std;
#ifdef I_NAMESPACE
namespace I_NAMESPACE {
#endif
I_CLASSNAME::I_CLASSNAME()
: m_errstr(NULL)
{
}
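/* Generated factory: the {%buildToken%} placeholder below expands to one
 * switch case per token type, constructing the corresponding token object. */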
static TokenRef buildToken(int typeindex)
{
TokenRef token;
switch (typeindex)
{
{%buildToken%}
}
if (!token.isNull())
{
token->setType(typeindex);
}
return token;
}
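/* Slurp the entire input stream into buff; size receives the byte count. */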
static void read_istream(istream & i, vector<char> & buff, int & size)
{
size = 0;
int bytes_read;
char read_buff[1000];
while (!i.eof())
{
i.read(&read_buff[0], sizeof(read_buff));
bytes_read = i.gcount();
size += bytes_read;
for (int j = 0; j < bytes_read; j++)
buff.push_back(read_buff[j]);
}
}
bool I_CLASSNAME::parse(istream & in)
{
struct {
const char * name;
const char * definition;
bool process;
pcre * re;
pcre_extra * re_extra;
} tokens[] = {
{%token_list%}
};
if (sizeof(tokens)/sizeof(tokens[0]) == 0)
{
m_errstr = "No tokens defined";
return false;
}
vector<char> buff;
int buff_size;
read_istream(in, buff, buff_size);
if (buff_size <= 0)
{
m_errstr = "0-length input string";
return false;
}
/* append trailing NUL byte for pcre functions */
buff.push_back('\0');
/* compile all token regular expressions */
for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++)
{
const char * errptr;
int erroffset;
tokens[i].re = pcre_compile(tokens[i].definition, 0,
&errptr, &erroffset, NULL);
if (tokens[i].re == NULL)
{
cerr << "Error compiling token '" << tokens[i].name
<< "' regular expression at position " << erroffset
<< ": " << errptr << endl;
m_errstr = "Error in token regular expression";
return false;
}
tokens[i].re_extra = pcre_study(tokens[i].re, 0, &errptr);
}
int buff_pos = 0;
const int ovector_num_matches = 16;
const int ovector_size = 3 * (ovector_num_matches + 1);
int ovector[ovector_size];
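/* Greedy lexing loop: at each input position, try every token pattern and
 * keep the longest match; on a tie the earlier-defined token wins. */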
while (buff_pos < buff_size)
{
int longest_match_length = 0;
int longest_match_index = -1;
int longest_match_ovector[ovector_size];
for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++)
{
int rc = pcre_exec(tokens[i].re, tokens[i].re_extra,
&buff[0], buff_size, buff_pos,
PCRE_ANCHORED | PCRE_NOTEMPTY,
ovector, ovector_size);
if (rc > 0)
{
/* this pattern matched some of the input */
int len = ovector[1] - ovector[0];
if (len > longest_match_length)
{
longest_match_length = len;
longest_match_index = i;
memcpy(longest_match_ovector, ovector, sizeof(ovector));
}
}
}
if (longest_match_index < 0)
{
/* no pattern matched the input at the current position */
cerr << "Parse error" << endl;
return false;
}
Matches matches(tokens[longest_match_index].re,
&buff[0], longest_match_ovector, ovector_size);
TokenRef token = buildToken(longest_match_index);
if (token.isNull())
{
cerr << "Internal Error: null token" << endl;
return false;
}
token->process(matches);
m_tokens.push_back(token);
buff_pos += longest_match_length;
}
return true;
}
refptr<Node> Node::operator[](int index)
{
return (0 <= index && index < m_indexed_children.size())
? m_indexed_children[index]
: NULL;
}
refptr<Node> Node::operator[](const std::string & index)
{
return (m_named_children.find(index) != m_named_children.end())
? m_named_children[index]
: NULL;
}
void Token::process(const Matches & matches)
{
{%token_code%}
}
Matches::Matches(pcre * re, const char * data, int * ovector, int ovec_size)
: m_re(re), m_data(data), m_ovector(ovector), m_ovec_size(ovec_size)
{
}
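/* PCRE fills ovector with (start, end) byte-offset pairs: pair 0 is the
 * whole match, pair n is capture group n. */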
std::string Matches::operator[](int index) const
{
if (0 <= index && index < (m_ovec_size / 3))
{
int idx = 2 * index;
if (m_ovector[idx] >= 0 && m_ovector[idx + 1] >= 0)
{
return string(m_data + m_ovector[idx],
m_ovector[idx + 1] - m_ovector[idx]);
}
}
return "";
}
std::string Matches::operator[](const std::string & index) const
{
int num = pcre_get_stringnumber(m_re, index.c_str());
if (num > 0 && num < (m_ovec_size / 3))
{
/* convert the capture group number to its ovector pair offset */
int idx = 2 * num;
if (m_ovector[idx] >= 0 && m_ovector[idx + 1] >= 0)
{
return string(m_data + m_ovector[idx],
m_ovector[idx + 1] - m_ovector[idx]);
}
}
return "";
}
{%token_classes_code%}
#ifdef I_NAMESPACE
} /* namespace I_NAMESPACE */
#endif

181
tmpl/parser.h Normal file
View File

@ -0,0 +1,181 @@
#ifndef IMBECILE_PARSER_HEADER
#define IMBECILE_PARSER_HEADER
#include <pcre.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <iostream>
#include <map>
#include <vector>
#include <list>
{%user_includes%}
{%defines%}
#ifdef I_NAMESPACE
namespace I_NAMESPACE {
#endif
#ifndef REFPTR_H
#define REFPTR_H REFPTR_H
/* Author: Josh Holtrop
 * Purpose: Provide a reference-counting, pointer-like, first-class
 * C++ object that frees the object it points to once all
 * references to it have been destroyed.
 * This implementation does not solve the circular-reference problem;
 * that was not a concern when developing this class.
 */
#include <stdlib.h> /* NULL */
template <typename T>
class refptr
{
public:
refptr<T>();
refptr<T>(T * ptr);
refptr<T>(const refptr<T> & orig);
refptr<T> & operator=(const refptr<T> & orig);
refptr<T> & operator=(T * ptr);
~refptr<T>();
T & operator*() const { return *m_ptr; }
T * operator->() const { return m_ptr; }
bool isNull() const { return m_ptr == NULL; }
private:
void cloneFrom(const refptr<T> & orig);
void destroy();
T * m_ptr;
int * m_refCount;
};
template <typename T> refptr<T>::refptr()
{
m_ptr = NULL;
m_refCount = NULL;
}
template <typename T> refptr<T>::refptr(T * ptr)
{
m_ptr = ptr;
m_refCount = new int;
*m_refCount = 1;
}
template <typename T> refptr<T>::refptr(const refptr<T> & orig)
{
cloneFrom(orig);
}
template <typename T> refptr<T> & refptr<T>::operator=(const refptr<T> & orig)
{
/* guard against self-assignment, which would destroy the pointee
 * before copying from it */
if (this != &orig)
{
destroy();
cloneFrom(orig);
}
return *this;
}
template <typename T> refptr<T> & refptr<T>::operator=(T * ptr)
{
destroy();
m_ptr = ptr;
m_refCount = new int;
*m_refCount = 1;
return *this;
}
template <typename T> void refptr<T>::cloneFrom(const refptr<T> & orig)
{
this->m_ptr = orig.m_ptr;
this->m_refCount = orig.m_refCount;
if (m_refCount != NULL)
(*m_refCount)++;
}
template <typename T> refptr<T>::~refptr()
{
destroy();
}
template <typename T> void refptr<T>::destroy()
{
if (m_refCount != NULL)
{
if (*m_refCount <= 1)
{
delete m_ptr;
delete m_refCount;
}
else
{
(*m_refCount)--;
}
}
}
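/* Usage sketch (illustrative): copies share a single reference count, and
 * the pointee is deleted exactly once when the last refptr goes away.
 *
 *   refptr<Token> a(new Token());  // count == 1
 *   refptr<Token> b = a;           // count == 2, same Token
 *   b->setType(1);                 // pointer-style member access
 *   // a and b destroyed -> ~Token() runs once
 */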
#endif
class Matches
{
public:
Matches(pcre * re, const char * data, int * ovector, int ovec_size);
std::string operator[](int index) const;
std::string operator[](const std::string & index) const;
protected:
pcre * m_re;
const char * m_data;
int * m_ovector;
int m_ovec_size;
};
class Node
{
public:
refptr<Node> operator[](int index);
refptr<Node> operator[](const std::string & index);
protected:
std::map< std::string, refptr<Node> > m_named_children;
std::vector< refptr<Node> > m_indexed_children;
};
typedef refptr<Node> NodeRef;
class Token : public Node
{
public:
virtual void process(const Matches & matches);
void setType(int type) { m_type = type; }
int getType() const { return m_type; }
protected:
int m_type;
{%token_data%}
};
typedef refptr<Token> TokenRef;
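/* Generated per-token classes are inserted here. */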
{%token_classes%}
class I_CLASSNAME
{
public:
I_CLASSNAME();
bool parse(std::istream & in);
const char * getError() { return m_errstr; }
protected:
const char * m_errstr;
std::list<TokenRef> m_tokens;
};
#ifdef I_NAMESPACE
} /* namespace I_NAMESPACE */
#endif
#endif /* IMBECILE_PARSER_HEADER */