Compare commits

...

148 Commits

SHA1 Message Date
164a4854fb Update README 2022-05-30 15:40:31 -04:00
ddadc2008b Rename to propane 2022-05-28 20:20:03 -04:00
fbd215098b Update license years 2022-05-27 21:49:54 -04:00
bfe2916165 Update bundler 2022-05-27 00:15:03 -04:00
c9bc4832f4 bundle update 2022-05-27 00:14:26 -04:00
6dfef8573f Fix ERB constructor call for Ruby 3.2 warnings 2022-05-27 00:12:40 -04:00
f3ed678fe1 Store tokens in Hash by name 2021-09-27 21:40:12 -04:00
280b749e38 Track Rule IDs 2021-09-27 21:29:44 -04:00
d6779aef00 Start on Parser#build_tables 2021-09-22 23:26:36 -04:00
746ec89be8 Add test for a rule that can be arrived at from multiple states 2021-09-21 21:40:11 -04:00
997f34a1e4 Keep track of item set in-links 2021-09-21 21:32:18 -04:00
a2795bb531 Keep track of follow item sets by symbol for each item set 2021-09-21 17:09:53 -04:00
850e639e3a update identical rule spec to use lookahead symbol 2021-09-06 20:18:17 -04:00
5f7e548fe3 Remove Rule::Pattern, Item stores a Rule reference 2021-09-06 19:41:29 -04:00
bdb10e7afc test duplicate rules 2021-09-05 09:50:04 -04:00
7bdaf7cdbc Do not create item set following EOF token 2021-09-05 07:51:59 -04:00
08e3516ad9 Add wikipedia LR(0) parser example test 2021-09-04 22:33:34 -04:00
2c8f3c6e9a Avoid infinite loop with self-referential rules 2021-09-04 22:29:10 -04:00
9dffa3c41a Recursively build item sets 2021-08-29 12:38:44 -04:00
ceb7e9ee32 Add EOF token to Start rule patterns 2021-08-29 11:48:49 -04:00
6026bf1514 Start building following item sets 2021-08-29 09:41:00 -04:00
9cc1890ddc One Rule object stores all alternative patterns 2021-08-28 10:28:50 -04:00
e4f2fffe50 add Item#closed_items 2021-08-28 09:47:01 -04:00
d931bcb513 Do not expand rules 2021-08-28 09:23:08 -04:00
2e16b0bd6e Start on Item and ItemSet 2021-08-28 09:02:19 -04:00
6ce94e15af Expand rules 2021-08-28 08:11:06 -04:00
3f92ae46c4 Map rule components to Token/Rule references 2021-08-22 21:21:41 -04:00
00016f16b3 Combine Grammar and Generator into top-level Imbecile class 2021-08-22 21:04:46 -04:00
9273bfccf6 Move Token/Rule out of Grammar class 2021-08-19 20:00:40 -04:00
f295acb593 Generator builds a Lexer, not a Lexer::DFA 2021-08-19 13:11:12 -04:00
51a31317a6 Move FA#build_tables to Lexer::DFA 2021-08-19 11:55:34 -04:00
9459883e74 Add Lexer class; Move LexerDFA to Lexer::DFA 2021-08-18 17:09:45 -04:00
28591907c1 Move FA class out of Regex class 2021-08-18 17:05:03 -04:00
37d6917b49 Rework Rule constructor 2021-07-27 21:22:46 -04:00
2685c05360 Change rule syntax 2021-07-19 21:55:08 -04:00
c0c3353fd7 Test lexing empty null string returns EOF 2021-07-06 12:06:07 -04:00
3158e51059 Add length field to LexedToken 2021-07-06 11:59:35 -04:00
d9e4f64d2e Fix returning TOKEN_EOF when lexing at EOF 2021-07-06 11:55:44 -04:00
ec2dcf9a72 Fix not progressing through input while lexing a token 2021-07-06 11:47:33 -04:00
578e165e2d Fix off-by-one error in state IDs 2021-07-06 11:44:03 -04:00
e8df4296cc Begin testing lexer 2021-07-06 11:09:39 -04:00
230c324209 Fix iterating through all transitions in a state 2021-07-06 11:09:13 -04:00
1271e19b50 Test multi-byte code point decoding 2021-07-06 11:02:43 -04:00
12e11399af Add decoder tests 2021-07-06 10:57:06 -04:00
24fab8515d Decoder.decode_code_point returns struct with code point and length together 2021-07-06 10:50:32 -04:00
1dcdd87a28 Generate token constants and names to top-level parser class 2021-07-06 10:28:35 -04:00
8aec7ec0de Lexer class can be used standalone 2021-07-06 10:15:07 -04:00
c96d55b031 Fix class name 2021-07-06 10:14:14 -04:00
ca7d4862f9 Run test executable; build with unit tests 2021-07-06 10:03:42 -04:00
3c874ae4c1 Compile generated parser with a test file 2021-07-05 23:05:55 -04:00
748c219625 Do not return dropped tokens from Lexer.lex_token() 2021-07-05 22:53:58 -04:00
71ee7de9f9 Remove obsolete lex() and lex_token() methods 2021-07-05 22:49:50 -04:00
2121acc87e Complete Lexer.lex_token() 2021-07-05 22:41:09 -04:00
f2563cf255 Work on Lexer.lex_token() 2021-07-05 22:02:27 -04:00
24d12be3b9 Add TOKEN enum entries for EOF, decode error, drop, and none 2021-07-05 20:11:55 -04:00
91d6ee25ea Add Lexer class 2021-07-05 19:13:41 -04:00
2f1cb47bea Add Decoder class to decode code points 2021-07-05 18:47:10 -04:00
651461c570 Start on decode_code_point() 2021-06-29 23:17:44 -04:00
3ce54bd303 Start on lex()/lex_token() 2021-06-29 23:10:40 -04:00
15454f926a Add TokenNames array 2021-06-29 22:54:24 -04:00
4beb3d2016 Add some token constants 2021-06-27 23:09:42 -04:00
aae7bc188c Use unsigned literals 2021-06-26 18:11:20 -04:00
a716dedeb6 Start on test framework to compile and run generated parser 2021-06-26 16:17:24 -04:00
93cb25df62 Do not generate token names for drop tokens 2021-06-26 16:16:18 -04:00
61dd5bc5a0 Move imbecile_spec to lexer_dfa_spec 2021-06-26 16:01:49 -04:00
10a8ef5eb4 Update generated lexer state and transition tables 2021-06-26 15:58:36 -04:00
98584ce07a Add FA#build_tables 2021-06-24 15:06:10 -04:00
2122ca02fe Start generating lexer states and transitions 2021-06-23 23:15:02 -04:00
5881f13380 Generate enum of token identifiers 2021-06-23 22:22:45 -04:00
ebc1d8f001 Fix FA#to_s to show correct destination state 2021-06-23 22:21:53 -04:00
5fecd5c6a2 Refactor into FA#enumerate 2021-06-22 22:01:39 -04:00
5b688b090d Add some attr_readers 2021-06-21 22:52:27 -04:00
f77218801f Error if Start rule not found 2021-06-21 22:48:17 -04:00
70118dd019 Check for duplicate token/rule names in Generator 2021-06-21 22:34:43 -04:00
d552f2a540 CLI: accept --log option 2021-06-19 12:06:02 -04:00
d2fac07249 Add Generator class 2021-06-15 16:51:36 -04:00
a34272dfd6 Add Grammar::Rule class 2021-06-14 22:49:43 -04:00
9d05861819 Parse grammar input by multiline regex 2021-06-12 22:57:32 -04:00
03035a25a5 Update spec task to accept an example pattern 2021-06-12 22:46:13 -04:00
db70f8b94d Add "drop" grammar keyword to drop patterns 2021-06-09 22:48:30 -04:00
f67dd62b20 Add \s to expand to whitespace characters 2021-06-09 22:37:00 -04:00
c6bac6d3a1 Rename TokenDFA -> LexerDFA 2021-06-08 13:54:46 -04:00
aa92970c31 Add some lexer tests 2021-06-07 22:21:52 -04:00
b8282e748e Start on a test lexer for lexer specs 2021-06-07 17:17:37 -04:00
930ac56148 Do not accept 0-length tokens 2021-06-06 15:29:30 -04:00
7f54778ba8 Rename Regex::DFA to TokenDFA 2021-06-06 15:18:21 -04:00
701903def2 Token should build its own NFA 2021-06-06 14:09:28 -04:00
afea886ecb Add Grammar::Token class 2021-06-06 14:04:33 -04:00
03b2e87186 Grammar takes in input string instead of file name 2021-06-06 10:09:53 -04:00
e4370cac62 Print accepting token in FA#to_s 2021-06-06 09:59:28 -04:00
ed3f599e25 Create common FA/State/Transition classes across NFA/DFA 2021-06-06 09:41:23 -04:00
1228a76c55 Fix MultiplicityUnit#to_nfa again 2021-05-26 10:17:03 -04:00
538e360cb3 Fix MultiplicityUnit#to_nfa 2021-05-25 16:59:22 -04:00
e7f8c3726c Fix NFA#to_s 2021-05-25 16:14:19 -04:00
b6e3a5c151 Record accepting token in DFA state 2021-05-25 16:00:25 -04:00
35ef94dbd3 Print out DFA to test 2021-05-25 15:52:47 -04:00
37e1252ded Continue building DFA 2021-05-25 15:44:23 -04:00
214ece7d90 Add NFA::Transition, start on DFA construction 2021-05-23 21:41:50 -04:00
8473df421a Add specs for CodePointRange 2021-05-23 20:41:40 -04:00
3987f08cd7 Add CodePointRange class 2021-05-23 17:52:20 -04:00
3a1650906e Show non-printable characters better in NFA#to_s 2021-05-21 14:39:02 -04:00
952bffc33c Move DFA#nil_transition_states to NFA::State 2021-05-21 14:27:42 -04:00
f64f3683c6 Add NFA#to_s 2021-05-21 14:24:16 -04:00
43f5caf449 Fix some NFA creation 2021-05-20 17:34:18 -04:00
f38a7456e9 Add DFA#nil_transition_states 2021-05-20 17:08:34 -04:00
c77c81bf25 Mark regex NFA end state as accepting the token 2021-05-18 16:34:26 -04:00
7196a0605a Add DFA class 2021-05-18 16:31:16 -04:00
24054461a2 Merge Regex::Parser into Regex, move Unit to its own file 2021-05-18 16:14:42 -04:00
89a5976064 Make Regex::Parser build a NFA after parsing 2021-05-18 16:07:39 -04:00
d3df67be1e Update rake 2021-05-18 16:03:14 -04:00
791340b292 Build NFA for each token pattern 2021-05-17 22:57:18 -04:00
cf8718b69c Allow token definition with no pattern 2021-05-17 22:40:23 -04:00
39f164a7db Parse . in a regex 2021-05-17 17:20:56 -04:00
70b3e56de2 Store all characters as ranges; add CharacterClassUnit#to_nfa 2021-05-14 13:52:03 -04:00
2e8e72a1e8 Add CharacterClassUnit and use it instead of AlternatesUnit 2021-05-14 12:32:53 -04:00
ea27baa630 Add #to_nfa for other regex unit types 2021-05-13 15:57:09 -04:00
d8dd64d860 Add NFA class; start converting units to NFAs 2021-05-13 00:01:12 -04:00
54cefda186 Use Parser 2021-05-11 16:52:28 -04:00
201a38fb51 Add Parser specs 2021-05-11 15:29:40 -04:00
33f9d01883 Rename start/end to min/max for CharacterRangeUnit 2021-05-11 15:28:45 -04:00
9b09625c8a Fix parsing - at beginning of negated character class 2021-05-11 14:57:16 -04:00
6119d860bc Fix character class parsing into an AlternatesUnit 2021-05-11 14:57:01 -04:00
611ebeeddd Fix max multiplicity count parsing 2021-05-11 11:37:46 -04:00
449eec4982 Fix multiplicity count parsing 2021-05-11 11:33:10 -04:00
8cd648fc8f Create spec file for Parser 2021-05-07 16:58:38 -04:00
885ef6c151 Rename Regex::Unit -> Regex::Parser 2021-05-07 16:57:05 -04:00
60adffbbab Add rspec 2021-05-07 15:16:01 -04:00
b8c01ca1d1 Move Unit stuff from Imbecile::Regex to Imbecile::Regex::Unit 2021-05-07 15:10:51 -04:00
b04ff56308 Add Regex class 2021-05-02 15:22:45 -04:00
ca1d2d1e5c Fix class name determination from output file name 2021-05-01 17:01:15 -04:00
13403405b0 Add Error class to handle grammar loading errors 2021-05-01 16:54:24 -04:00
07dd68e367 Write output file from ERB template 2021-05-01 16:44:01 -04:00
c1666a1e74 Require output file on command line 2021-05-01 14:52:16 -04:00
768a0ef17f Extract class name from grammar file 2021-05-01 14:34:00 -04:00
9e865d1982 Throw error on unexpected grammar input line 2021-05-01 09:40:22 -04:00
9884047090 Skip blank lines 2021-05-01 09:39:19 -04:00
04393dcc51 Check for duplicate token names; skip comment lines 2021-05-01 09:38:08 -04:00
7f27b3fd6f Exit with CLI exit code 2021-05-01 09:34:38 -04:00
37ad87d602 Rename GrammarParser -> Grammar 2021-05-01 09:33:35 -04:00
23b7782a5d Begin parsing grammar 2021-05-01 09:31:12 -04:00
0cc4516c0e Add GrammarParser class to parse input file 2021-05-01 08:22:14 -04:00
75a1049040 Parse command-line options 2021-05-01 08:16:09 -04:00
a9ff93dda4 Add script to test run 2021-05-01 08:16:01 -04:00
d879a93d09 Add bin/imbecile and Imbecile::CLI module 2021-04-29 23:26:52 -04:00
ee27c5e9b1 Add Gemfile.lock 2021-04-29 23:26:37 -04:00
989e5f47de Edit some gemspec fields 2021-04-29 23:26:29 -04:00
04e17cde30 Add "bundle gem"-generated files 2021-04-29 23:22:23 -04:00
bc217e7ddb Start on ruby branch 2021-04-29 23:18:22 -04:00
51 changed files with 2313 additions and 1317 deletions

19
.gitignore vendored

@@ -1,9 +1,10 @@
-imbecile
-tags
-*.o
-.*.swp
-*.dep
-tmpl.*
-tests/*/itest.cc
-tests/*/itest.h
-tests/*/test
+/.bundle/
+/.yardoc
+/_yardoc/
+/coverage/
+/doc/
+/pkg/
+/spec/reports/
+/tmp/
+/.rspec_status
+/spec/run/

3
.gitmodules vendored

@@ -1,3 +0,0 @@
[submodule "refptr"]
path = refptr
url = http://github.com/holtrop/refptr.git

3
.rspec Normal file

@@ -0,0 +1,3 @@
--format documentation
--color
--require spec_helper

4
Gemfile Normal file

@@ -0,0 +1,4 @@
source "https://rubygems.org"
gem "rake"
gem "rspec"

28
Gemfile.lock Normal file

@@ -0,0 +1,28 @@
GEM
remote: https://rubygems.org/
specs:
diff-lcs (1.5.0)
rake (13.0.6)
rspec (3.11.0)
rspec-core (~> 3.11.0)
rspec-expectations (~> 3.11.0)
rspec-mocks (~> 3.11.0)
rspec-core (3.11.0)
rspec-support (~> 3.11.0)
rspec-expectations (3.11.0)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.11.0)
rspec-mocks (3.11.1)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.11.0)
rspec-support (3.11.0)
PLATFORMS
ruby
DEPENDENCIES
rake
rspec
BUNDLED WITH
2.4.0.dev

21
LICENSE.txt Normal file

@@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2010-2022 Josh Holtrop
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

61
Makefile

@@ -1,61 +0,0 @@
TARGET := imbecile
CXXOBJS := $(patsubst %.cc,%.o,$(wildcard *.cc)) tmpl.o
CXXDEPS := $(patsubst %.o,.%.dep,$(CXXOBJS))
CXXFLAGS := -O2
DEPS := $(CXXDEPS)
OBJS := $(CXXOBJS)
LDFLAGS := -lpcre
CPPFLAGS := -I$(shell pwd)/refptr
all: submodule_check tmpl.h $(TARGET)
.PHONY: submodule_check
submodule_check:
@if [ ! -e refptr/refptr.h ]; then \
echo Error: \"refptr\" folder is not populated.; \
echo Perhaps you forgot to do \"git checkout --recursive\"?; \
echo You can remedy the situation with \"git submodule update --init\".; \
exit 1; \
fi
$(TARGET): $(OBJS)
$(CXX) -o $@ $^ $(LDFLAGS)
# Object file rules
%.o: %.cc
$(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $<
# Make dependency files
.%.dep: %.c
@set -e; rm -f $@; \
$(CC) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@
.%.dep: %.cc tmpl.h
@set -e; rm -f $@; \
$(CXX) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@
tmpl.cc: $(wildcard tmpl/*)
echo -n > $@
for f in $*/*; \
do xxd -i $$f >> $@; \
done
tmpl.h: tmpl.cc
echo '#ifndef $*_h' > $@
echo '#define $*_h' >> $@
grep '$*_' $^ | sed -e 's/^/extern /' -e 's/ =.*/;/' >> $@
echo '#endif' >> $@
.PHONY: tests
tests: PATH := $(shell pwd):$(PATH)
tests: all
$(MAKE) -C $@
tests-clean:
$(MAKE) -C tests clean
clean: tests-clean
-rm -f $(TARGET) *.o .*.dep tmpl.cc tmpl.h
-include $(CXXDEPS)

423
Parser.cc

@@ -1,423 +0,0 @@
#include <stdio.h>
#include <string.h>
#include <pcre.h>
#include <ctype.h> /* toupper() */
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include "Parser.h"
#include "TokenDefinition.h"
#include "RuleDefinition.h"
#include "tmpl.h"
using namespace std;
#define DEBUG
Parser::Parser()
: m_classname("Parser"), m_namespace(""), m_extension("cc"),
m_token_data(new string()), m_token_code(new string()),
m_defines(new string())
{
}
void Parser::makeDefine(const string & defname, const string & definition)
{
*m_defines += string("#define ") + defname + " " + definition + "\n";
}
bool Parser::write(const string & fname)
{
if (m_tokens.size() < 1 || m_rules.size() < 1)
return false;
string header_fname = fname + ".h";
string body_fname = fname + "." + m_extension;
ofstream header(header_fname.c_str());
ofstream body(body_fname.c_str());
/* process data */
refptr<string> token_classes = new string();
refptr<string> token_classes_code = new string();
int i = 0;
for (list<TokenDefinitionRef>::const_iterator it = m_tokens.begin();
it != m_tokens.end();
it++)
{
char buff[20];
sprintf(buff, "%d", i++);
makeDefine((*it)->getIdentifier(), buff);
*token_classes += (*it)->getClassDefinition();
*token_classes_code += (*it)->getProcessMethod();
}
if (m_namespace != "")
{
makeDefine("I_NAMESPACE", m_namespace);
}
makeDefine("I_CLASSNAME", m_classname);
/* set up replacements */
setReplacement("token_list", buildTokenList());
setReplacement("buildToken", buildBuildToken());
setReplacement("header_name",
new string(string("\"") + header_fname + "\""));
setReplacement("token_code", m_token_code);
setReplacement("token_data", m_token_data);
setReplacement("defines", m_defines);
setReplacement("token_classes", token_classes);
setReplacement("token_classes_code", token_classes_code);
/* write the header */
writeTmpl(header, (char *) tmpl_parser_h, tmpl_parser_h_len);
/* write the body */
writeTmpl(body, (char *) tmpl_parser_cc, tmpl_parser_cc_len);
header.close();
body.close();
return true;
}
bool Parser::writeTmpl(std::ostream & out, char * dat, int len)
{
char * newline;
char * data = dat;
const char * errptr;
int erroffset;
data[len-1] = '\n';
const int ovec_size = 6;
int ovector[ovec_size];
pcre * replace = pcre_compile("{%(\\w+)%}", 0, &errptr, &erroffset, NULL);
while (data < (dat + len) && (newline = strstr(data, "\n")) != NULL)
{
if (pcre_exec(replace, NULL, data, newline - data,
0, 0, ovector, ovec_size) >= 0)
{
if (ovector[0] > 0)
{
out.write(data, ovector[0]);
}
out << *getReplacement(string(data, ovector[2],
ovector[3] - ovector[2]));
if (ovector[1] < newline - data)
{
out.write(data + ovector[1], newline - data - ovector[1]);
}
}
else
{
out.write(data, newline - data);
}
out << '\n';
data = newline + 1;
}
}
refptr<std::string> Parser::getReplacement(const std::string & name)
{
if (m_replacements.find(name) != m_replacements.end())
{
return m_replacements[name];
}
#ifdef DEBUG
cerr << "No replacement found for \"" << name << "\"" << endl;
#endif
return new string("");
}
refptr<string> Parser::buildTokenList()
{
refptr<string> tokenlist = new string();
for (list<TokenDefinitionRef>::const_iterator t = m_tokens.begin();
t != m_tokens.end();
t++)
{
if (t != m_tokens.begin())
*tokenlist += " ";
*tokenlist += "{ \"" + (*t)->getName() + "\", \""
+ (*t)->getCString() + "\", "
+ ((*t)->getProcessFlag() ? "true" : "false") + " }";
if (({typeof(t) tmp = t; ++tmp;}) != m_tokens.end())
*tokenlist += ",\n";
}
return tokenlist;
}
refptr<string> Parser::buildBuildToken()
{
refptr<string> buildToken = new string();
for (list<TokenDefinitionRef>::const_iterator t = m_tokens.begin();
t != m_tokens.end();
t++)
{
*buildToken += "case " + (*t)->getIdentifier() + ":\n";
*buildToken += " token = new " + (*t)->getClassName() + "();\n";
*buildToken += " break;\n";
}
return buildToken;
}
bool Parser::parseInputFile(char * buff, int size)
{
typedef pcre * pcre_ptr;
enum { none, tokens, rules };
pcre_ptr empty, comment, section_name, token, rule,
data_begin, data_end, code_begin, code_end;
struct { pcre_ptr * re; const char * pattern; } exprs[] = {
{&empty, "^\\s*$"},
{&comment, "^\\s*#"},
{&section_name, "^\\s*\\[([^\\]]+?)\\]\\s*$"},
{&token, "^\\s*" /* possible leading ws */
"([a-zA-Z_][a-zA-Z_0-9]*)" /* 1: token name */
"\\s+" /* required whitespace */
"((?:[^\\\\\\s]|\\\\.)+)"}, /* 2: token RE */
{&rule, "^\\s*(\\S+)\\s*:=(.*)$"},
{&data_begin, "^\\s*\\${"},
{&data_end, "\\$}"},
{&code_begin, "^\\s*%{"},
{&code_end, "%}"}
};
const int ovec_size = 3 * 10;
int ovector[ovec_size];
int lineno = 0;
char * newline;
char * input = buff;
string current_section_name;
map<string, int> sections;
sections["none"] = none;
sections["tokens"] = tokens;
sections["rules"] = rules;
int section = none;
string line;
bool append_line = false;
bool gathering_data = false;
bool gathering_code = false;
string gather;
bool continue_line = false;
TokenDefinitionRef current_token;
for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
{
const char * errptr;
int erroffset;
*exprs[i].re = pcre_compile(exprs[i].pattern, 0,
&errptr, &erroffset, NULL);
if (*exprs[i].re == NULL)
{
cerr << "Error compiling regex '" << exprs[i].pattern <<
"': " << errptr << " at position " << erroffset << endl;
return false;
}
}
for (;;)
{
if (continue_line)
{
continue_line = false;
}
else
{
if ((newline = strstr(input, "\n")) == NULL)
break;
int line_length = newline - input;
if (line_length >= 1 && newline[-1] == '\r')
{
newline[-1] = '\n';
line_length--;
}
lineno++;
if (append_line)
{
line += string(input, line_length);
}
else
{
line = string(input, line_length);
}
input = newline + 1; /* set up for next loop iteration */
}
if ( (pcre_exec(empty, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
|| (pcre_exec(comment, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
)
{
/* skip empty or comment lines */;
continue;
}
if (! (gathering_code || gathering_data) )
{
if (line.size() > 0 && line[line.size()-1] == '\\')
{
line[line.size()-1] = ' ';
append_line = true;
continue;
}
else
{
append_line = false;
}
if (pcre_exec(section_name, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
current_section_name
= string(line, ovector[2], ovector[3] - ovector[2]);
if (sections.find(current_section_name) != sections.end())
{
section = sections[current_section_name];
}
else
{
cerr << "Unknown section name '" << current_section_name
<< "'!" << endl;
return false;
}
continue;
}
}
switch (section)
{
case none:
cerr << "Unrecognized input on line " << lineno << endl;
return false;
case tokens:
if (gathering_data)
{
if (pcre_exec(data_end, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gather += string(line, 0, ovector[0]) + "\n";
gathering_data = false;
line = string(line, ovector[1]);
continue_line = true;
if (current_token.isNull())
{
*m_token_data += gather;
}
else
{
current_token->addData(gather);
}
}
else
{
gather += line + "\n";
}
continue;
}
else if (gathering_code)
{
if (pcre_exec(code_end, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gather += string(line, 0, ovector[0]) + "\n";
gathering_code = false;
line = string(line, ovector[1]);
continue_line = true;
if (current_token.isNull())
{
*m_token_code += gather;
}
else
{
current_token->addCode(gather);
}
}
else
{
gather += line + "\n";
}
continue;
}
else if (pcre_exec(data_begin, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gathering_data = true;
gather = "";
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else if (pcre_exec(code_begin, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
gathering_code = true;
gather = "";
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else if (pcre_exec(token, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
string name(line, ovector[2], ovector[3] - ovector[2]);
string definition(line,
ovector[4], ovector[5] - ovector[4]);
current_token = new TokenDefinition();
if (current_token->create(name, definition))
{
addTokenDefinition(current_token);
}
else
{
cerr << "Error in token definition ending on line "
<< lineno << endl;
return false;
}
line = string(line, ovector[1]);
continue_line = true;
continue;
}
else
{
cerr << "Unrecognized input on line " << lineno << endl;
return false;
}
break;
case rules:
if (pcre_exec(rule, NULL, line.c_str(), line.size(),
0, 0, ovector, ovec_size) >= 0)
{
string name(line, ovector[2], ovector[3] - ovector[2]);
string definition(line,
ovector[4], ovector[5] - ovector[4]);
refptr<RuleDefinition> rd = new RuleDefinition();
if (rd->create(name, definition))
{
addRuleDefinition(rd);
}
else
{
cerr << "Error in rule definition ending on line "
<< lineno << endl;
return false;
}
}
else
{
cerr << "Unrecognized input on line " << lineno << endl;
return false;
}
break;
}
}
for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
{
pcre_free(*exprs[i].re);
}
return true;
}

61
Parser.h

@@ -1,61 +0,0 @@
#ifndef PARSER_H
#define PARSER_H
#include <vector>
#include <string>
#include <list>
#include <map>
#include "refptr.h"
#include "TokenDefinition.h"
#include "RuleDefinition.h"
class Parser
{
public:
Parser();
void addTokenDefinition(refptr<TokenDefinition> td)
{
m_tokens.push_back(td);
}
void addRuleDefinition(refptr<RuleDefinition> rd)
{
m_rules.push_back(rd);
}
bool write(const std::string & fname);
bool parseInputFile(char * buff, int size);
void setClassName(const std::string & cn) { m_classname = cn; }
std::string getClassName() { return m_classname; }
void setNamespace(const std::string & ns) { m_namespace = ns; }
std::string getNamespace() { return m_namespace; }
void setExtension(const std::string & e) { m_extension = e; }
std::string getExtension() { return m_extension; }
protected:
refptr<std::string> buildTokenList();
refptr<std::string> buildBuildToken();
bool writeTmpl(std::ostream & out, char * dat, int len);
refptr<std::string> getReplacement(const std::string & name);
void setReplacement(const std::string & name, refptr<std::string> val)
{
m_replacements[name] = val;
}
void makeDefine(const std::string & defname,
const std::string & definition);
std::list<TokenDefinitionRef> m_tokens;
std::vector< refptr< RuleDefinition > > m_rules;
std::string m_classname;
std::string m_namespace;
std::string m_extension;
std::map< std::string, refptr<std::string> > m_replacements;
refptr<std::string> m_token_data;
refptr<std::string> m_token_code;
refptr<std::string> m_defines;
};
#endif

5
README

@@ -1,5 +0,0 @@
Imbecile is a bottom-up parser generator. It targets C++ and automatically
generates a class hierarchy for interacting with the parser.
Imbecile generates both a lexer and a parser based on the rules given to
it in the input file.

31
README.md Normal file

@@ -0,0 +1,31 @@
# The Propane Parser Generator
Propane is an LR Parser Generator (LPG) which:
* accepts LR(0), SLR, and LALR grammars
* generates a built-in lexer to tokenize input
* supports UTF-8 lexer inputs
* generates a table-driven parser to parse input in linear time
* is MIT-licensed
* is distributable as a standalone Ruby script
## Installation
TODO
## Usage
TODO: Write usage instructions here
## Development
After checking out the repository, run `bundle install` to install dependencies.
Run `rake spec` to execute tests.
## Contributing
Bug reports and pull requests are welcome on GitHub at https://github.com/holtrop/propane.
## License
Propane is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).

9
Rakefile Normal file

@@ -0,0 +1,9 @@
require "rspec/core/rake_task"
RSpec::Core::RakeTask.new(:spec, :example_pattern) do |task, args|
if args.example_pattern
task.rspec_opts = %W[-e "#{args.example_pattern}" -f documentation]
end
end
task :default => :spec

9
RuleDefinition.cc

@@ -1,9 +0,0 @@
#include "RuleDefinition.h"
using namespace std;
bool RuleDefinition::create(const string & name, const string & definition)
{
m_name = name;
}

16
RuleDefinition.h

@@ -1,16 +0,0 @@
#ifndef RULEDEFINITION_H
#define RULEDEFINITION_H
#include <string>
class RuleDefinition
{
public:
bool create(const std::string & name, const std::string & definition);
protected:
std::string m_name;
};
#endif

125
TokenDefinition.cc

@@ -1,125 +0,0 @@
#include <pcre.h>
#include <iostream>
#include <string>
#include <vector>
#include "TokenDefinition.h"
#include "refptr.h"
using namespace std;
#define WHITESPACE " \n\r\t\v"
static string trim(string s)
{
size_t lastpos = s.find_last_not_of(WHITESPACE);
if (lastpos == string::npos)
return "";
s.erase(lastpos + 1);
s.erase(0, s.find_first_not_of(WHITESPACE));
return s;
}
static refptr< vector<string> > split(const string & delim, string str)
{
refptr< vector<string> > ret = new vector<string>();
size_t pos;
while ( (pos = str.find(delim)) != string::npos )
{
string t = str.substr(0, pos);
ret->push_back(t);
str.erase(0, pos + 1);
}
if (str != "")
ret->push_back(str);
return ret;
}
static string c_escape(const string & orig)
{
string result;
for (string::const_iterator it = orig.begin(); it != orig.end(); it++)
{
if (*it == '\\' || *it == '"')
result += '\\';
result += *it;
}
return result;
}
TokenDefinition::TokenDefinition()
: m_process(false)
{
}
bool TokenDefinition::create(const string & name,
const string & definition)
{
const char * errptr;
int erroffset;
pcre * re = pcre_compile(definition.c_str(), 0, &errptr, &erroffset, NULL);
if (re == NULL)
{
cerr << "Error compiling regular expression '" << definition
<< "' at position " << erroffset << ": " << errptr << endl;
return false;
}
m_name = name;
m_definition = definition;
pcre_free(re);
#if 0
refptr< vector< string > > parts = split(",", flags);
for (int i = 0, sz = parts->size(); i < sz; i++)
{
(*parts)[i] = trim((*parts)[i]);
string & s = (*parts)[i];
if (s == "p")
{
m_process = true;
}
else
{
cerr << "Unknown token flag \"" << s << "\"" << endl;
return false;
}
}
#endif
return true;
}
string TokenDefinition::getCString() const
{
return c_escape(m_definition);
}
string TokenDefinition::getClassDefinition() const
{
string ret = "class "+ getClassName() + " : public Token {\n";
ret += "public:\n";
if (m_process)
{
ret += " virtual void process(const Matches & matches);\n";
}
ret += "\n";
ret += "protected:\n";
ret += m_data + "\n";
ret += "};\n";
return ret;
}
string TokenDefinition::getProcessMethod() const
{
string ret;
if (m_code != "")
{
ret += "void " + getClassName() + "::process(const Matches & matches) {\n";
ret += m_code + "\n";
ret += "}\n";
}
return ret;
}

37
TokenDefinition.h

@@ -1,37 +0,0 @@
#ifndef TOKENDEFINITION_H
#define TOKENDEFINITION_H
#include <string>
#include "refptr.h"
class TokenDefinition
{
public:
TokenDefinition();
bool create(const std::string & name,
const std::string & definition);
std::string getCString() const;
std::string getName() const { return m_name; }
bool getProcessFlag() const { return m_process; }
void setProcessFlag(bool p) { m_process = p; }
void addData(const std::string & d) { m_data += d; }
std::string getData() const { return m_data; }
void addCode(const std::string & c) { m_code += c; m_process = true; }
std::string getCode() const { return m_code; }
std::string getClassDefinition() const;
std::string getProcessMethod() const;
std::string getIdentifier() const { return "TK_" + m_name; }
std::string getClassName() const { return "Tk" + m_name; }
protected:
std::string m_name;
std::string m_definition;
bool m_process;
std::string m_data;
std::string m_code;
};
typedef refptr<TokenDefinition> TokenDefinitionRef;
#endif

252
assets/parser.d.erb Normal file

@@ -0,0 +1,252 @@
<% if @modulename %>
module <%= @modulename %>;
<% end %>
class <%= classname %>
{
enum
{
<% @tokens.each_with_index do |(name, token), index| %>
<% if token.name %>
TOKEN_<%= token.c_name %> = <%= index %>,
<% end %>
<% end %>
TOKEN_EOF = <%= TOKEN_EOF %>,
TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>,
TOKEN_DROP = <%= TOKEN_DROP %>,
TOKEN_NONE = <%= TOKEN_NONE %>,
}
static immutable string TokenNames[] = [
<% @tokens.each_with_index do |(name, token), index| %>
<% if token.name %>
"<%= token.name %>",
<% else %>
null,
<% end %>
<% end %>
];
static class Decoder
{
enum
{
CODE_POINT_INVALID = 0xFFFFFFFE,
CODE_POINT_EOF = 0xFFFFFFFF,
}
struct DecodedCodePoint
{
uint code_point;
uint code_point_length;
}
static DecodedCodePoint decode_code_point(const(ubyte) * input, size_t input_length)
{
if (input_length == 0u)
{
return DecodedCodePoint(CODE_POINT_EOF, 0u);
}
ubyte c = *input;
uint code_point;
uint code_point_length;
if ((c & 0x80u) == 0u)
{
code_point = c;
code_point_length = 1u;
}
else
{
ubyte following_bytes;
if ((c & 0xE0u) == 0xC0u)
{
code_point = c & 0x1Fu;
following_bytes = 1u;
}
else if ((c & 0xF0u) == 0xE0u)
{
code_point = c & 0x0Fu;
following_bytes = 2u;
}
else if ((c & 0xF8u) == 0xF0u)
{
code_point = c & 0x07u;
following_bytes = 3u;
}
else if ((c & 0xFCu) == 0xF8u)
{
code_point = c & 0x03u;
following_bytes = 4u;
}
else if ((c & 0xFEu) == 0xFCu)
{
code_point = c & 0x01u;
following_bytes = 5u;
}
if (input_length <= following_bytes)
{
return DecodedCodePoint(CODE_POINT_INVALID, 0u);
}
code_point_length = following_bytes + 1u;
while (following_bytes-- > 0u)
{
input++;
code_point <<= 6u;
code_point |= *input & 0x3Fu;
}
}
return DecodedCodePoint(code_point, code_point_length);
}
}
static class Lexer
{
private struct Transition
{
uint first;
uint last;
uint destination;
}
private struct State
{
uint transition_table_index;
uint n_transitions;
uint accepts;
}
<% transition_table, state_table = lexer.dfa.build_tables %>
private static const Transition transitions[] = [
<% transition_table.each do |transition_table_entry| %>
Transition(<%= transition_table_entry[:first] %>u, <%= transition_table_entry[:last] %>u, <%= transition_table_entry[:destination] %>u),
<% end %>
];
private static const State states[] = [
<% state_table.each do |state_table_entry| %>
State(<%= state_table_entry[:transition_table_index] %>u, <%= state_table_entry[:n_transitions] %>u, <%= state_table_entry[:accepts] %>u),
<% end %>
];
struct LexedToken
{
size_t row;
size_t col;
size_t length;
uint token;
}
private const(ubyte) * m_input;
private size_t m_input_length;
private size_t m_input_position;
private size_t m_input_row;
private size_t m_input_col;
this(const(ubyte) * input, size_t input_length)
{
m_input = input;
m_input_length = input_length;
}
LexedToken lex_token()
{
for (;;)
{
LexedToken lt = attempt_lex_token();
if (lt.token != TOKEN_DROP)
{
return lt;
}
}
}
private LexedToken attempt_lex_token()
{
LexedToken lt = LexedToken(m_input_row, m_input_col, 0, TOKEN_NONE);
struct LexedTokenState
{
size_t length;
size_t delta_row;
size_t delta_col;
uint token;
}
LexedTokenState last_accepts_info;
last_accepts_info.token = TOKEN_NONE;
LexedTokenState attempt_info;
uint current_state;
for (;;)
{
auto decoded = Decoder.decode_code_point(&m_input[m_input_position + attempt_info.length], m_input_length - m_input_position - attempt_info.length);
if (decoded.code_point == Decoder.CODE_POINT_INVALID)
{
lt.token = TOKEN_DECODE_ERROR;
return lt;
}
bool lex_continue = false;
if (decoded.code_point != Decoder.CODE_POINT_EOF)
{
uint dest = transition(current_state, decoded.code_point);
if (dest != cast(uint)-1)
{
lex_continue = true;
attempt_info.length += decoded.code_point_length;
if (decoded.code_point == '\n')
{
attempt_info.delta_row++;
attempt_info.delta_col = 0u;
}
else
{
attempt_info.delta_col++;
}
current_state = dest;
if (states[current_state].accepts != TOKEN_NONE)
{
attempt_info.token = states[current_state].accepts;
last_accepts_info = attempt_info;
}
}
}
else if (attempt_info.length == 0u)
{
lt.token = TOKEN_EOF;
break;
}
if (!lex_continue)
{
if (last_accepts_info.token != TOKEN_NONE)
{
lt.token = last_accepts_info.token;
lt.length = last_accepts_info.length;
m_input_position += last_accepts_info.length;
m_input_row += last_accepts_info.delta_row;
if (last_accepts_info.delta_row != 0u)
{
m_input_col = last_accepts_info.delta_col;
}
else
{
m_input_col += last_accepts_info.delta_col;
}
}
break;
}
}
return lt;
}
private uint transition(uint current_state, uint code_point)
{
uint transition_table_index = states[current_state].transition_table_index;
for (uint i = 0u; i < states[current_state].n_transitions; i++)
{
if ((transitions[transition_table_index + i].first <= code_point) &&
(code_point <= transitions[transition_table_index + i].last))
{
return transitions[transition_table_index + i].destination;
}
}
return cast(uint)-1;
}
}
}
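
The Decoder class in the template above implements standard UTF-8 decoding: the leading byte's high bits select the sequence length, and each continuation byte contributes six low-order bits. A minimal Ruby re-expression of the same logic (an illustration only, not part of this diff; it omits the legacy 5- and 6-byte forms the template also accepts):

def decode_code_point(bytes)
  # Mirrors Decoder.decode_code_point in the D template above.
  return [:eof, 0] if bytes.empty?
  c = bytes[0]
  return [c, 1] if c & 0x80 == 0             # ASCII fast path
  code_point, following =
    case
    when c & 0xE0 == 0xC0 then [c & 0x1F, 1] # 110xxxxx: 2-byte sequence
    when c & 0xF0 == 0xE0 then [c & 0x0F, 2] # 1110xxxx: 3-byte sequence
    when c & 0xF8 == 0xF0 then [c & 0x07, 3] # 11110xxx: 4-byte sequence
    else return [:invalid, 0]
    end
  return [:invalid, 0] if bytes.length <= following
  following.times { |i| code_point = (code_point << 6) | (bytes[1 + i] & 0x3F) }
  [code_point, following + 1]
end

decode_code_point("€".bytes)  # => [0x20AC, 3], decoded from the bytes 0xE2 0x82 0xAC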

5
bin/propane Executable file

@@ -0,0 +1,5 @@
#!/usr/bin/env ruby
require "propane"
exit Propane::CLI.run(ARGV.dup)

101
main.cc

@@ -1,101 +0,0 @@
#include <getopt.h>
#include <iostream>
#include <fstream>
#include "refptr.h"
#include "Parser.h"
using namespace std;
string buildOutputFilename(string & input_fname);
int main(int argc, char * argv[])
{
int longind = 1;
int opt;
Parser p;
string outfile;
static struct option longopts[] = {
/* name, has_arg, flag, val */
{ "classname", required_argument, NULL, 'c' },
{ "extension", required_argument, NULL, 'e' },
{ "namespace", required_argument, NULL, 'n' },
{ "outfile", required_argument, NULL, 'o' },
{ NULL, 0, NULL, 0 }
};
while ((opt = getopt_long(argc, argv, "", longopts, &longind)) != -1)
{
switch (opt)
{
case 'c': /* classname */
p.setClassName(optarg);
break;
case 'e': /* extension */
p.setExtension(optarg);
break;
case 'n': /* namespace */
p.setNamespace(optarg);
break;
case 'o': /* outfile */
outfile = optarg;
break;
}
}
if (optind >= argc)
{
cerr << "Usage: imbecile [options] <input-file>" << endl;
return 1;
}
string input_fname = argv[optind];
ifstream ifs;
ifs.open(input_fname.c_str(), ios::binary);
if (!ifs.is_open())
{
cerr << "Error opening input file: '" << input_fname << "'";
return 2;
}
ifs.seekg(0, ios_base::end);
int size = ifs.tellg();
ifs.seekg(0, ios_base::beg);
char * buff = new char[size];
ifs.read(buff, size);
ifs.close();
if (outfile == "")
outfile = buildOutputFilename(input_fname);
if (!p.parseInputFile(buff, size))
{
cerr << "Error parsing " << input_fname << endl;
return 3;
}
if (!p.write(outfile))
{
cerr << "Error processing " << input_fname << endl;
return 4;
}
delete[] buff;
return 0;
}
string buildOutputFilename(string & input_fname)
{
string outfile;
size_t len = input_fname.length();
if (len > 2 && input_fname.substr(len - 2) == ".I")
{
outfile = input_fname.substr(0, len - 2);
}
else
{
outfile = input_fname;
}
return outfile;
}

137
lib/propane.rb Normal file

@@ -0,0 +1,137 @@
require "erb"
require "set"
require_relative "propane/cli"
require_relative "propane/code_point_range"
require_relative "propane/fa"
require_relative "propane/fa/state"
require_relative "propane/fa/state/transition"
require_relative "propane/lexer"
require_relative "propane/lexer/dfa"
require_relative "propane/parser"
require_relative "propane/parser/item"
require_relative "propane/parser/item_set"
require_relative "propane/regex"
require_relative "propane/regex/nfa"
require_relative "propane/regex/unit"
require_relative "propane/rule"
require_relative "propane/token"
require_relative "propane/version"
class Propane
# EOF.
TOKEN_EOF = 0xFFFFFFFC
# Decoding error.
TOKEN_DECODE_ERROR = 0xFFFFFFFD
# Token ID for a "dropped" token.
TOKEN_DROP = 0xFFFFFFFE
# Invalid token ID.
TOKEN_NONE = 0xFFFFFFFF
class Error < RuntimeError
end
def initialize(input)
@tokens = {}
@rules = {}
input = input.gsub("\r\n", "\n")
while !input.empty?
parse_grammar(input)
end
end
def generate(output_file, log_file)
expand_rules
lexer = Lexer.new(@tokens)
parser = Parser.new(@tokens, @rules)
classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../assets/parser.d.erb")), trim_mode: "<>")
result = erb.result(binding.clone)
File.open(output_file, "wb") do |fh|
fh.write(result)
end
end
private
def parse_grammar(input)
if input.slice!(/\A\s+/)
# Skip white space.
elsif input.slice!(/\A#.*\n/)
# Skip comment lines.
elsif input.slice!(/\Amodule\s+(\S+)\n/)
@modulename = $1
elsif input.slice!(/\Aclass\s+(\S+)\n/)
@classname = $1
elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
name, pattern = $1, $2
if pattern.nil?
pattern = name
end
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
raise Error.new("Invalid token name #{name}")
end
if @tokens[name]
raise Error.new("Duplicate token name #{name}")
else
@tokens[name] = Token.new(name, pattern, @tokens.size)
end
elsif input.slice!(/\Adrop\s+(\S+)\n/)
pattern = $1
@tokens[name] = Token.new(nil, pattern, @tokens.size)
elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
rule_name, components, code = $1, $2, $3
components = components.strip.split(/\s+/)
@rules[rule_name] ||= Rule.new(rule_name, @rules.size)
@rules[rule_name].add_pattern(components, code)
else
if input.size > 25
input = input.slice(0..20) + "..."
end
raise Error.new("Unexpected grammar input: #{input}")
end
end
def expand_rules
@rules.each do |rule_name, rule|
if @tokens.include?(rule_name)
raise Error.new("Rule name collides with token name #{rule_name}")
end
end
unless @rules["Start"]
raise Error.new("Start rule not found")
end
@rules.each do |rule_name, rule|
rule.patterns.each do |rule|
rule.components.map! do |component|
if @tokens[component]
@tokens[component]
elsif @rules[component]
@rules[component]
else
raise Error.new("Symbol #{component} not found")
end
end
end
end
end
class << self
def run(input_file, output_file, log_file)
begin
propane = Propane.new(File.read(input_file))
propane.generate(output_file, log_file)
rescue Error => e
$stderr.puts e.message
return 2
end
return 0
end
end
end
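
parse_grammar above accepts module, class, token, and drop declarations, plus rules of the form Name: [components] << code >>. A minimal end-to-end sketch of driving this class; the grammar and file names are hypothetical, and lib/ is assumed to be on the load path (as propane.sh arranges):

require "propane"

# A hypothetical grammar exercising each declaration form matched above.
grammar = <<'GRAMMAR'
module calc
token plus \+
token number [0-9]+
drop \s+
Start: [number plus number] <<
>>
GRAMMAR

propane = Propane.new(grammar)
propane.generate("calc_parser.d", nil)  # renders assets/parser.d.erb to a D source file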

54
lib/propane/cli.rb Normal file

@@ -0,0 +1,54 @@
class Propane
module CLI
USAGE = <<EOF
Usage: #{$0} [options] <input-file> <output-file>
Options:
--log LOG Write log file
--version Show program version and exit
-h, --help Show this usage and exit
EOF
class << self
def run(args)
params = []
log_file = nil
i = 0
while i < args.size
arg = args[i]
case arg
when "--log"
if i + 1 < args.size
i += 1
log_file = args[i]
end
when "--version"
puts "propane v#{VERSION}"
return 0
when "-h", "--help"
puts USAGE
return 0
when /^-/
$stderr.puts "Error: unknown option #{arg}"
return 1
else
params << arg
end
i += 1
end
if params.size != 2
$stderr.puts "Error: specify input and output files"
return 1
end
unless File.readable?(params[0])
$stderr.puts "Error: cannot read #{params[0]}"
return 2
end
Propane.run(*params, log_file)
end
end
end
end
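
A sketch of calling this entry point directly rather than through bin/propane; the file names are hypothetical:

require "propane"

# Same effect as: propane --log build.log calc.propane calc_parser.d
status = Propane::CLI.run(["--log", "build.log", "calc.propane", "calc_parser.d"])
exit status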

84
lib/propane/code_point_range.rb Normal file

@@ -0,0 +1,84 @@
class Propane
class CodePointRange
MAX_CODE_POINT = 0xFFFFFFFF
attr_reader :first
attr_reader :last
include Comparable
# Build a CodePointRange
def initialize(first, last = nil)
@first = first.ord
if last
@last = last.ord
if @last < @first
raise "Invalid CodePointRange: last code point must be >= first code point"
end
else
@last = @first
end
end
def <=>(other)
if self.first != other.first
@first <=> other.first
else
@last <=> other.last
end
end
def include?(v)
if v.is_a?(CodePointRange)
@first <= v.first && v.last <= @last
else
@first <= v && v <= @last
end
end
def size
@last - @first + 1
end
class << self
def invert_ranges(code_point_ranges)
new_ranges = []
last_cp = -1
code_point_ranges.sort.each do |code_point_range|
if code_point_range.first > (last_cp + 1)
new_ranges << CodePointRange.new(last_cp + 1, code_point_range.first - 1)
last_cp = code_point_range.last
else
last_cp = [last_cp, code_point_range.last].max
end
end
if last_cp < MAX_CODE_POINT
new_ranges << CodePointRange.new(last_cp + 1, MAX_CODE_POINT)
end
new_ranges
end
def first_subrange(code_point_ranges)
code_point_ranges.sort.reduce do |result, code_point_range|
if code_point_range.include?(result.first)
if code_point_range.last < result.last
code_point_range
else
result
end
else
if code_point_range.first <= result.last
CodePointRange.new(result.first, code_point_range.first - 1)
else
result
end
end
end
end
end
end
end
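
Two worked examples of the class-level helpers, with results traced from the code above (illustration only):

# invert_ranges complements a set of ranges over [0, MAX_CODE_POINT]:
Propane::CodePointRange.invert_ranges([Propane::CodePointRange.new("A", "Z")])
# => ranges covering 0..64 and 91..0xFFFFFFFF

# first_subrange returns the leading subrange up to where the next range starts:
ranges = [Propane::CodePointRange.new("a", "z"),   # 97..122
          Propane::CodePointRange.new("d", "i")]   # 100..105
Propane::CodePointRange.first_subrange(ranges)     # => the range 97..99, i.e. "a".."c"

first_subrange is what Lexer::DFA#process_nfa_state_set uses to carve overlapping NFA transitions into disjoint code point ranges.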

61
lib/propane/fa.rb Normal file

@@ -0,0 +1,61 @@
class Propane
class FA
attr_reader :start_state
def initialize
@start_state = State.new
end
def to_s
chr = lambda do |value|
if value < 32 || value > 127
"{#{value}}"
else
value.chr
end
end
rv = ""
states = enumerate
states.each do |state, id|
accepts_s = state.accepts ? " #{state.accepts}" : ""
rv += "#{id}#{accepts_s}:\n"
state.transitions.each do |transition|
if transition.nil?
range_s = "nil"
else
range_s = chr[transition.code_point_range.first]
if transition.code_point_range.size > 1
range_s += "-" + chr[transition.code_point_range.last]
end
end
accepts_s = transition.destination.accepts ? " #{transition.destination.accepts}" : ""
rv += " #{range_s} => #{states[transition.destination]}#{accepts_s}\n"
end
end
rv
end
def enumerate
@_enumerated ||=
begin
id = 0
states = {}
visit = lambda do |state|
unless states.include?(state)
states[state] = id
id += 1
state.transitions.each do |transition|
visit[transition.destination]
end
end
end
visit[@start_state]
states
end
end
end
end

51
lib/propane/fa/state.rb Normal file

@@ -0,0 +1,51 @@
class Propane
class FA
class State
attr_accessor :accepts
attr_reader :transitions
def initialize
@transitions = []
end
def add_transition(code_point_range, destination)
@transitions << Transition.new(code_point_range, destination)
end
# Determine the set of states (including this state) that can be reached
# via nil transitions from this state.
#
# @return [Set<NFA::State>]
# Set of states.
def nil_transition_states
states = Set[self]
analyze_state = lambda do |state|
state.nil_transitions.each do |transition|
unless states.include?(transition.destination)
states << transition.destination
analyze_state[transition.destination]
end
end
end
analyze_state[self]
states
end
def nil_transitions
@transitions.select do |transition|
transition.nil?
end
end
def cp_transitions
@transitions.reject do |transition|
transition.nil?
end
end
end
end
end

23
lib/propane/fa/state/transition.rb Normal file

@@ -0,0 +1,23 @@
class Propane
class FA
class State
class Transition
attr_reader :code_point_range
attr_reader :destination
def initialize(code_point_range, destination)
@code_point_range = code_point_range
@destination = destination
end
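# A transition whose code point range is nil is an epsilon transition;
# overriding nil? below lets State#nil_transitions and FA#to_s test for
# that directly (note this shadows Object#nil?).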
def nil?
@code_point_range.nil?
end
end
end
end
end

13
lib/propane/lexer.rb Normal file

@@ -0,0 +1,13 @@
class Propane
class Lexer
# @return [DFA]
# Lexer DFA.
attr_accessor :dfa
def initialize(tokens)
@dfa = DFA.new(tokens)
end
end
end

118
lib/propane/lexer/dfa.rb Normal file

@@ -0,0 +1,118 @@
class Propane
class Lexer
class DFA < FA
def initialize(tokens)
super()
start_nfa = Regex::NFA.new
tokens.each do |name, token|
start_nfa.start_state.add_transition(nil, token.nfa.start_state)
end
@nfa_state_sets = {}
@states = []
@to_process = Set.new
nil_transition_states = start_nfa.start_state.nil_transition_states
register_nfa_state_set(nil_transition_states)
while @to_process.size > 0
state_set = @to_process.first
@to_process.delete(state_set)
process_nfa_state_set(state_set)
end
@start_state = @states[0]
end
def build_tables
transition_table = []
state_table = []
states = enumerate
states.each do |state, id|
accepts =
if state.accepts.nil?
TOKEN_NONE
elsif state.accepts.name
state.accepts.id
else
TOKEN_DROP
end
state_table << {
transition_table_index: transition_table.size,
n_transitions: state.transitions.size,
accepts: accepts,
}
state.transitions.each do |transition|
transition_table << {
first: transition.code_point_range.first,
last: transition.code_point_range.last,
destination: states[transition.destination],
}
end
end
[transition_table, state_table]
end
private
def register_nfa_state_set(nfa_state_set)
unless @nfa_state_sets.include?(nfa_state_set)
state_id = @states.size
@nfa_state_sets[nfa_state_set] = state_id
@states << State.new
@to_process << nfa_state_set
end
end
def process_nfa_state_set(nfa_state_set)
state_id = @nfa_state_sets[nfa_state_set]
state = @states[state_id]
if state_id > 0
nfa_state_set.each do |nfa_state|
if nfa_state.accepts
if state.accepts
if nfa_state.accepts.id < state.accepts.id
state.accepts = nfa_state.accepts
end
else
state.accepts = nfa_state.accepts
end
end
end
end
transitions = transitions_for(nfa_state_set)
while transitions.size > 0
subrange = CodePointRange.first_subrange(transitions.map(&:code_point_range))
dest_nfa_states = transitions.reduce(Set.new) do |result, transition|
if transition.code_point_range.include?(subrange)
result << transition.destination
end
result
end
dest_nfa_states = dest_nfa_states.reduce(Set.new) do |result, dest_nfa_state|
result + dest_nfa_state.nil_transition_states
end
register_nfa_state_set(dest_nfa_states)
dest_state = @states[@nfa_state_sets[dest_nfa_states]]
state.add_transition(subrange, dest_state)
transitions.delete_if do |transition|
transition.code_point_range.last <= subrange.last
end
transitions.map! do |transition|
if transition.code_point_range.first <= subrange.last
Regex::NFA::State::Transition.new(CodePointRange.new(subrange.last + 1, transition.code_point_range.last), transition.destination)
else
transition
end
end
end
end
def transitions_for(nfa_state_set)
nfa_state_set.reduce([]) do |result, state|
result + state.cp_transitions
end
end
end
end
end
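
The two tables returned by build_tables are what assets/parser.d.erb interpolates into the generated D transitions and states arrays. A sketch of their shapes, with hypothetical values (tokens stands for a token hash as built by Propane#initialize):

transition_table, state_table = Propane::Lexer.new(tokens).dfa.build_tables
state_table[0]       # => { transition_table_index: 0, n_transitions: 2, accepts: Propane::TOKEN_NONE }
transition_table[0]  # => { first: 48, last: 57, destination: 1 }   # "0".."9" leads to state 1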

84
lib/propane/parser.rb Normal file

@@ -0,0 +1,84 @@
class Propane
class Parser
def initialize(tokens, rules)
@token_eof = Token.new("$", nil, TOKEN_EOF)
@item_sets = []
@item_sets_set = {}
start_items = rules["Start"].patterns.map do |pattern|
pattern.components << @token_eof
Item.new(pattern, 0)
end
eval_item_sets = Set.new
eval_item_sets << ItemSet.new(start_items)
while eval_item_sets.size > 0
this_eval_item_sets = eval_item_sets
eval_item_sets = Set.new
this_eval_item_sets.each do |item_set|
unless @item_sets_set.include?(item_set)
item_set.id = @item_sets.size
@item_sets << item_set
@item_sets_set[item_set] = item_set
item_set.follow_symbols.each do |follow_symbol|
unless follow_symbol == @token_eof
follow_set = item_set.build_follow_set(follow_symbol)
eval_item_sets << follow_set
end
end
end
end
end
@item_sets.each do |item_set|
process_item_set(item_set)
puts "Item set #{item_set.id}:"
ids = item_set.in_sets.map(&:id)
if ids.size > 0
puts " (in from #{ids.join(", ")})"
end
puts item_set
item_set.follow_item_set.each do |follow_symbol, follow_item_set|
puts " #{follow_symbol.name} => #{follow_item_set.id}"
end
puts
end
end
def build_tables
shift_table = []
state_table = []
@item_sets.each do |item_set|
shift_entries = item_set.follow_symbols.select do |follow_symbol|
follow_symbol.is_a?(Token)
end.map do |follow_symbol|
{
token_id: follow_symbol.id,
state_id: item_set.follow_item_set[follow_symbol].id,
}
end
state_table << {
shift_index: shift_table.size,
n_shifts: shift_entries.size,
}
shift_table += shift_entries
end
[state_table, shift_table]
end
private
def process_item_set(item_set)
item_set.follow_symbols.each do |follow_symbol|
unless follow_symbol == @token_eof
follow_set = @item_sets_set[item_set.build_follow_set(follow_symbol)]
item_set.follow_item_set[follow_symbol] = follow_set
follow_set.in_sets << item_set
end
end
end
end
end
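
The item sets built here form the LR(0) canonical collection: each closed item set becomes one parser state, and build_tables records, per state, which tokens can be shifted and the destination state. A sketch of the output shape, with hypothetical values:

state_table, shift_table = Propane::Parser.new(tokens, rules).build_tables
state_table[0]  # => { shift_index: 0, n_shifts: 1 }
shift_table[0]  # => { token_id: 1, state_id: 2 }   # token 1 in state 0 shifts to state 2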

69
lib/propane/parser/item.rb Normal file

@@ -0,0 +1,69 @@
class Propane
class Parser
class Item
attr_reader :pattern
attr_reader :position
def initialize(pattern, position)
@pattern = pattern
@position = position
end
def next_component
@pattern.components[@position]
end
def hash
[@pattern, @position].hash
end
def ==(other)
@pattern == other.pattern && @position == other.position
end
def eql?(other)
self == other
end
def closed_items
if @pattern.components[@position].is_a?(Rule)
@pattern.components[@position].patterns.map do |pattern|
Item.new(pattern, 0)
end
else
[]
end
end
def follow_symbol
@pattern.components[@position]
end
def followed_by?(symbol)
follow_symbol == symbol
end
def next_position
Item.new(@pattern, @position + 1)
end
def to_s
parts = []
@pattern.components.each_with_index do |symbol, index|
if @position == index
parts << "."
end
parts << symbol.name
end
if @position == @pattern.components.size
parts << "."
end
"#{@pattern.rule.name} -> #{parts.join(" ")}"
end
end
end
end

76
lib/propane/parser/item_set.rb Normal file

@@ -0,0 +1,76 @@
class Propane
class Parser
class ItemSet
attr_reader :items
attr_accessor :id
# @return [Hash]
# Maps a follow symbol to its item set.
attr_reader :follow_item_set
# @return [Set]
# Item sets leading to this item set.
attr_reader :in_sets
def initialize(items)
@items = Set.new(items)
@follow_item_set = {}
@in_sets = Set.new
close!
end
def follow_symbols
Set.new(@items.map(&:follow_symbol).compact)
end
def build_follow_set(symbol)
ItemSet.new(items_followed_by(symbol).map(&:next_position))
end
def hash
@items.hash
end
def ==(other)
@items.eql?(other.items)
end
def eql?(other)
self == other
end
def to_s
@items.map(&:to_s).join("\n")
end
private
def close!
eval_items = @items
while eval_items.size > 0
this_eval_items = eval_items
eval_items = Set.new
this_eval_items.each do |item|
item.closed_items.each do |new_item|
unless @items.include?(new_item)
eval_items << new_item
end
end
end
@items += eval_items
end
end
def items_followed_by(symbol)
@items.select do |item|
item.followed_by?(symbol)
end
end
end
end
end
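
close! computes the standard LR closure: while an item's dot sits in front of a Rule, that rule's patterns are added as fresh position-0 items, repeating until a fixed point. A worked example as comments, for a hypothetical grammar with Start -> E $ and E -> n:

# seed = Item.new(start_pattern, 0)   # Start -> . E $
# set  = ItemSet.new([seed])          # close! also adds E -> . n
# set.follow_symbols                  # => Set{E, n}
# set.build_follow_set(E)             # => the item set containing Start -> E . $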

162
lib/propane/regex.rb Normal file

@@ -0,0 +1,162 @@
class Propane
class Regex
attr_reader :unit
attr_reader :nfa
def initialize(pattern)
@pattern = pattern.dup
@unit = parse_alternates
@nfa = @unit.to_nfa
if @pattern != ""
raise Error.new(%[Unexpected "#{@pattern}" in pattern])
end
end
private
def parse_alternates
au = AlternatesUnit.new
while @pattern != ""
c = @pattern[0]
return au if c == ")"
@pattern.slice!(0)
case c
when "["
au << parse_character_class
when "("
au << parse_group
when "*", "+", "?", "{"
if last_unit = au.last_unit
case c
when "*"
min_count, max_count = 0, nil
when "+"
min_count, max_count = 1, nil
when "?"
min_count, max_count = 0, 1
when "{"
min_count, max_count = parse_curly_count
end
mu = MultiplicityUnit.new(last_unit, min_count, max_count)
au.replace_last!(mu)
else
raise Error.new("#{c} follows nothing")
end
when "|"
au.new_alternate!
when "\\"
au << parse_backslash
when "."
au << period_character_class
else
au << CharacterRangeUnit.new(c)
end
end
au
end
def parse_group
au = parse_alternates
if @pattern[0] != ")"
raise Error.new("Unterminated group in pattern")
end
@pattern.slice!(0)
au
end
def parse_character_class
ccu = CharacterClassUnit.new
index = 0
loop do
if @pattern == ""
raise Error.new("Unterminated character class")
end
c = @pattern.slice!(0)
if c == "]"
break
elsif c == "^" && index == 0
ccu.negate = true
elsif c == "-" && (ccu.size == 0 || @pattern[0] == "]")
ccu << CharacterRangeUnit.new(c)
elsif c == "\\"
ccu << parse_backslash
elsif c == "-" && @pattern[0] != "]"
begin_cu = ccu.last_unit
unless begin_cu.is_a?(CharacterRangeUnit) && begin_cu.code_point_range.size == 1
raise Error.new("Character range must be between single characters")
end
if @pattern[0] == "\\"
@pattern.slice!(0)
end_cu = parse_backslash
unless end_cu.is_a?(CharacterRangeUnit) && end_cu.code_point_range.size == 1
raise Error.new("Character range must be between single characters")
end
max_code_point = end_cu.code_point
else
max_code_point = @pattern[0].ord
@pattern.slice!(0)
end
cru = CharacterRangeUnit.new(begin_cu.first, max_code_point)
ccu.replace_last!(cru)
else
ccu << CharacterRangeUnit.new(c)
end
index += 1
end
ccu
end
def parse_curly_count
if @pattern =~ /^(\d+)(?:(,)(\d*))?\}(.*)$/
min_count, comma, max_count, pattern = $1, $2, $3, $4
min_count = min_count.to_i
if comma.to_s == ""
max_count = min_count
elsif max_count.to_s != ""
max_count = max_count.to_i
if max_count < min_count
raise Error.new("Maximum repetition count cannot be less than minimum repetition count")
end
else
max_count = nil
end
@pattern = pattern
[min_count, max_count]
else
raise Error.new("Unexpected match count at #{@pattern}")
end
end
def parse_backslash
if @pattern == ""
raise Error.new("Error: unfollowed \\")
else
c = @pattern.slice!(0)
case c
when "d"
CharacterRangeUnit.new("0", "9")
when "s"
ccu = CharacterClassUnit.new
ccu << CharacterRangeUnit.new(" ")
ccu << CharacterRangeUnit.new("\t")
ccu << CharacterRangeUnit.new("\r")
ccu << CharacterRangeUnit.new("\n")
ccu << CharacterRangeUnit.new("\f")
ccu << CharacterRangeUnit.new("\v")
ccu
else
CharacterRangeUnit.new(c)
end
end
end
def period_character_class
ccu = CharacterClassUnit.new
ccu << CharacterRangeUnit.new(0, "\n".ord - 1)
ccu << CharacterRangeUnit.new("\n".ord + 1, 0xFFFFFFFF)
ccu
end
end
end
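
A sketch of what the recursive-descent parser above produces for one pattern; the structure is traced from parse_alternates and is illustrative only:

r = Propane::Regex.new("[a-z_][a-z0-9_]*")
r.unit   # AlternatesUnit holding one SequenceUnit: a CharacterClassUnit for [a-z_]
         # followed by a MultiplicityUnit (min_count 0, max_count nil) wrapping [a-z0-9_]
r.nfa    # the Regex::NFA whose end state Token#initialize later marks as accepting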

26
lib/propane/regex/nfa.rb Normal file

@@ -0,0 +1,26 @@
class Propane
class Regex
class NFA < FA
attr_reader :end_state
def initialize
super()
@end_state = State.new
end
class << self
def empty
nfa = NFA.new
nfa.start_state.add_transition(nil, nfa.end_state)
nfa
end
end
end
end
end

172
lib/propane/regex/unit.rb Normal file

@@ -0,0 +1,172 @@
class Propane
class Regex
class Unit
end
class SequenceUnit < Unit
attr_accessor :units
def initialize
@units = []
end
def method_missing(*args)
@units.__send__(*args)
end
def to_nfa
if @units.empty?
NFA.empty
else
nfa = NFA.new
unit_nfas = @units.map do |unit|
unit.to_nfa
end
nfa.start_state.add_transition(nil, unit_nfas[0].start_state)
unit_nfas.reduce do |prev_nfa, next_nfa|
prev_nfa.end_state.add_transition(nil, next_nfa.start_state)
next_nfa
end.end_state.add_transition(nil, nfa.end_state)
nfa
end
end
end
class AlternatesUnit < Unit
attr_accessor :alternates
def initialize
@alternates = []
new_alternate!
end
def new_alternate!
@alternates << SequenceUnit.new
end
def <<(unit)
@alternates[-1] << unit
end
def last_unit
@alternates[-1][-1]
end
def replace_last!(new_unit)
@alternates[-1][-1] = new_unit
end
def to_nfa
if @alternates.size == 0
NFA.empty
elsif @alternates.size == 1
@alternates[0].to_nfa
else
nfa = NFA.new
alternate_nfas = @alternates.map do |alternate|
alternate.to_nfa
end
alternate_nfas.each do |alternate_nfa|
nfa.start_state.add_transition(nil, alternate_nfa.start_state)
alternate_nfa.end_state.add_transition(nil, nfa.end_state)
end
nfa
end
end
end
class CharacterRangeUnit < Unit
attr_reader :code_point_range
def initialize(c1, c2 = nil)
@code_point_range = CodePointRange.new(c1, c2)
end
def first
@code_point_range.first
end
def last
@code_point_range.last
end
def to_nfa
nfa = NFA.new
nfa.start_state.add_transition(@code_point_range, nfa.end_state)
nfa
end
end
class CharacterClassUnit < Unit
attr_accessor :units
attr_accessor :negate
def initialize
@units = []
@negate = false
end
def method_missing(*args)
@units.__send__(*args)
end
def <<(thing)
if thing.is_a?(CharacterClassUnit)
thing.each do |ccu_unit|
@units << ccu_unit
end
else
@units << thing
end
end
def last_unit
@units[-1]
end
def replace_last!(new_unit)
@units[-1] = new_unit
end
def to_nfa
nfa = NFA.new
if @units.empty?
nfa.start_state.add_transition(nil, nfa.end_state)
else
code_point_ranges = @units.map(&:code_point_range)
if @negate
code_point_ranges = CodePointRange.invert_ranges(code_point_ranges)
end
code_point_ranges.each do |code_point_range|
nfa.start_state.add_transition(code_point_range, nfa.end_state)
end
end
nfa
end
end
class MultiplicityUnit < Unit
attr_accessor :unit
attr_accessor :min_count
attr_accessor :max_count
def initialize(unit, min_count, max_count)
@unit = unit
@min_count = min_count
@max_count = max_count
end
def to_nfa
nfa = NFA.new
last_state = nfa.start_state
unit_nfa = nil
# Chain the required repetitions together with epsilon transitions.
@min_count.times do
unit_nfa = @unit.to_nfa
last_state.add_transition(nil, unit_nfa.start_state)
last_state = unit_nfa.end_state
end
last_state.add_transition(nil, nfa.end_state)
if @max_count.nil?
# Unbounded maximum: loop the final unit NFA back on itself.
if @min_count == 0
unit_nfa = @unit.to_nfa
last_state.add_transition(nil, unit_nfa.start_state)
end
unit_nfa.end_state.add_transition(nil, unit_nfa.start_state)
unit_nfa.end_state.add_transition(nil, nfa.end_state)
else
# Bounded maximum: append the optional repetitions, each of which can
# epsilon-exit directly to the end state.
(@max_count - @min_count).times do
unit_nfa = @unit.to_nfa
last_state.add_transition(nil, unit_nfa.start_state)
unit_nfa.end_state.add_transition(nil, nfa.end_state)
last_state = unit_nfa.end_state
end
end
nfa
end
end
end
end
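The units implement a Thompson-style construction: SequenceUnit chains sub-NFAs with epsilon transitions, AlternatesUnit fans out from a shared start state, and MultiplicityUnit unrolls the required copies and loops or appends the optional ones. A hedged sketch building the unit tree for /ab|c/ by hand, mirroring what the parser produces:

    alt = Propane::Regex::AlternatesUnit.new   # starts with one empty SequenceUnit
    alt << Propane::Regex::CharacterRangeUnit.new("a")
    alt << Propane::Regex::CharacterRangeUnit.new("b")
    alt.new_alternate!                         # begin the "c" alternative
    alt << Propane::Regex::CharacterRangeUnit.new("c")
    nfa = alt.to_nfa  # epsilon-fans out to the "ab" chain and the "c" branch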

39
lib/propane/rule.rb Normal file
View File

@ -0,0 +1,39 @@
class Propane
class Rule
class Pattern
attr_reader :rule
attr_reader :components
attr_reader :code
def initialize(rule, components, code)
@rule = rule
@components = components
@code = code
end
end
attr_reader :id
attr_reader :name
attr_reader :patterns
def initialize(name, id)
@name = name
@id = id
@patterns = []
end
def add_pattern(components, code)
@patterns << Pattern.new(self, components, code)
end
end
end
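A hedged sketch of the Rule API; components are shown here as raw strings, though the generator may resolve them to Token/Rule references:

    rule = Propane::Rule.new("E", 3)               # name, rule ID
    rule.add_pattern(%w[E plus B], "<user code>")  # one alternative: E -> E plus B
    rule.patterns.size            # => 1
    rule.patterns[0].components   # => ["E", "plus", "B"]
    rule.patterns[0].rule == rule # => true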

42
lib/propane/token.rb Normal file
View File

@ -0,0 +1,42 @@
class Propane
class Token
# @return [String]
# Token name.
attr_reader :name
# @return [String]
# Token pattern.
attr_reader :pattern
# @return [Integer]
# Token ID.
attr_reader :id
# @return [Regex::NFA]
# Regex NFA for matching the token.
attr_reader :nfa
def initialize(name, pattern, id)
@name = name
@pattern = pattern
@id = id
unless pattern.nil?
regex = Regex.new(pattern)
regex.nfa.end_state.accepts = self
@nfa = regex.nfa
end
end
def c_name
@name.upcase
end
def to_s
@name
end
end
end
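A hedged usage sketch of Token: the NFA's end state records which token it accepts, which the lexer DFA later consults; a pattern-less token is an assumption here, shown only to illustrate that no NFA is built without a pattern:

    token = Propane::Token.new("int", "\\d+", 0)
    token.c_name                # => "INT"
    token.to_s                  # => "int"
    token.nfa.end_state.accepts # => token
    Propane::Token.new("eof", nil, 1).nfa  # => nil (no pattern, no NFA)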

3
lib/propane/version.rb Normal file
View File

@ -0,0 +1,3 @@
class Propane
VERSION = "0.1.0"
end

2
propane.sh Executable file
View File

@ -0,0 +1,2 @@
#!/bin/sh
exec bundle exec ruby -Ilib bin/propane "$@"

1
refptr

@ -1 +0,0 @@
Subproject commit e2c7e88824c18eb3b218f6308db0194edb422eef

View File

@ -0,0 +1,87 @@
class Propane
describe CodePointRange do
describe "#<=>" do
it "sorts ranges" do
arr = [
CodePointRange.new(100,102),
CodePointRange.new(65, 68),
CodePointRange.new(65, 65),
CodePointRange.new(100, 100),
CodePointRange.new(68, 70),
]
arr.sort!
expect(arr[0]).to eq CodePointRange.new(65, 65)
expect(arr[1]).to eq CodePointRange.new(65, 68)
expect(arr[2]).to eq CodePointRange.new(68, 70)
expect(arr[3]).to eq CodePointRange.new(100, 100)
expect(arr[4]).to eq CodePointRange.new(100, 102)
end
end
describe "#include?" do
it "returns whether the code point is included in the range" do
expect(CodePointRange.new(100).include?(100)).to be_truthy
expect(CodePointRange.new(100, 100).include?(99)).to be_falsey
expect(CodePointRange.new(100, 100).include?(101)).to be_falsey
expect(CodePointRange.new(100, 120).include?(99)).to be_falsey
expect(CodePointRange.new(100, 120).include?(100)).to be_truthy
expect(CodePointRange.new(100, 120).include?(110)).to be_truthy
expect(CodePointRange.new(100, 120).include?(120)).to be_truthy
expect(CodePointRange.new(100, 120).include?(121)).to be_falsey
end
it "returns whether the range is included in the range" do
expect(CodePointRange.new(100).include?(CodePointRange.new(100))).to be_truthy
expect(CodePointRange.new(100, 100).include?(CodePointRange.new(99))).to be_falsey
expect(CodePointRange.new(100, 100).include?(CodePointRange.new(99, 100))).to be_falsey
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(90, 110))).to be_falsey
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(110, 130))).to be_falsey
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(100, 120))).to be_truthy
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(100, 110))).to be_truthy
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(110, 120))).to be_truthy
expect(CodePointRange.new(100, 120).include?(CodePointRange.new(102, 118))).to be_truthy
end
end
describe ".invert_ranges" do
it "inverts ranges" do
expect(CodePointRange.invert_ranges(
[CodePointRange.new(60, 90),
CodePointRange.new(80, 85),
CodePointRange.new(80, 100),
CodePointRange.new(101),
CodePointRange.new(200, 300)])).to eq [
CodePointRange.new(0, 59),
CodePointRange.new(102, 199),
CodePointRange.new(301, 0xFFFFFFFF)]
expect(CodePointRange.invert_ranges(
[CodePointRange.new(0, 500),
CodePointRange.new(7000, 0xFFFFFFFF)])).to eq [
CodePointRange.new(501, 6999)]
end
end
describe ".first_subrange" do
it "returns the first subrange to split" do
expect(CodePointRange.first_subrange(
[CodePointRange.new(65, 90),
CodePointRange.new(66, 66),
CodePointRange.new(80, 90)])).to eq CodePointRange.new(65)
expect(CodePointRange.first_subrange(
[CodePointRange.new(65, 90)])).to eq CodePointRange.new(65, 90)
expect(CodePointRange.first_subrange(
[CodePointRange.new(65, 90),
CodePointRange.new(80, 90)])).to eq CodePointRange.new(65, 79)
expect(CodePointRange.first_subrange(
[CodePointRange.new(65, 90),
CodePointRange.new(65, 100),
CodePointRange.new(65, 95)])).to eq CodePointRange.new(65, 90)
expect(CodePointRange.first_subrange(
[CodePointRange.new(100, 120),
CodePointRange.new(70, 90)])).to eq CodePointRange.new(70, 90)
end
end
end
end
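A hedged reference sketch of the gap computation that .invert_ranges exhibits above (illustration only, not the library's implementation): walk the sorted ranges and emit the uncovered gaps across the full code point space, with results simplified to [first, last] pairs:

    def invert(ranges, max = 0xFFFFFFFF)
      gaps = []
      nxt = 0
      ranges.sort.each do |r|
        gaps << [nxt, r.first - 1] if r.first > nxt
        nxt = [nxt, r.last + 1].max
      end
      gaps << [nxt, max] if nxt <= max
      gaps
    end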

View File

@ -0,0 +1,121 @@
class TestLexer
def initialize(token_dfa)
@token_dfa = token_dfa
end
def lex(input)
input_chars = input.chars
output = []
while lexed_token = lex_token(input_chars)
output << lexed_token
input_chars.slice!(0, lexed_token[1].size)
end
unless input_chars.empty?
raise "Unmatched input #{input_chars.join(" ")}"
end
output
end
def lex_token(input_chars)
return nil if input_chars.empty?
s = ""
current_state = @token_dfa.start_state
last_accepts = nil
last_s = nil
input_chars.each do |input_char|
if next_state = transition(current_state, input_char)
s += input_char
current_state = next_state
if current_state.accepts
last_accepts = current_state.accepts
last_s = s
end
else
break
end
end
if last_accepts
[last_accepts.name, last_s]
end
end
def transition(state, input_char)
state.transitions.each do |transition|
if transition.code_point_range.include?(input_char.ord)
return transition.destination
end
end
nil
end
end
def run(grammar, input)
propane = Propane.new(grammar)
token_dfa = Propane::Lexer::DFA.new(propane.instance_variable_get(:@tokens))
test_lexer = TestLexer.new(token_dfa)
test_lexer.lex(input)
end
describe Propane::Lexer::DFA do
it "lexes a simple token" do
expect(run(<<EOF, "foo")).to eq [["foo", "foo"]]
token foo
EOF
end
it "lexes two tokens" do
expected = [
["foo", "foo"],
["bar", "bar"],
]
expect(run(<<EOF, "foobar")).to eq expected
token foo
token bar
EOF
end
it "lexes the longer of multiple options" do
expected = [
["identifier", "foobar"],
]
expect(run(<<EOF, "foobar")).to eq expected
token foo
token bar
token identifier [a-z]+
EOF
expected = [
["plusplus", "++"],
["plus", "+"],
]
expect(run(<<EOF, "+++")).to eq expected
token plus \\+
token plusplus \\+\\+
EOF
end
it "lexes whitespace" do
expected = [
["foo", "foo"],
["WS", " \t"],
["bar", "bar"],
]
expect(run(<<EOF, "foo \tbar")).to eq expected
token foo
token bar
token WS \\s+
EOF
end
it "allows dropping a matched pattern" do
expected = [
["foo", "foo"],
[nil, " \t"],
["bar", "bar"],
]
expect(run(<<EOF, "foo \tbar")).to eq expected
token foo
token bar
drop \\s+
EOF
end
end

View File

@ -0,0 +1,19 @@
class Propane
class Parser
describe Item do
it "operates properly with a set" do
rule = Object.new
item1 = Item.new(rule, 2)
item2 = Item.new(rule, 2)
expect(item1).to eq item2
expect(item1.eql?(item2)).to be_truthy
set = Set.new([item1, item2])
expect(set.size).to eq 1
end
end
end
end
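For the Set to collapse the two Items above, Item must define value equality and hashing over its rule and position. A hypothetical sketch of that contract, assuming the second constructor argument is the dot position (the real class presumably carries more state):

    class Item
      attr_reader :rule, :position
      def initialize(rule, position)
        @rule = rule
        @position = position
      end
      def ==(other)
        rule.equal?(other.rule) && position == other.position
      end
      alias eql? ==
      def hash
        [rule.object_id, position].hash
      end
    end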

333
spec/propane/regex_spec.rb Normal file
View File

@ -0,0 +1,333 @@
class Propane
RSpec.describe Regex do
it "parses an empty expression" do
regex = Regex.new("")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0].size).to eq 0
end
it "parses a single character unit expression" do
regex = Regex.new("a")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
end
it "parses a group with a single character unit expression" do
regex = Regex.new("(a)")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::AlternatesUnit
alt_unit = seq_unit[0]
expect(alt_unit.alternates.size).to eq 1
expect(alt_unit.alternates[0]).to be_a Regex::SequenceUnit
expect(alt_unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit
end
it "parses a *" do
regex = Regex.new("a*")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 0
expect(m_unit.max_count).to be_nil
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
end
it "parses a +" do
regex = Regex.new("a+")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 1
expect(m_unit.max_count).to be_nil
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
end
it "parses a ?" do
regex = Regex.new("a?")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 0
expect(m_unit.max_count).to eq 1
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
end
it "parses a multiplicity count" do
regex = Regex.new("a{5}")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 5
expect(m_unit.max_count).to eq 5
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
end
it "parses a minimum-only multiplicity count" do
regex = Regex.new("a{5,}")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 5
expect(m_unit.max_count).to be_nil
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
end
it "parses a minimum and maximum multiplicity count" do
regex = Regex.new("a{5,8}")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::MultiplicityUnit
m_unit = seq_unit[0]
expect(m_unit.min_count).to eq 5
expect(m_unit.max_count).to eq 8
expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
expect(m_unit.unit.first).to eq "a".ord
end
it "parses an escaped *" do
regex = Regex.new("a\\*")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 2
expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[0].first).to eq "a".ord
expect(seq_unit[1]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[1].first).to eq "*".ord
end
it "parses an escaped +" do
regex = Regex.new("a\\+")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 2
expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[0].first).to eq "a".ord
expect(seq_unit[1]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[1].first).to eq "+".ord
end
it "parses an escaped \\" do
regex = Regex.new("\\\\d")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 2
expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[0].first).to eq "\\".ord
expect(seq_unit[1]).to be_a Regex::CharacterRangeUnit
expect(seq_unit[1].first).to eq "d".ord
end
it "parses a character class" do
regex = Regex.new("[a-z_]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.negate).to be_falsey
expect(ccu.size).to eq 2
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "a".ord
expect(ccu[0].last).to eq "z".ord
expect(ccu[1]).to be_a Regex::CharacterRangeUnit
expect(ccu[1].first).to eq "_".ord
end
it "parses a negated character class" do
regex = Regex.new("[^xyz]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.negate).to be_truthy
expect(ccu.size).to eq 3
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "x".ord
end
it "parses - as a plain character at beginning of a character class" do
regex = Regex.new("[-9]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.size).to eq 2
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "-".ord
end
it "parses - as a plain character at end of a character class" do
regex = Regex.new("[0-]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.size).to eq 2
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "0".ord
expect(ccu[1]).to be_a Regex::CharacterRangeUnit
expect(ccu[1].first).to eq "-".ord
end
it "parses - as a plain character at beginning of a negated character class" do
regex = Regex.new("[^-9]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.negate).to be_truthy
expect(ccu.size).to eq 2
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "-".ord
end
it "parses . as a plain character in a character class" do
regex = Regex.new("[.]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.negate).to be_falsey
expect(ccu.size).to eq 1
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq ".".ord
end
it "parses - as a plain character when escaped in middle of character class" do
regex = Regex.new("[0\\-9]")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
expect(ccu.negate).to be_falsey
expect(ccu.size).to eq 3
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "0".ord
expect(ccu[1]).to be_a Regex::CharacterRangeUnit
expect(ccu[1].first).to eq "-".ord
expect(ccu[2]).to be_a Regex::CharacterRangeUnit
expect(ccu[2].first).to eq "9".ord
end
it "parses alternates" do
regex = Regex.new("ab|c")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 2
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[1]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[0].size).to eq 2
expect(regex.unit.alternates[1].size).to eq 1
end
it "parses a ." do
regex = Regex.new("a.b")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[0][1]).to be_a Regex::CharacterClassUnit
expect(regex.unit.alternates[0][1].units.size).to eq 2
expect(regex.unit.alternates[0][2]).to be_a Regex::CharacterRangeUnit
end
it "parses something complex" do
regex = Regex.new("(a|)*|[^^]|\\|v|[x-y]+")
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 4
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[0].size).to eq 1
expect(regex.unit.alternates[0][0]).to be_a Regex::MultiplicityUnit
expect(regex.unit.alternates[0][0].min_count).to eq 0
expect(regex.unit.alternates[0][0].max_count).to be_nil
expect(regex.unit.alternates[0][0].unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates[0][0].unit.alternates.size).to eq 2
expect(regex.unit.alternates[0][0].unit.alternates[0]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[0][0].unit.alternates[0].size).to eq 1
expect(regex.unit.alternates[0][0].unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[0][0].unit.alternates[1]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[0][0].unit.alternates[1].size).to eq 0
expect(regex.unit.alternates[1]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[1].size).to eq 1
expect(regex.unit.alternates[1][0]).to be_a Regex::CharacterClassUnit
expect(regex.unit.alternates[1][0].negate).to be_truthy
expect(regex.unit.alternates[1][0].size).to eq 1
expect(regex.unit.alternates[1][0][0]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[2]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[2].size).to eq 2
expect(regex.unit.alternates[2][0]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[2][0].first).to eq "|".ord
expect(regex.unit.alternates[2][1]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[2][1].first).to eq "v".ord
expect(regex.unit.alternates[3]).to be_a Regex::SequenceUnit
expect(regex.unit.alternates[3].size).to eq 1
expect(regex.unit.alternates[3][0]).to be_a Regex::MultiplicityUnit
expect(regex.unit.alternates[3][0].min_count).to eq 1
expect(regex.unit.alternates[3][0].max_count).to be_nil
expect(regex.unit.alternates[3][0].unit).to be_a Regex::CharacterClassUnit
expect(regex.unit.alternates[3][0].unit.size).to eq 1
expect(regex.unit.alternates[3][0].unit[0]).to be_a Regex::CharacterRangeUnit
expect(regex.unit.alternates[3][0].unit[0].first).to eq "x".ord
expect(regex.unit.alternates[3][0].unit[0].last).to eq "y".ord
end
end
end

97
spec/propane_spec.rb Normal file
View File

@ -0,0 +1,97 @@
require "fileutils"
describe Propane do
def write_grammar(grammar)
File.write("spec/run/testparser.i", grammar)
end
def build_parser
result = system(*%w[./propane.sh spec/run/testparser.i spec/run/testparser.d])
expect(result).to be_truthy
end
def compile(test_file)
result = system(*%w[gdc -funittest -o spec/run/testparser spec/run/testparser.d], test_file)
expect(result).to be_truthy
end
def run
result = system("spec/run/testparser")
expect(result).to be_truthy
end
before(:each) do
FileUtils.rm_rf("spec/run")
FileUtils.mkdir_p("spec/run")
end
it "generates a D lexer" do
write_grammar <<EOF
token int \\d+
token plus \\+
token times \\*
drop \\s+
Start: [Foo] <<
>>
Foo: [int] <<
>>
Foo: [plus] <<
>>
EOF
build_parser
compile("spec/test_d_lexer.d")
run
end
it "generates a parser" do
write_grammar <<EOF
token plus \\+
token times \\*
token zero 0
token one 1
Start: [E] <<
>>
E: [E times B] <<
>>
E: [E plus B] <<
>>
E: [B] <<
>>
B: [zero] <<
>>
B: [one] <<
>>
EOF
build_parser
end
it "distinguishes between multiple identical rules with lookahead symbol" do
write_grammar <<EOF
token a
token b
Start: [R1 a] <<
>>
Start: [R2 b] <<
>>
R1: [a b] <<
>>
R2: [a b] <<
>>
EOF
build_parser
end
it "handles reducing a rule that could be arrived at from multiple states" do
write_grammar <<EOF
token a
token b
Start: [a R1] <<
>>
Start: [b R1] <<
>>
R1: [b] <<
>>
EOF
build_parser
end
end

11
spec/spec_helper.rb Normal file
View File

@ -0,0 +1,11 @@
require "bundler/setup"
require "propane"
RSpec.configure do |config|
# Enable flags like --only-failures and --next-failure
config.example_status_persistence_file_path = ".rspec_status"
config.expect_with :rspec do |c|
c.syntax = :expect
end
end

66
spec/test_d_lexer.d Normal file
View File

@ -0,0 +1,66 @@
import testparser;
import std.stdio;
int main()
{
return 0;
}
unittest
{
alias DCP = Testparser.Decoder.DecodedCodePoint;
string inputstring = "5+\n 66";
const(ubyte) * input = cast(const(ubyte) *)inputstring.ptr;
size_t input_length = inputstring.length;
DCP dcp;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP('5', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP('+', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP('\n', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP(' ', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP('6', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP('6', 1u));
input += dcp.code_point_length;
input_length -= dcp.code_point_length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP(Testparser.Decoder.CODE_POINT_EOF, 0u));
inputstring = "\xf0\x9f\xa7\xa1";
input = cast(const(ubyte) *)inputstring.ptr;
input_length = inputstring.length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP(0x1F9E1, 4u));
}
unittest
{
alias LT = Testparser.Lexer.LexedToken;
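// LexedToken fields below are presumably (row, col, length, token), judging from the assertions.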
string input = "5 + 4 * \n677 + 567";
Testparser.Lexer lexer = new Testparser.Lexer(cast(const(ubyte) *)input.ptr, input.length);
assert(lexer.lex_token() == LT(0, 0, 1, Testparser.TOKEN_INT));
assert(lexer.lex_token() == LT(0, 2, 1, Testparser.TOKEN_PLUS));
assert(lexer.lex_token() == LT(0, 4, 1, Testparser.TOKEN_INT));
assert(lexer.lex_token() == LT(0, 6, 1, Testparser.TOKEN_TIMES));
assert(lexer.lex_token() == LT(1, 0, 3, Testparser.TOKEN_INT));
assert(lexer.lex_token() == LT(1, 4, 1, Testparser.TOKEN_PLUS));
assert(lexer.lex_token() == LT(1, 6, 3, Testparser.TOKEN_INT));
assert(lexer.lex_token() == LT(1, 9, 0, Testparser.TOKEN_EOF));
lexer = new Testparser.Lexer(null, 0u);
assert(lexer.lex_token() == LT(0, 0, 0, Testparser.TOKEN_EOF));
}

View File

@ -1,14 +0,0 @@
all:
for d in *; do \
if [ -d $$d ]; then \
make -C $$d; \
fi; \
done
clean:
for d in *; do \
if [ -d $$d ]; then \
make -C $$d clean; \
fi; \
done

View File

@ -1,15 +0,0 @@
TARGET := test
I_SOURCE := itest
CXXFLAGS := -O2
LDFLAGS := -lpcre
all: $(TARGET)
./$(TARGET)
$(TARGET): $(shell which imbecile) $(I_SOURCE).I $(wildcard *.cc)
imbecile $(I_SOURCE).I
$(CXX) -o $@ *.cc $(LDFLAGS)
clean:
-rm -f $(TARGET) *.o $(I_SOURCE).cc $(I_SOURCE).h

View File

@ -1,37 +0,0 @@
[tokens]
AND and
OR or
NOT not
LPAREN \(
RPAREN \)
WS \s+
EQUALS = %{ cout << "Saw '='" << endl; %}
IDENTIFIER [a-zA-Z_][a-zA-Z_0-9]* %{
cout << "Identify: '" << matches[0] << "'" << endl;
%}
DEC_INT [1-9]\d*\b
${
uint64_t value;
$}
%{
sscanf(matches[0].c_str(), "%lld", &value);
cout << "value: " << value << endl;
%}
HEX_INT 0x([0-9a-fA-F]+)\b ${ uint64_t value; $} %{
sscanf(matches[1].c_str(), "%llx", &value);
cout << "value: " << value << endl;
%}
OCT_INT 0([0-7]*)\b
BIN_INT 0b([01]+)\b
[rules]
Assignment := IDENTIFIER ASSIGN Expression
Expression := IDENTIFIER \
| Assignment

View File

@ -1,17 +0,0 @@
#include <sstream>
#include <string>
#include "itest.h"
using namespace std;
int main(int argc, char * argv[])
{
Parser p;
stringstream t(string(
"hi there (one and two and three and four) or (two = nine)\n"
"0x42 12345 0 011 0b0011\n"
));
p.parse(t);
}

View File

@ -1,202 +0,0 @@
#include <string.h> /* memcpy() */
#include <pcre.h>
#include <iostream>
#include <vector>
#include {%header_name%}
using namespace std;
#ifdef I_NAMESPACE
namespace I_NAMESPACE {
#endif
I_CLASSNAME::I_CLASSNAME()
: m_errstr(NULL)
{
}
static TokenRef buildToken(int typeindex)
{
TokenRef token;
switch (typeindex)
{
{%buildToken%}
}
if (!token.isNull())
{
token->setType(typeindex);
}
return token;
}
static void read_istream(istream & i, vector<char> & buff, int & size)
{
size = 0;
int bytes_read;
char read_buff[1000];
while (!i.eof())
{
i.read(&read_buff[0], sizeof(read_buff));
bytes_read = i.gcount();
size += bytes_read;
for (int j = 0; j < bytes_read; j++)
buff.push_back(read_buff[j]);
}
}
bool I_CLASSNAME::parse(istream & i)
{
struct {
const char * name;
const char * definition;
bool process;
pcre * re;
pcre_extra * re_extra;
} tokens[] = {
{%token_list%}
};
if (sizeof(tokens)/sizeof(tokens[0]) == 0)
{
m_errstr = "No tokens defined";
return false;
}
vector<char> buff;
int buff_size;
read_istream(i, buff, buff_size);
if (buff_size <= 0)
{
m_errstr = "0-length input string";
return false;
}
/* append trailing NUL byte for pcre functions */
buff.push_back('\0');
/* compile all token regular expressions */
for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++)
{
const char * errptr;
int erroffset;
tokens[i].re = pcre_compile(tokens[i].definition, 0,
&errptr, &erroffset, NULL);
if (tokens[i].re == NULL)
{
cerr << "Error compiling token '" << tokens[i].name
<< "' regular expression at position " << erroffset
<< ": " << errptr << endl;
m_errstr = "Error in token regular expression";
return false;
}
tokens[i].re_extra = pcre_study(tokens[i].re, 0, &errptr);
}
int buff_pos = 0;
const int ovector_num_matches = 16;
const int ovector_size = 3 * (ovector_num_matches + 1);
int ovector[ovector_size];
while (buff_pos < buff_size)
{
int longest_match_length = 0;
int longest_match_index = -1;
int longest_match_ovector[ovector_size];
for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++)
{
int rc = pcre_exec(tokens[i].re, tokens[i].re_extra,
&buff[0], buff_size, buff_pos,
PCRE_ANCHORED | PCRE_NOTEMPTY,
ovector, ovector_size);
if (rc > 0)
{
/* this pattern matched some of the input */
int len = ovector[1] - ovector[0];
if (len > longest_match_length)
{
longest_match_length = len;
longest_match_index = i;
memcpy(longest_match_ovector, ovector, sizeof(ovector));
}
}
}
if (longest_match_index < 0)
{
/* no pattern matched the input at the current position */
cerr << "Parse error" << endl;
return false;
}
Matches matches(tokens[longest_match_index].re,
&buff[0], longest_match_ovector, ovector_size);
TokenRef token = buildToken(longest_match_index);
if (token.isNull())
{
cerr << "Internal Error: null token" << endl;
return false;
}
token->process(matches);
m_tokens.push_back(token);
buff_pos += longest_match_length;
}
}
refptr<Node> Node::operator[](int index)
{
return (0 <= index && index < m_indexed_children.size())
? m_indexed_children[index]
: NULL;
}
refptr<Node> Node::operator[](const std::string & index)
{
return (m_named_children.find(index) != m_named_children.end())
? m_named_children[index]
: NULL;
}
void Token::process(const Matches & matches)
{
{%token_code%}
}
Matches::Matches(pcre * re, const char * data, int * ovector, int ovec_size)
: m_re(re), m_data(data), m_ovector(ovector), m_ovec_size(ovec_size)
{
}
std::string Matches::operator[](int index) const
{
if (0 <= index && index < (m_ovec_size / 3))
{
int idx = 2 * index;
if (m_ovector[idx] >= 0 && m_ovector[idx + 1] >= 0)
{
return string(m_data, m_ovector[idx],
m_ovector[idx + 1] - m_ovector[idx]);
}
}
return "";
}
std::string Matches::operator[](const std::string & index) const
{
int idx = pcre_get_stringnumber(m_re, index.c_str());
if (idx > 0 && idx < (m_ovec_size / 3))
{
if (m_ovector[idx] >= 0 && m_ovector[idx + 1] >= 0)
{
return string(m_data, m_ovector[idx],
m_ovector[idx + 1] - m_ovector[idx]);
}
}
return "";
}
{%token_classes_code%}
#ifdef I_NAMESPACE
};
#endif

View File

@ -1,181 +0,0 @@
#ifndef IMBECILE_PARSER_HEADER
#define IMBECILE_PARSER_HEADER
#include <pcre.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <iostream>
#include <map>
#include <vector>
#include <list>
{%user_includes%}
{%defines%}
#ifdef I_NAMESPACE
namespace I_NAMESPACE {
#endif
#ifndef REFPTR_H
#define REFPTR_H REFPTR_H
/* Author: Josh Holtrop
* Purpose: Provide a reference-counting pointer-like first order
* C++ object that will free the object it is pointing to when
* all references to it have been destroyed.
* This implementation does not solve the circular reference problem.
* I was not concerned with that when developing this class.
*/
#include <stdlib.h> /* NULL */
template <typename T>
class refptr
{
public:
refptr<T>();
refptr<T>(T * ptr);
refptr<T>(const refptr<T> & orig);
refptr<T> & operator=(const refptr<T> & orig);
refptr<T> & operator=(T * ptr);
~refptr<T>();
T & operator*() const { return *m_ptr; }
T * operator->() const { return m_ptr; }
bool isNull() const { return m_ptr == NULL; }
private:
void cloneFrom(const refptr<T> & orig);
void destroy();
T * m_ptr;
int * m_refCount;
};
template <typename T> refptr<T>::refptr()
{
m_ptr = NULL;
m_refCount = NULL;
}
template <typename T> refptr<T>::refptr(T * ptr)
{
m_ptr = ptr;
m_refCount = new int;
*m_refCount = 1;
}
template <typename T> refptr<T>::refptr(const refptr<T> & orig)
{
cloneFrom(orig);
}
template <typename T> refptr<T> & refptr<T>::operator=(const refptr<T> & orig)
{
destroy();
cloneFrom(orig);
return *this;
}
template <typename T> refptr<T> & refptr<T>::operator=(T * ptr)
{
destroy();
m_ptr = ptr;
m_refCount = new int;
*m_refCount = 1;
return *this;
}
template <typename T> void refptr<T>::cloneFrom(const refptr<T> & orig)
{
this->m_ptr = orig.m_ptr;
this->m_refCount = orig.m_refCount;
if (m_refCount != NULL)
(*m_refCount)++;
}
template <typename T> refptr<T>::~refptr()
{
destroy();
}
template <typename T> void refptr<T>::destroy()
{
if (m_refCount != NULL)
{
if (*m_refCount <= 1)
{
delete m_ptr;
delete m_refCount;
}
else
{
(*m_refCount)--;
}
}
}
#endif
class Matches
{
public:
Matches(pcre * re, const char * data, int * ovector, int ovec_size);
std::string operator[](int index) const;
std::string operator[](const std::string & index) const;
protected:
pcre * m_re;
const char * m_data;
int * m_ovector;
int m_ovec_size;
};
class Node
{
public:
refptr<Node> operator[](int index);
refptr<Node> operator[](const std::string & index);
protected:
std::map< std::string, refptr<Node> > m_named_children;
std::vector< refptr<Node> > m_indexed_children;
};
typedef refptr<Node> NodeRef;
class Token : public Node
{
public:
virtual void process(const Matches & matches);
void setType(int type) { m_type = type; }
int getType() const { return m_type; }
protected:
int m_type;
{%token_data%}
};
typedef refptr<Token> TokenRef;
{%token_classes%}
class I_CLASSNAME
{
public:
I_CLASSNAME();
bool parse(std::istream & in);
const char * getError() { return m_errstr; }
protected:
const char * m_errstr;
std::list<TokenRef> m_tokens;
};
#ifdef I_NAMESPACE
};
#endif
#endif /* IMBECILE_PARSER_HEADER */