Update README

Rename to propane
Update license years
2022-05-30 15:40:31 -04:00 · 2022-05-28 20:20:03 -04:00 · 2022-05-27 21:49:54 -04:00 · 2022-05-27 00:15:03 -04:00 · 2022-05-27 00:14:26 -04:00 · 2022-05-27 00:12:40 -04:00
51 changed files with 2313 additions and 1317 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,9 +1,10 @@
-imbecile
+/.bundle/
-tags
+/.yardoc
-*.o
+/_yardoc/
-.*.swp
+/coverage/
-*.dep
+/doc/
-tmpl.*
+/pkg/
-tests/*/itest.cc
+/spec/reports/
-tests/*/itest.h
+/tmp/
-tests/*/test
+/.rspec_status
 /spec/run/
--- a/.gitmodules
+++ b/.gitmodules
@ -1,3 +0,0 @@
 [submodule "refptr"]
 	path = refptr
 	url = http://github.com/holtrop/refptr.git
--- a/.rspec
+++ b/.rspec
@ -0,0 +1,3 @@
 --format documentation
 --color
 --require spec_helper
--- a/4
+++ b/4
@ -0,0 +1,4 @@
 source "https://rubygems.org"
 gem "rake"
 gem "rspec"
--- a/Gemfile.lock
+++ b/Gemfile.lock
@ -0,0 +1,28 @@
 GEM
  remote: https://rubygems.org/
  specs:
    diff-lcs (1.5.0)
    rake (13.0.6)
    rspec (3.11.0)
      rspec-core (~> 3.11.0)
      rspec-expectations (~> 3.11.0)
      rspec-mocks (~> 3.11.0)
    rspec-core (3.11.0)
      rspec-support (~> 3.11.0)
    rspec-expectations (3.11.0)
      diff-lcs (>= 1.2.0, < 2.0)
      rspec-support (~> 3.11.0)
    rspec-mocks (3.11.1)
      diff-lcs (>= 1.2.0, < 2.0)
      rspec-support (~> 3.11.0)
    rspec-support (3.11.0)
 PLATFORMS
  ruby
 DEPENDENCIES
  rake
  rspec
 BUNDLED WITH
   2.4.0.dev
--- a/LICENSE.txt
+++ b/LICENSE.txt
@ -0,0 +1,21 @@
 The MIT License (MIT)
 Copyright (c) 2010-2022 Josh Holtrop
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
--- a/61
+++ b/61
@ -1,61 +0,0 @@
 # Build rules for the imbecile parser generator.
 # Name of the executable produced by the default goal.
 TARGET   := imbecile
 # Every *.cc in this directory plus the object for the generated tmpl.cc.
 CXXOBJS  := $(patsubst %.cc,%.o,$(wildcard *.cc)) tmpl.o
 # One hidden .<name>.dep file per object, included at the bottom.
 CXXDEPS  := $(patsubst %.o,.%.dep,$(CXXOBJS))
 CXXFLAGS := -O2
 DEPS     := $(CXXDEPS)
 OBJS     := $(CXXOBJS)
 LDFLAGS  := -lpcre
 CPPFLAGS := -I$(shell pwd)/refptr
 all: submodule_check tmpl.h $(TARGET)
 # These goals are commands, not files; declaring them phony keeps a stray
 # file or directory of the same name from suppressing them.
 .PHONY: all submodule_check tests tests-clean clean
 # Fail early with a helpful hint when the refptr submodule is missing.
 submodule_check:
 	@if [ ! -e refptr/refptr.h ]; then \
 		echo Error: \"refptr\" folder is not populated.; \
 		echo Perhaps you forgot to do \"git clone --recursive\"?; \
 		echo You can remedy the situation with \"git submodule update --init\".; \
 		exit 1; \
 	fi
 $(TARGET): $(OBJS)
 	$(CXX) -o $@ $^ $(LDFLAGS)
 # Object file rules
 %.o: %.cc
 	$(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $<
 # Make dependency files
 .%.dep: %.c
 	@set -e; rm -f $@; \
 	  $(CC) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@
 .%.dep: %.cc tmpl.h
 	@set -e; rm -f $@; \
 	  $(CXX) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@
 # Embed every file under tmpl/ as a C array via xxd.
 # In this explicit rule $* is the target minus its known suffix, i.e. "tmpl".
 tmpl.cc: $(wildcard tmpl/*)
 	: > $@
 	for f in $*/*; \
 		do xxd -i $$f >> $@; \
 	done
 # Derive extern declarations for the embedded arrays from tmpl.cc.
 tmpl.h: tmpl.cc
 	echo '#ifndef $*_h' > $@
 	echo '#define $*_h' >> $@
 	grep '$*_' $^ | sed -e 's/^/extern /' -e 's/ =.*/;/' >> $@
 	echo '#endif' >> $@
 # Run the test suite with the freshly built binary first on PATH.
 tests: PATH := $(shell pwd):$(PATH)
 tests: all
 	$(MAKE) -C $@
 tests-clean:
 	$(MAKE) -C tests clean
 clean: tests-clean
 	-rm -f $(TARGET) *.o .*.dep tmpl.cc tmpl.h
 -include $(CXXDEPS)
--- a/Parser.cc
+++ b/Parser.cc
@ -1,423 +0,0 @@
 #include <stdio.h>
 #include <string.h>
 #include <pcre.h>
 #include <ctype.h>                  /* toupper() */
 #include <iostream>
 #include <fstream>
 #include <string>
 #include <map>
 #include "Parser.h"
 #include "TokenDefinition.h"
 #include "RuleDefinition.h"
 #include "tmpl.h"
 using namespace std;
 #define DEBUG
 /* Set up a generator with the default class name "Parser", no namespace,
  * a "cc" body extension, and empty token-data/token-code/define buffers. */
 Parser::Parser()
    : m_classname("Parser"),
      m_namespace(""),
      m_extension("cc"),
      m_token_data(new string()),
      m_token_code(new string()),
      m_defines(new string())
 {
 }
 void Parser::makeDefine(const string & defname, const string & definition)
 {
    *m_defines += string("#define ") + defname + " " + definition + "\n";
 }
 bool Parser::write(const string & fname)
 {
    if (m_tokens.size() < 1 || m_rules.size() < 1)
        return false;
    string header_fname = fname + ".h";
    string body_fname = fname + "." + m_extension;
    ofstream header(header_fname.c_str());
    ofstream body(body_fname.c_str());
    /* process data */
    refptr<string> token_classes = new string();
    refptr<string> token_classes_code = new string();
    int i = 0;
    for (list<TokenDefinitionRef>::const_iterator it = m_tokens.begin();
            it != m_tokens.end();
            it++)
    {
        char buff[20];
        sprintf(buff, "%d", i++);
        makeDefine((*it)->getIdentifier(), buff);
        *token_classes += (*it)->getClassDefinition();
        *token_classes_code += (*it)->getProcessMethod();
    }
    if (m_namespace != "")
    {
        makeDefine("I_NAMESPACE", m_namespace);
    }
    makeDefine("I_CLASSNAME", m_classname);
    /* set up replacements */
    setReplacement("token_list", buildTokenList());
    setReplacement("buildToken", buildBuildToken());
    setReplacement("header_name",
            new string(string("\"") + header_fname + "\""));
    setReplacement("token_code", m_token_code);
    setReplacement("token_data", m_token_data);
    setReplacement("defines", m_defines);
    setReplacement("token_classes", token_classes);
    setReplacement("token_classes_code", token_classes_code);
    /* write the header */
    writeTmpl(header, (char *) tmpl_parser_h, tmpl_parser_h_len);
    /* write the body */
    writeTmpl(body, (char *) tmpl_parser_cc, tmpl_parser_cc_len);
    header.close();
    body.close();
    return true;
 }
 bool Parser::writeTmpl(std::ostream & out, char * dat, int len)
 {
    char * newline;
    char * data = dat;
    const char * errptr;
    int erroffset;
    data[len-1] = '\n';
    const int ovec_size = 6;
    int ovector[ovec_size];
    pcre * replace = pcre_compile("{%(\\w+)%}", 0, &errptr, &erroffset, NULL);
    while (data < (dat + len) && (newline = strstr(data, "\n")) != NULL)
    {
        if (pcre_exec(replace, NULL, data, newline - data,
                    0, 0, ovector, ovec_size) >= 0)
        {
            if (ovector[0] > 0)
            {
                out.write(data, ovector[0]);
            }
            out << *getReplacement(string(data, ovector[2],
                        ovector[3] - ovector[2]));
            if (ovector[1] < newline - data)
            {
                out.write(data + ovector[1], newline - data - ovector[1]);
            }
        }
        else
        {
            out.write(data, newline - data);
        }
        out << '\n';
        data = newline + 1;
    }
 }
 /* Look up the replacement text registered under `name`. When nothing is
  * registered, a fresh empty string is returned (and, in DEBUG builds, a
  * diagnostic is printed to stderr). */
 refptr<std::string> Parser::getReplacement(const std::string & name)
 {
    std::map< std::string, refptr<std::string> >::iterator found
        = m_replacements.find(name);
    if (found == m_replacements.end())
    {
 #ifdef DEBUG
        cerr << "No replacement found for \"" << name << "\"" << endl;
 #endif
        return new string("");
    }
    return found->second;
 }
 /*
  * Build the initializer text for the generated token table: one
  * { "name", "regex", true|false } entry per token, in definition order.
  * Entries are separated by ",\n" and every entry after the first is
  * indented by four spaces. The result is empty when there are no tokens.
  */
 refptr<string> Parser::buildTokenList()
 {
    refptr<string> tokenlist = new string();
    for (list<TokenDefinitionRef>::const_iterator t = m_tokens.begin();
            t != m_tokens.end();
            ++t)
    {
        /* emit the separator before each entry instead of peeking ahead;
         * this needs no compiler-specific look-ahead iterator trick */
        if (t != m_tokens.begin())
            *tokenlist += ",\n    ";
        *tokenlist += "{ \"" + (*t)->getName() + "\", \""
            + (*t)->getCString() + "\", "
            + ((*t)->getProcessFlag() ? "true" : "false") + " }";
    }
    return tokenlist;
 }
 /* Produce the body of the generated switch statement: for every token, a
  * "case <identifier>:" label that instantiates the token's class. */
 refptr<string> Parser::buildBuildToken()
 {
    refptr<string> cases = new string();
    list<TokenDefinitionRef>::const_iterator cur = m_tokens.begin();
    while (cur != m_tokens.end())
    {
        string one_case = "case " + (*cur)->getIdentifier() + ":\n";
        one_case += "    token = new " + (*cur)->getClassName() + "();\n";
        one_case += "    break;\n";
        *cases += one_case;
        cur++;
    }
    return cases;
 }
 /*
  * Parse a grammar description held in `buff`, registering every token and
  * rule definition it contains.
  *
  * The input is consumed one '\n'-terminated line at a time. Recognised
  * constructs are [section] headers ("none", "tokens", "rules"), token lines
  * ("NAME regex"), rule lines ("name := ..."), ${ ... $} data blocks and
  * %{ ... %} code blocks. A line ending in a backslash is joined with the
  * following line.
  *
  * Returns true when the whole input was consumed, false (after printing a
  * message to stderr) on the first error.
  *
  * NOTE(review): `size` is never read; lines are located with strstr(), so
  * this appears to require `buff` to be NUL-terminated -- confirm at callers.
  * NOTE(review): a final line without a trailing '\n' is never processed.
  * NOTE(review): the compiled regexes are freed only on the success path;
  * every early `return false` skips the pcre_free() loop at the end.
  */
 bool Parser::parseInputFile(char * buff, int size)
 {
    typedef pcre * pcre_ptr;
    enum { none, tokens, rules };
    pcre_ptr empty, comment, section_name, token, rule,
             data_begin, data_end, code_begin, code_end;
    /* table pairing each compiled-regex slot with its pattern source */
    struct { pcre_ptr * re; const char * pattern; } exprs[] = {
        {&empty,        "^\\s*$"},
        {&comment,      "^\\s*#"},
        {&section_name, "^\\s*\\[([^\\]]+?)\\]\\s*$"},
        {&token,        "^\\s*"                     /* possible leading ws */
                        "([a-zA-Z_][a-zA-Z_0-9]*)"  /* 1: token name */
                        "\\s+"                      /* required whitespace */
                        "((?:[^\\\\\\s]|\\\\.)+)"}, /* 2: token RE */
        {&rule,         "^\\s*(\\S+)\\s*:=(.*)$"},
        {&data_begin,   "^\\s*\\${"},
        {&data_end,     "\\$}"},
        {&code_begin,   "^\\s*%{"},
        {&code_end,     "%}"}
    };
    const int ovec_size = 3 * 10;
    int ovector[ovec_size];
    int lineno = 0;
    char * newline;
    char * input = buff;
    string current_section_name;
    map<string, int> sections;
    sections["none"] = none;
    sections["tokens"] = tokens;
    sections["rules"] = rules;
    int section = none;
    string line;
    bool append_line = false;
    bool gathering_data = false;
    bool gathering_code = false;
    string gather;
    bool continue_line = false;
    TokenDefinitionRef current_token;
    /* compile every pattern up front; abort on the first failure */
    for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
    {
        const char * errptr;
        int erroffset;
        *exprs[i].re = pcre_compile(exprs[i].pattern, 0,
                &errptr, &erroffset, NULL);
        if (*exprs[i].re == NULL)
        {
            cerr << "Error compiling regex '" << exprs[i].pattern <<
                "': " << errptr << " at position " << erroffset << endl;
            return false;
        }
    }
    for (;;)
    {
        /* continue_line means `line` already holds the unparsed remainder
         * of the current input line, so no new line is fetched */
        if (continue_line)
        {
            continue_line = false;
        }
        else
        {
            if ((newline = strstr(input, "\n")) == NULL)
                break;
            int line_length = newline - input;
            /* CRLF input: drop the '\r' from the line (the byte in the
             * buffer is overwritten with '\n') */
            if (line_length >= 1 && newline[-1] == '\r')
            {
                newline[-1] = '\n';
                line_length--;
            }
            lineno++;
            if (append_line)
            {
                line += string(input, line_length);
            }
            else
            {
                line = string(input, line_length);
            }
            input = newline + 1;        /* set up for next loop iteration */
        }
        /* NOTE(review): this check runs before the gathering checks, so
         * blank lines and lines starting with '#' are also dropped from
         * inside data/code blocks -- confirm that this is intended */
        if ( (pcre_exec(empty, NULL, line.c_str(), line.size(),
                  0, 0, ovector, ovec_size) >= 0)
          || (pcre_exec(comment, NULL, line.c_str(), line.size(),
                  0, 0, ovector, ovec_size) >= 0)
           )
        {
            /* skip empty or comment lines */;
            continue;
        }
        if (! (gathering_code || gathering_data) )
        {
            /* trailing backslash: replace it with a space and join the
             * next input line onto this one */
            if (line.size() > 0 && line[line.size()-1] == '\\')
            {
                line[line.size()-1] = ' ';
                append_line = true;
                continue;
            }
            else
            {
                append_line = false;
            }
            if (pcre_exec(section_name, NULL, line.c_str(), line.size(),
                        0, 0, ovector, ovec_size) >= 0)
            {
                current_section_name
                    = string(line, ovector[2], ovector[3] - ovector[2]);
                if (sections.find(current_section_name) != sections.end())
                {
                    section = sections[current_section_name];
                }
                else
                {
                    cerr << "Unknown section name '" << current_section_name
                        << "'!" << endl;
                    return false;
                }
                continue;
            }
        }
        switch (section)
        {
            case none:
                cerr << "Unrecognized input on line " << lineno << endl;
                return false;
            case tokens:
                if      (gathering_data)
                {
                    /* inside ${ ... $}: accumulate until the terminator */
                    if (pcre_exec(data_end, NULL, line.c_str(), line.size(),
                                0, 0, ovector, ovec_size) >= 0)
                    {
                        gather += string(line, 0, ovector[0]) + "\n";
                        gathering_data = false;
                        line = string(line, ovector[1]);
                        continue_line = true;
                        /* data before any token definition is parser-wide;
                         * otherwise it belongs to the most recent token */
                        if (current_token.isNull())
                        {
                            *m_token_data += gather;
                        }
                        else
                        {
                            current_token->addData(gather);
                        }
                    }
                    else
                    {
                        gather += line + "\n";
                    }
                    continue;
                }
                else if (gathering_code)
                {
                    /* inside %{ ... %}: accumulate until the terminator */
                    if (pcre_exec(code_end, NULL, line.c_str(), line.size(),
                                0, 0, ovector, ovec_size) >= 0)
                    {
                        gather += string(line, 0, ovector[0]) + "\n";
                        gathering_code = false;
                        line = string(line, ovector[1]);
                        continue_line = true;
                        /* same ownership rule as for data blocks */
                        if (current_token.isNull())
                        {
                            *m_token_code += gather;
                        }
                        else
                        {
                            current_token->addCode(gather);
                        }
                    }
                    else
                    {
                        gather += line + "\n";
                    }
                    continue;
                }
                else if (pcre_exec(data_begin, NULL, line.c_str(), line.size(),
                            0, 0, ovector, ovec_size) >= 0)
                {
                    gathering_data = true;
                    gather = "";
                    line = string(line, ovector[1]);
                    continue_line = true;
                    continue;
                }
                else if (pcre_exec(code_begin, NULL, line.c_str(), line.size(),
                            0, 0, ovector, ovec_size) >= 0)
                {
                    gathering_code = true;
                    gather = "";
                    line = string(line, ovector[1]);
                    continue_line = true;
                    continue;
                }
                else if (pcre_exec(token, NULL, line.c_str(), line.size(),
                            0, 0, ovector, ovec_size) >= 0)
                {
                    /* capture 1 is the token name, capture 2 its regex */
                    string name(line, ovector[2], ovector[3] - ovector[2]);
                    string definition(line,
                            ovector[4], ovector[5] - ovector[4]);
                    current_token = new TokenDefinition();
                    if (current_token->create(name, definition))
                    {
                        addTokenDefinition(current_token);
                    }
                    else
                    {
                        cerr << "Error in token definition ending on line "
                            << lineno << endl;
                        return false;
                    }
                    /* re-parse whatever follows the definition on this line */
                    line = string(line, ovector[1]);
                    continue_line = true;
                    continue;
                }
                else
                {
                    cerr << "Unrecognized input on line " << lineno << endl;
                    return false;
                }
                break;
            case rules:
                if (pcre_exec(rule, NULL, line.c_str(), line.size(),
                            0, 0, ovector, ovec_size) >= 0)
                {
                    /* capture 1 is the rule name, capture 2 the text
                     * after ":=" */
                    string name(line, ovector[2], ovector[3] - ovector[2]);
                    string definition(line,
                            ovector[4], ovector[5] - ovector[4]);
                    refptr<RuleDefinition> rd = new RuleDefinition();
                    if (rd->create(name, definition))
                    {
                        addRuleDefinition(rd);
                    }
                    else
                    {
                        cerr << "Error in rule definition ending on line "
                            << lineno << endl;
                        return false;
                    }
                }
                else
                {
                    cerr << "Unrecognized input on line " << lineno << endl;
                    return false;
                }
                break;
        }
    }
    /* release the compiled patterns (reached on the success path only) */
    for (int i = 0; i < sizeof(exprs)/sizeof(exprs[0]); i++)
    {
        pcre_free(*exprs[i].re);
    }
    return true;
 }
--- a/Parser.h
+++ b/Parser.h
@ -1,61 +0,0 @@
 #ifndef PARSER_H
 #define PARSER_H
 #include <vector>
 #include <string>
 #include <list>
 #include <map>
 #include "refptr.h"
 #include "TokenDefinition.h"
 #include "RuleDefinition.h"
 /*
  * Parser collects the token and rule definitions read from a grammar file
  * and writes the generated header/body source files from the built-in
  * templates.
  */
 class Parser
 {
    public:
        Parser();
        /* Append a token definition; insertion order is preserved. */
        void addTokenDefinition(refptr<TokenDefinition> td)
        {
            m_tokens.push_back(td);
        }
        /* Append a rule definition; insertion order is preserved. */
        void addRuleDefinition(refptr<RuleDefinition> rd)
        {
            m_rules.push_back(rd);
        }
        /* Write <fname>.h and <fname>.<extension>. */
        bool write(const std::string & fname);
        /* Parse a grammar description held in buff. */
        bool parseInputFile(char * buff, int size);
        /* Accessors are const-qualified so they also work on const Parsers. */
        void setClassName(const std::string & cn) { m_classname = cn; }
        std::string getClassName() const { return m_classname; }
        void setNamespace(const std::string & ns) { m_namespace = ns; }
        std::string getNamespace() const { return m_namespace; }
        void setExtension(const std::string & e) { m_extension = e; }
        std::string getExtension() const { return m_extension; }
    protected:
        refptr<std::string> buildTokenList();
        refptr<std::string> buildBuildToken();
        bool writeTmpl(std::ostream & out, char * dat, int len);
        refptr<std::string> getReplacement(const std::string & name);
        /* Register (or overwrite) the text substituted for {%name%}. */
        void setReplacement(const std::string & name, refptr<std::string> val)
        {
            m_replacements[name] = val;
        }
        void makeDefine(const std::string & defname,
                const std::string & definition);
        std::list<TokenDefinitionRef> m_tokens;
        std::vector< refptr< RuleDefinition > > m_rules;
        std::string m_classname;
        std::string m_namespace;
        std::string m_extension;
        /* placeholder name -> replacement text used by writeTmpl() */
        std::map< std::string, refptr<std::string> > m_replacements;
        refptr<std::string> m_token_data;
        refptr<std::string> m_token_code;
        refptr<std::string> m_defines;
 };
 #endif
--- a/5
+++ b/5
@ -1,5 +0,0 @@
 Imbecile is a bottom-up parser generator. It targets C++ and automatically
 generates a class hierarchy for interacting with the parser.
 Imbecile generates both a lexer and a parser based on the rules given to
 it in the input file.
--- a/README.md
+++ b/README.md
@ -0,0 +1,31 @@
 # The Propane Parser Generator
 Propane is an LR Parser Generator (LPG) which:
  * accepts LR(0), SLR, and LALR grammars
  * generates a built-in lexer to tokenize input
  * supports UTF-8 lexer inputs
  * generates a table-driven parser to parse input in linear time
  * is MIT-licensed
  * is distributable as a standalone Ruby script
 ## Installation
 TODO
 ## Usage
 TODO: Write usage instructions here
 ## Development
 After checking out the repository, run `bundle install` to install dependencies.
 Run `rake spec` to execute tests.
 ## Contributing
 Bug reports and pull requests are welcome on GitHub at https://github.com/holtrop/propane.
 ## License
 Propane is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
--- a/9
+++ b/9
@ -0,0 +1,9 @@
 # Rake tasks for running the RSpec suite.
 require "rspec/core/rake_task"
 # Defines the `spec` task with one optional task argument, `example_pattern`.
 # When the argument is given, it is passed to rspec via `-e` together with
 # the documentation formatter.
 RSpec::Core::RakeTask.new(:spec, :example_pattern) do |task, args|
  if args.example_pattern
    task.rspec_opts = %W[-e "#{args.example_pattern}" -f documentation]
  end
 end
 # Running plain `rake` executes the specs.
 task :default => :spec
--- a/RuleDefinition.cc
+++ b/RuleDefinition.cc
@ -1,9 +0,0 @@
 #include "RuleDefinition.h"
 using namespace std;
 /*
  * Initialise this rule from its name and definition text.
  *
  * Only the name is stored; `definition` is accepted but not interpreted.
  * Always returns true. The function previously fell off the end without
  * returning a value, although callers branch on the result.
  */
 bool RuleDefinition::create(const string & name, const string & definition)
 {
    (void) definition;  /* not used yet */
    m_name = name;
    return true;
 }
--- a/RuleDefinition.h
+++ b/RuleDefinition.h
@ -1,16 +0,0 @@
 #ifndef RULEDEFINITION_H
 #define RULEDEFINITION_H
 #include <string>
 /* A single grammar rule, identified by its name. */
 class RuleDefinition
 {
    public:
        /* Initialise the rule from a name and its definition text. */
        bool create(const std::string & name, const std::string & definition);
    protected:
        /* rule name as passed to create() */
        std::string m_name;
 };
 #endif
--- a/TokenDefinition.cc
+++ b/TokenDefinition.cc
@ -1,125 +0,0 @@
 #include <pcre.h>
 #include <iostream>
 #include <string>
 #include <vector>
 #include "TokenDefinition.h"
 #include "refptr.h"
 using namespace std;
 /* characters treated as whitespace by trim() */
 #define WHITESPACE " \n\r\t\v"
 /* Return a copy of s without leading and trailing whitespace; a string
  * consisting only of whitespace (or an empty one) yields "". */
 static string trim(string s)
 {
    const size_t tail = s.find_last_not_of(WHITESPACE);
    if (tail == string::npos)
        return "";
    const size_t head = s.find_first_not_of(WHITESPACE);
    return s.substr(head, tail - head + 1);
 }
 /*
  * Split str at every occurrence of delim and return the pieces in order.
  *
  * Pieces between two delimiters are kept even when empty; a trailing empty
  * piece is dropped. The whole delimiter is consumed at each split (only
  * one character used to be skipped, which left delimiter residue behind
  * for multi-character delimiters). An empty delimiter performs no
  * splitting; it previously never terminated.
  */
 static refptr< vector<string> > split(const string & delim, string str)
 {
    refptr< vector<string> > ret = new vector<string>();
    if (!delim.empty())
    {
        size_t pos;
        while ( (pos = str.find(delim)) != string::npos )
        {
            ret->push_back(str.substr(0, pos));
            str.erase(0, pos + delim.size());
        }
    }
    if (str != "")
        ret->push_back(str);
    return ret;
 }
 /* Return orig with a backslash inserted before every backslash and every
  * double quote, so that it can be placed inside a C string literal. */
 static string c_escape(const string & orig)
 {
    string escaped;
    for (size_t i = 0; i < orig.size(); ++i)
    {
        const char ch = orig[i];
        switch (ch)
        {
            case '\\':
            case '"':
                escaped += '\\';
                break;
            default:
                break;
        }
        escaped += ch;
    }
    return escaped;
 }
 /* A new token definition starts with its process flag cleared. */
 TokenDefinition::TokenDefinition()
    : m_process(false)
 {
 }
 /*
  * Initialise the token from its name and regular expression.
  *
  * The expression is compiled with PCRE only to validate it; the compiled
  * form is freed again and just the source text is stored.
  *
  * Returns false (after printing the PCRE error to stderr) when the
  * expression does not compile, true otherwise.
  */
 bool TokenDefinition::create(const string & name,
        const string & definition)
 {
    const char * errptr;
    int erroffset;
    pcre * re = pcre_compile(definition.c_str(), 0, &errptr, &erroffset, NULL);
    if (re == NULL)
    {
        cerr << "Error compiling regular expression '" << definition
            << "' at position " << erroffset << ": " << errptr << endl;
        return false;
    }
    m_name = name;
    m_definition = definition;
    pcre_free(re);
    /* Disabled: parsing of a comma-separated flag list. It refers to a
     * `flags` variable that is not declared in this function. */
 #if 0
    refptr< vector< string > > parts = split(",", flags);
    for (int i = 0, sz = parts->size(); i < sz; i++)
    {
        (*parts)[i] = trim((*parts)[i]);
        string & s = (*parts)[i];
        if (s == "p")
        {
            m_process = true;
        }
        else
        {
            cerr << "Unknown token flag \"" << s << "\"" << endl;
            return false;
        }
    }
 #endif
    return true;
 }
 /* Return the token's regular expression with backslashes and double
  * quotes escaped by c_escape(). */
 string TokenDefinition::getCString() const
 {
    return c_escape(m_definition);
 }
 /* Generate the C++ class declaration for this token: a subclass of Token
  * that declares process() only when the process flag is set and carries
  * the token's data text in its protected section. */
 string TokenDefinition::getClassDefinition() const
 {
    string decl("class ");
    decl += getClassName();
    decl += " : public Token {\n";
    decl += "public:\n";
    if (m_process)
        decl += "    virtual void process(const Matches & matches);\n";
    decl += "\n";
    decl += "protected:\n";
    decl += m_data;
    decl += "\n";
    decl += "};\n";
    return decl;
 }
 /* Generate the out-of-class definition of process() wrapping the token's
  * code text; tokens without code yield an empty string. */
 string TokenDefinition::getProcessMethod() const
 {
    if (m_code == "")
        return string();
    string method = "void " + getClassName() + "::process(const Matches & matches) {\n";
    method += m_code + "\n";
    method += "}\n";
    return method;
 }
--- a/TokenDefinition.h
+++ b/TokenDefinition.h
@ -1,37 +0,0 @@
 #ifndef TOKENDEFINITION_H
 #define TOKENDEFINITION_H
 #include <string>
 #include "refptr.h"
 /*
  * One lexer token: its name, its regular expression, and the optional
  * data/code text attached to it in the grammar file.
  */
 class TokenDefinition
 {
    public:
        TokenDefinition();
        /* Initialise from a name and a regular expression. */
        bool create(const std::string & name,
                const std::string & definition);
        /* Regular expression escaped for use in a C string literal. */
        std::string getCString() const;
        std::string getName() const { return m_name; }
        bool getProcessFlag() const { return m_process; }
        void setProcessFlag(bool p) { m_process = p; }
        /* Append text to the token's data section. */
        void addData(const std::string & d) { m_data += d; }
        std::string getData() const { return m_data; }
        /* Append text to the token's code; this also sets the process flag. */
        void addCode(const std::string & c) { m_code += c; m_process = true; }
        std::string getCode() const { return m_code; }
        std::string getClassDefinition() const;
        std::string getProcessMethod() const;
        /* Identifier used for the generated #define: "TK_" + name. */
        std::string getIdentifier() const { return "TK_" + m_name; }
        /* Name of the generated token class: "Tk" + name. */
        std::string getClassName() const { return "Tk" + m_name; }
    protected:
        std::string m_name;
        std::string m_definition;
        bool m_process;
        std::string m_data;
        std::string m_code;
 };
 typedef refptr<TokenDefinition> TokenDefinitionRef;
 #endif
--- a/assets/parser.d.erb
+++ b/assets/parser.d.erb
@ -0,0 +1,252 @@
 <% if @modulename %>
 module <%= @modulename %>;
 <% end %>
 class <%= classname %>
 {
    enum
    {
 <% @tokens.each_with_index do |(name, token), index| %>
 <%   if token.name %>
        TOKEN_<%= token.c_name %> = <%= index %>,
 <%   end %>
 <% end %>
        TOKEN_EOF = <%= TOKEN_EOF %>,
        TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>,
        TOKEN_DROP = <%= TOKEN_DROP %>,
        TOKEN_NONE = <%= TOKEN_NONE %>,
    }
    static immutable string TokenNames[] = [
 <% @tokens.each_with_index do |(name, token), index| %>
 <%   if token.name %>
        "<%= token.name %>",
 <%   else %>
        null,
 <%   end %>
 <% end %>
    ];
    /**
     * UTF-8 decoder used by the lexer.
     */
    static class Decoder
    {
        enum
        {
            /** Returned when the input is not a decodable sequence. */
            CODE_POINT_INVALID = 0xFFFFFFFE,
            /** Returned when no input bytes remain. */
            CODE_POINT_EOF = 0xFFFFFFFF,
        }
        struct DecodedCodePoint
        {
            uint code_point;
            uint code_point_length;
        }
        /**
         * Decode the code point starting at input.
         *
         * Params:
         *   input = pointer to the first byte to decode
         *   input_length = number of bytes available at input
         *
         * Returns: the code point and the number of bytes it occupies;
         *   CODE_POINT_EOF with length 0 when input_length is 0; or
         *   CODE_POINT_INVALID with length 0 when the lead byte cannot start
         *   a sequence, the sequence is truncated, or a following byte is
         *   not a continuation byte (10xxxxxx).
         */
        static DecodedCodePoint decode_code_point(const(ubyte) * input, size_t input_length)
        {
            if (input_length == 0u)
            {
                return DecodedCodePoint(CODE_POINT_EOF, 0u);
            }
            ubyte c = *input;
            uint code_point;
            uint code_point_length;
            if ((c & 0x80u) == 0u)
            {
                code_point = c;
                code_point_length = 1u;
            }
            else
            {
                ubyte following_bytes;
                if ((c & 0xE0u) == 0xC0u)
                {
                    code_point = c & 0x1Fu;
                    following_bytes = 1u;
                }
                else if ((c & 0xF0u) == 0xE0u)
                {
                    code_point = c & 0x0Fu;
                    following_bytes = 2u;
                }
                else if ((c & 0xF8u) == 0xF0u)
                {
                    code_point = c & 0x07u;
                    following_bytes = 3u;
                }
                else if ((c & 0xFCu) == 0xF8u)
                {
                    code_point = c & 0x03u;
                    following_bytes = 4u;
                }
                else if ((c & 0xFEu) == 0xFCu)
                {
                    code_point = c & 0x01u;
                    following_bytes = 5u;
                }
                else
                {
                    /* A stray continuation byte (10xxxxxx) or 0xFE/0xFF
                     * cannot begin a sequence. Without this branch such a
                     * byte was reported as code point 0 of length 1. */
                    return DecodedCodePoint(CODE_POINT_INVALID, 0u);
                }
                if (input_length <= following_bytes)
                {
                    return DecodedCodePoint(CODE_POINT_INVALID, 0u);
                }
                code_point_length = following_bytes + 1u;
                while (following_bytes-- > 0u)
                {
                    input++;
                    /* Every following byte must look like 10xxxxxx. */
                    if ((*input & 0xC0u) != 0x80u)
                    {
                        return DecodedCodePoint(CODE_POINT_INVALID, 0u);
                    }
                    code_point <<= 6u;
                    code_point |= *input & 0x3Fu;
                }
            }
            return DecodedCodePoint(code_point, code_point_length);
        }
    }
    /**
     * Table-driven lexer. The transition and state tables are emitted by
     * the template from lexer.dfa.build_tables.
     */
    static class Lexer
    {
        /* One transition: code points in [first, last] (both inclusive)
         * lead to state `destination`. */
        private struct Transition
        {
            uint first;
            uint last;
            uint destination;
        }
        /* One state: its slice of the transition table and the token it
         * accepts (TOKEN_NONE when it accepts nothing). */
        private struct State
        {
            uint transition_table_index;
            uint n_transitions;
            uint accepts;
        }
 <% transition_table, state_table = lexer.dfa.build_tables %>
        private static const Transition transitions[] = [
 <% transition_table.each do |transition_table_entry| %>
            Transition(<%= transition_table_entry[:first] %>u, <%= transition_table_entry[:last] %>u, <%= transition_table_entry[:destination] %>u),
 <% end %>
        ];
        private static const State states[] = [
 <% state_table.each do |state_table_entry| %>
            State(<%= state_table_entry[:transition_table_index] %>u, <%= state_table_entry[:n_transitions] %>u, <%= state_table_entry[:accepts] %>u),
 <% end %>
        ];
        /* Result of one lexing step: the row/column where the token
         * starts, its length in bytes, and the token id. */
        struct LexedToken
        {
            size_t row;
            size_t col;
            size_t length;
            uint token;
        }
        private const(ubyte) * m_input;
        private size_t m_input_length;
        private size_t m_input_position;
        private size_t m_input_row;
        private size_t m_input_col;
        /* Lex the input_length bytes starting at input. */
        this(const(ubyte) * input, size_t input_length)
        {
            m_input = input;
            m_input_length = input_length;
        }
        /* Return the next token, skipping every TOKEN_DROP result. */
        LexedToken lex_token()
        {
            for (;;)
            {
                LexedToken lt = attempt_lex_token();
                if (lt.token != TOKEN_DROP)
                {
                    return lt;
                }
            }
        }
        /* Lex a single token starting at the current position, keeping the
         * most recent accepting state seen while transitions exist. The
         * result carries TOKEN_EOF at end of input, TOKEN_DECODE_ERROR when
         * the decoder reports an invalid sequence, and TOKEN_NONE when no
         * accepting state was reached. The input position, row and column
         * advance only when a token is accepted. */
        private LexedToken attempt_lex_token()
        {
            LexedToken lt = LexedToken(m_input_row, m_input_col, 0, TOKEN_NONE);
            struct LexedTokenState
            {
                size_t length;
                size_t delta_row;
                size_t delta_col;
                uint token;
            }
            /* snapshot taken at the last accepting state */
            LexedTokenState last_accepts_info;
            last_accepts_info.token = TOKEN_NONE;
            /* progress of the current attempt */
            LexedTokenState attempt_info;
            /* default-initialized, so lexing starts in state 0 */
            uint current_state;
            for (;;)
            {
                auto decoded = Decoder.decode_code_point(&m_input[m_input_position + attempt_info.length], m_input_length - m_input_position - attempt_info.length);
                if (decoded.code_point == Decoder.CODE_POINT_INVALID)
                {
                    lt.token = TOKEN_DECODE_ERROR;
                    return lt;
                }
                bool lex_continue = false;
                if (decoded.code_point != Decoder.CODE_POINT_EOF)
                {
                    uint dest = transition(current_state, decoded.code_point);
                    /* cast(uint)-1 is transition()'s "no transition" value */
                    if (dest != cast(uint)-1)
                    {
                        lex_continue = true;
                        attempt_info.length += decoded.code_point_length;
                        if (decoded.code_point == '\n')
                        {
                            attempt_info.delta_row++;
                            attempt_info.delta_col = 0u;
                        }
                        else
                        {
                            attempt_info.delta_col++;
                        }
                        current_state = dest;
                        if (states[current_state].accepts != TOKEN_NONE)
                        {
                            attempt_info.token = states[current_state].accepts;
                            last_accepts_info = attempt_info;
                        }
                    }
                }
                else if (attempt_info.length == 0u)
                {
                    /* end of input with nothing consumed */
                    lt.token = TOKEN_EOF;
                    break;
                }
                if (!lex_continue)
                {
                    /* no further transition: commit the last accepted
                     * token, if there was one */
                    if (last_accepts_info.token != TOKEN_NONE)
                    {
                        lt.token = last_accepts_info.token;
                        lt.length = last_accepts_info.length;
                        m_input_position += last_accepts_info.length;
                        m_input_row += last_accepts_info.delta_row;
                        /* after a newline the column restarts; otherwise
                         * it advances */
                        if (last_accepts_info.delta_row != 0u)
                        {
                            m_input_col = last_accepts_info.delta_col;
                        }
                        else
                        {
                            m_input_col += last_accepts_info.delta_col;
                        }
                    }
                    break;
                }
            }
            return lt;
        }
        /* Return the destination state for code_point from current_state,
         * or cast(uint)-1 when no transition range contains it. */
        private uint transition(uint current_state, uint code_point)
        {
            uint transition_table_index = states[current_state].transition_table_index;
            for (uint i = 0u; i < states[current_state].n_transitions; i++)
            {
                if ((transitions[transition_table_index + i].first <= code_point) &&
                    (code_point <= transitions[transition_table_index + i].last))
                {
                    return transitions[transition_table_index + i].destination;
                }
            }
            return cast(uint)-1;
        }
    }
 }
--- a/bin/propane
+++ b/bin/propane
@ -0,0 +1,5 @@
 #!/usr/bin/env ruby
 # Command-line entry point: pass a copy of the argument list to
 # Propane::CLI.run and use its return value as the process exit status.
 require "propane"
 status = Propane::CLI.run(ARGV.dup)
 exit status
--- a/imbecile.cc
+++ b/imbecile.cc
@ -1,101 +0,0 @@
 #include <getopt.h>
 #include <iostream>
 #include <fstream>
 #include "refptr.h"
 #include "Parser.h"
 using namespace std;
 string buildOutputFilename(string & input_fname);
 int main(int argc, char * argv[])
 {
    int longind = 1;
    int opt;
    Parser p;
    string outfile;
    static struct option longopts[] = {
        /* name, has_arg, flag, val */
        { "classname", required_argument, NULL, 'c' },
        { "extension", required_argument, NULL, 'e' },
        { "namespace", required_argument, NULL, 'n' },
        { "outfile", required_argument, NULL, 'o' },
        { NULL, 0, NULL, 0 }
    };
    while ((opt = getopt_long(argc, argv, "", longopts, &longind)) != -1)
    {
        switch (opt)
        {
            case 'c':   /* classname */
                p.setClassName(optarg);
                break;
            case 'e':   /* extension */
                p.setExtension(optarg);
                break;
            case 'n':   /* namespace */
                p.setNamespace(optarg);
                break;
            case 'o':   /* outfile */
                outfile = optarg;
                break;
        }
    }
    if (optind >= argc)
    {
        cerr << "Usage: imbecile [options] <input-file>" << endl;
        return 1;
    }
    string input_fname = argv[optind];
    ifstream ifs;
    ifs.open(input_fname.c_str(), ios::binary);
    if (!ifs.is_open())
    {
        cerr << "Error opening input file: '" << input_fname << "'";
        return 2;
    }
    ifs.seekg(0, ios_base::end);
    int size = ifs.tellg();
    ifs.seekg(0, ios_base::beg);
    char * buff = new char[size];
    ifs.read(buff, size);
    ifs.close();
    if (outfile == "")
        outfile = buildOutputFilename(input_fname);
    if (!p.parseInputFile(buff, size))
    {
        cerr << "Error parsing " << input_fname << endl;
        return 3;
    }
    if (!p.write(outfile))
    {
        cerr << "Error processing " << input_fname << endl;
        return 4;
    }
    delete[] buff;
    return 0;
 }
 /**
  * Derive the default output file name from the input file name.
  *
  * A trailing ".I" is removed when at least one character precedes it;
  * any other name is returned unchanged.
  *
  * @param input_fname Input file name.
  *
  * @return Output file name.
  */
 std::string buildOutputFilename(std::string & input_fname)
 {
    const std::string suffix = ".I";
    const std::string::size_type len = input_fname.length();
    const bool has_suffix = (len > suffix.length()) &&
        (input_fname.compare(len - suffix.length(), suffix.length(), suffix) == 0);
    return has_suffix ? input_fname.substr(0, len - suffix.length()) : input_fname;
 }
--- a/lib/propane.rb
+++ b/lib/propane.rb
@ -0,0 +1,137 @@
 require "erb"
 require "set"
 require_relative "propane/cli"
 require_relative "propane/code_point_range"
 require_relative "propane/fa"
 require_relative "propane/fa/state"
 require_relative "propane/fa/state/transition"
 require_relative "propane/lexer"
 require_relative "propane/lexer/dfa"
 require_relative "propane/parser"
 require_relative "propane/parser/item"
 require_relative "propane/parser/item_set"
 require_relative "propane/regex"
 require_relative "propane/regex/nfa"
 require_relative "propane/regex/unit"
 require_relative "propane/rule"
 require_relative "propane/token"
 require_relative "propane/version"
 # Grammar front end and generator driver.
 #
 # An instance parses a grammar description into tokens and rules
 # (#initialize) and renders the parser template to an output file
 # (#generate). Propane.run wraps both steps and reports grammar errors.
 class Propane
  # EOF.
  TOKEN_EOF = 0xFFFFFFFC
  # Decoding error.
  TOKEN_DECODE_ERROR = 0xFFFFFFFD
  # Token ID for a "dropped" token.
  TOKEN_DROP = 0xFFFFFFFE
  # Invalid token ID.
  TOKEN_NONE = 0xFFFFFFFF
  # Raised for errors in the grammar input; rescued by Propane.run.
  class Error < RuntimeError
  end
  # Parse a grammar description.
  #
  # @param input [String]
  #   Grammar text. CRLF line endings are converted to LF on a copy, so the
  #   caller's string is not modified.
  #
  # @raise [Error] If the grammar contains unexpected input.
  def initialize(input)
    @tokens = {}
    @rules = {}
    input = input.gsub("\r\n", "\n")
    # parse_grammar consumes one construct from the front of input per call.
    while !input.empty?
      parse_grammar(input)
    end
  end
  # Generate the parser source file.
  #
  # @param output_file [String]
  #   Path of the file to write.
  # @param log_file [String, nil]
  #   Log file path. Not referenced by this method.
  #
  # @raise [Error] If the rules cannot be resolved (see #expand_rules).
  def generate(output_file, log_file)
    expand_rules
    # These locals are exposed to the ERB template through the binding
    # passed to erb.result below, so their names must not change.
    lexer = Lexer.new(@tokens)
    parser = Parser.new(@tokens, @rules)
    classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
    erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../assets/parser.d.erb")), trim_mode: "<>")
    result = erb.result(binding.clone)
    File.open(output_file, "wb") do |fh|
      fh.write(result)
    end
  end
  private
  # Consume one grammar construct from the front of the input.
  #
  # @param input [String]
  #   Remaining grammar text; the consumed prefix is removed in place.
  #
  # @raise [Error]
  #   On an invalid or duplicate token name, or unrecognized input.
  def parse_grammar(input)
    if input.slice!(/\A\s+/)
      # Skip white space.
    elsif input.slice!(/\A#.*\n/)
      # Skip comment lines.
    elsif input.slice!(/\Amodule\s+(\S+)\n/)
      @modulename = $1
    elsif input.slice!(/\Aclass\s+(\S+)\n/)
      @classname = $1
    elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
      name, pattern = $1, $2
      # A token given without a pattern matches its own name.
      if pattern.nil?
        pattern = name
      end
      unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
        raise Error.new("Invalid token name #{name}")
      end
      if @tokens[name]
        raise Error.new("Duplicate token name #{name}")
      else
        @tokens[name] = Token.new(name, pattern, @tokens.size)
      end
    elsif input.slice!(/\Adrop\s+(\S+)\n/)
      pattern = $1
      # Dropped tokens have no name. Store each one under its own key so a
      # later drop does not replace an earlier one. The key is an Array, so
      # it can never equal a String component looked up in #expand_rules.
      id = @tokens.size
      @tokens[[:drop, id]] = Token.new(nil, pattern, id)
    elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
      rule_name, components, code = $1, $2, $3
      components = components.strip.split(/\s+/)
      @rules[rule_name] ||= Rule.new(rule_name, @rules.size)
      @rules[rule_name].add_pattern(components, code)
    else
      # Keep the error message short for long inputs.
      if input.size > 25
        input = input.slice(0..20) + "..."
      end
      raise Error.new("Unexpected grammar input: #{input}")
    end
  end
  # Validate the rules and replace each pattern component name with the
  # Token or Rule object it names.
  #
  # @raise [Error]
  #   If a rule name collides with a token name, there is no "Start" rule,
  #   or a component names neither a token nor a rule.
  def expand_rules
    @rules.each_key do |rule_name|
      if @tokens.include?(rule_name)
        raise Error.new("Rule name collides with token name #{rule_name}")
      end
    end
    unless @rules["Start"]
      raise Error.new("Start rule not found")
    end
    @rules.each_value do |rule|
      rule.patterns.each do |pattern|
        pattern.components.map! do |component|
          if @tokens[component]
            @tokens[component]
          elsif @rules[component]
            @rules[component]
          else
            raise Error.new("Symbol #{component} not found")
          end
        end
      end
    end
  end
  class << self
    # Read a grammar file and generate the parser.
    #
    # @param input_file [String] Grammar file to read.
    # @param output_file [String] File to write.
    # @param log_file [String, nil] Passed through to #generate.
    #
    # @return [Integer]
    #   0 on success; 2 after printing the message of a grammar Error to
    #   stderr.
    def run(input_file, output_file, log_file)
      begin
        propane = Propane.new(File.read(input_file))
        propane.generate(output_file, log_file)
      rescue Error => e
        $stderr.puts e.message
        return 2
      end
      return 0
    end
  end
 end
--- a/lib/propane/cli.rb
+++ b/lib/propane/cli.rb
@ -0,0 +1,54 @@
 class Propane
  module CLI
    USAGE = <<EOF
 Usage: #{$0} [options] <input-file> <output-file>
 Options:
  --log LOG   Write log file
  --version   Show program version and exit
  -h, --help  Show this usage and exit
 EOF
    class << self
      # Parse the command-line arguments and run the generator.
      #
      # @param args [Array<String>]
      #   Command-line arguments.
      #
      # @return [Integer]
      #   0 after --version or --help; 1 for an unknown option, a --log
      #   option without a value, or a wrong number of file arguments; 2
      #   when the input file is not readable; otherwise the value returned
      #   by Propane.run.
      def run(args)
        params = []
        log_file = nil
        i = 0
        while i < args.size
          arg = args[i]
          case arg
          when "--log"
            # Report a missing value instead of silently ignoring --log.
            if i + 1 >= args.size
              $stderr.puts "Error: option --log requires an argument"
              return 1
            end
            i += 1
            log_file = args[i]
          when "--version"
            puts "propane v#{VERSION}"
            return 0
          when "-h", "--help"
            puts USAGE
            return 0
          when /^-/
            $stderr.puts "Error: unknown option #{arg}"
            return 1
          else
            params << arg
          end
          i += 1
        end
        if params.size != 2
          $stderr.puts "Error: specify input and output files"
          return 1
        end
        unless File.readable?(params[0])
          $stderr.puts "Error: cannot read #{params[0]}"
          return 2
        end
        Propane.run(*params, log_file)
      end
    end
  end
 end
--- a/lib/propane/code_point_range.rb
+++ b/lib/propane/code_point_range.rb
@ -0,0 +1,84 @@
 class Propane
  # An inclusive range of code points, ordered by first code point and then
  # by last code point.
  class CodePointRange
    MAX_CODE_POINT = 0xFFFFFFFF
    attr_reader :first
    attr_reader :last
    include Comparable
    # Build a CodePointRange.
    #
    # @param first [#ord] First code point (anything responding to #ord).
    # @param last [#ord, nil] Last code point; defaults to the first one.
    #
    # @raise [RuntimeError] If last is less than first.
    def initialize(first, last = nil)
      @first = first.ord
      @last = last ? last.ord : @first
      if @last < @first
        raise "Invalid CodePointRange: last code point must be > first code point"
      end
    end
    # Compare by first code point, then by last code point.
    def <=>(other)
      order = @first <=> other.first
      order == 0 ? (@last <=> other.last) : order
    end
    # @param v [CodePointRange, Integer]
    #
    # @return [Boolean]
    #   Whether the given range or code point lies entirely inside this
    #   range.
    def include?(v)
      low, high = v.is_a?(CodePointRange) ? [v.first, v.last] : [v, v]
      @first <= low && high <= @last
    end
    # @return [Integer] Number of code points in the range.
    def size
      1 + @last - @first
    end
    class << self
      # Build the ranges covering every code point from 0 through
      # MAX_CODE_POINT that is not covered by the given ranges.
      #
      # @param code_point_ranges [Array<CodePointRange>]
      #
      # @return [Array<CodePointRange>] Uncovered ranges in ascending order.
      def invert_ranges(code_point_ranges)
        gaps = []
        covered_through = -1
        code_point_ranges.sort.each do |range|
          gap_start = covered_through + 1
          if range.first > gap_start
            gaps << CodePointRange.new(gap_start, range.first - 1)
            covered_through = range.last
          elsif range.last > covered_through
            covered_through = range.last
          end
        end
        if covered_through < MAX_CODE_POINT
          gaps << CodePointRange.new(covered_through + 1, MAX_CODE_POINT)
        end
        gaps
      end
      # Find the leading subrange, starting at the lowest first code point,
      # that is not split by the boundary of any of the given ranges.
      #
      # @param code_point_ranges [Array<CodePointRange>]
      #
      # @return [CodePointRange, nil] nil when no ranges are given.
      def first_subrange(code_point_ranges)
        code_point_ranges.sort.inject do |best, candidate|
          if candidate.include?(best.first)
            candidate.last < best.last ? candidate : best
          elsif candidate.first <= best.last
            CodePointRange.new(best.first, candidate.first - 1)
          else
            best
          end
        end
      end
    end
  end
 end
--- a/lib/propane/fa.rb
+++ b/lib/propane/fa.rb
@ -0,0 +1,61 @@
 class Propane
  # Base class for finite automata: holds a start state and can number and
  # print every state reachable from it.
  class FA
    attr_reader :start_state
    def initialize
      @start_state = State.new
    end
    # Render the automaton as text: one header line per state ("<id>" plus
    # the accepted value, if any, then ":"), followed by one indented line
    # per transition giving its code point range (or "nil"), the
    # destination state id and the destination's accepted value, if any.
    #
    # @return [String]
    def to_s
      # Code points outside 32..127 are shown numerically in braces.
      printable = lambda { |cp| (32..127).cover?(cp) ? cp.chr : "{#{cp}}" }
      accept_suffix = lambda { |state| state.accepts ? " #{state.accepts}" : "" }
      ids = enumerate
      ids.map do |state, id|
        lines = state.transitions.map do |transition|
          label =
            if transition.nil?
              "nil"
            else
              range = transition.code_point_range
              text = printable[range.first]
              range.size > 1 ? text + "-" + printable[range.last] : text
            end
          destination = transition.destination
          "  #{label} => #{ids[destination]}#{accept_suffix[destination]}\n"
        end
        "#{id}#{accept_suffix[state]}:\n" + lines.join
      end.join
    end
    # Number every state reachable from the start state, depth-first, in
    # transition order. The result is computed once and cached.
    #
    # @return [Hash]
    #   Maps each reachable state to its Integer id; the start state is 0.
    def enumerate
      @_enumerated ||=
        begin
          numbering = {}
          # Explicit stack; destinations are pushed in reverse so that the
          # first transition is followed first.
          pending = [@start_state]
          until pending.empty?
            state = pending.pop
            next if numbering.include?(state)
            numbering[state] = numbering.size
            pending.concat(state.transitions.map(&:destination).reverse)
          end
          numbering
        end
    end
  end
 end
--- a/lib/propane/fa/state.rb
+++ b/lib/propane/fa/state.rb
@ -0,0 +1,51 @@
 class Propane
  class FA
    # A state of a finite automaton: an optional accepted value plus an
    # ordered list of outgoing transitions.
    class State
      attr_accessor :accepts
      attr_reader :transitions
      def initialize
        @transitions = []
      end
      # Append a transition to this state.
      #
      # @param code_point_range
      #   Code point range of the transition, or nil for a nil transition.
      # @param destination [State]
      #   State the transition leads to.
      def add_transition(code_point_range, destination)
        edge = Transition.new(code_point_range, destination)
        @transitions << edge
      end
      # Determine the set of states that can be reached by nil transitions
      # from this state.
      #
      # @return [Set<NFA::State>]
      #   Set of states, including this state itself.
      def nil_transition_states
        reached = Set[self]
        # Depth-first walk with an explicit stack; destinations are pushed
        # in reverse so that the first transition is followed first.
        pending = nil_transitions.map(&:destination).reverse
        until pending.empty?
          state = pending.pop
          next if reached.include?(state)
          reached << state
          pending.concat(state.nil_transitions.map(&:destination).reverse)
        end
        reached
      end
      # @return [Array<Transition>] Transitions without a code point range.
      def nil_transitions
        @transitions.select(&:nil?)
      end
      # @return [Array<Transition>] Transitions with a code point range.
      def cp_transitions
        @transitions.reject(&:nil?)
      end
    end
  end
 end
--- a/lib/propane/fa/state/transition.rb
+++ b/lib/propane/fa/state/transition.rb
@ -0,0 +1,23 @@
 class Propane
  class FA
    class State
      # A transition to a destination state, taken on any code point in
      # code_point_range. A transition without a range is a nil transition.
      class Transition
        attr_reader :code_point_range, :destination
        def initialize(code_point_range, destination)
          @code_point_range, @destination = code_point_range, destination
        end
        # @return [Boolean]
        #   Whether this is a nil transition, i.e. it has no code point
        #   range. Note that this overrides Object#nil?.
        def nil?
          code_point_range.nil?
        end
      end
    end
  end
 end
--- a/lib/propane/lexer.rb
+++ b/lib/propane/lexer.rb
@ -0,0 +1,13 @@
 class Propane
  # Lexer generator: builds and holds the DFA for a set of tokens.
  class Lexer
    # @return [DFA]
    #   Lexer DFA.
    attr_accessor :dfa
    # @param tokens
    #   Token collection, passed unchanged to DFA.new.
    def initialize(tokens)
      self.dfa = DFA.new(tokens)
    end
  end
 end
--- a/lib/propane/lexer/dfa.rb
+++ b/lib/propane/lexer/dfa.rb
@ -0,0 +1,118 @@
 class Propane
  class Lexer
    # Deterministic finite automaton built from the NFAs of a set of tokens.
    #
    # Each DFA state corresponds to a set of NFA states; sets are registered
    # and processed from a work list until no new sets appear.
    class DFA < FA
      # Build the DFA.
      #
      # @param tokens
      #   Collection yielding (name, token) pairs; each token's nfa is used.
      def initialize(tokens)
        super()
        # Combined NFA: nil transitions from one start state to the start
        # state of every token's NFA.
        start_nfa = Regex::NFA.new
        tokens.each do |name, token|
          start_nfa.start_state.add_transition(nil, token.nfa.start_state)
        end
        # Maps a set of NFA states to the index of its DFA state in @states.
        @nfa_state_sets = {}
        @states = []
        # NFA state sets registered but not yet processed.
        @to_process = Set.new
        nil_transition_states = start_nfa.start_state.nil_transition_states
        register_nfa_state_set(nil_transition_states)
        while @to_process.size > 0
          state_set = @to_process.first
          @to_process.delete(state_set)
          process_nfa_state_set(state_set)
        end
        # The first registered set (index 0) is the start state.
        @start_state = @states[0]
      end
      # Flatten the DFA into tables.
      #
      # @return [Array]
      #   Two-element array [transition_table, state_table]. Each state
      #   table entry has :transition_table_index, :n_transitions and
      #   :accepts; each transition table entry has :first, :last and
      #   :destination (the destination state's id from #enumerate).
      def build_tables
        transition_table = []
        state_table = []
        states = enumerate
        states.each do |state, id|
          # TOKEN_NONE when the state accepts nothing, the token's id when
          # the accepted token has a name, TOKEN_DROP otherwise.
          accepts =
            if state.accepts.nil?
              TOKEN_NONE
            elsif state.accepts.name
              state.accepts.id
            else
              TOKEN_DROP
            end
          state_table << {
            transition_table_index: transition_table.size,
            n_transitions: state.transitions.size,
            accepts: accepts,
          }
          state.transitions.each do |transition|
            transition_table << {
              first: transition.code_point_range.first,
              last: transition.code_point_range.last,
              destination: states[transition.destination],
            }
          end
        end
        [transition_table, state_table]
      end
      private
      # Create a DFA state for a set of NFA states and queue the set for
      # processing, unless the set has already been registered.
      def register_nfa_state_set(nfa_state_set)
        unless @nfa_state_sets.include?(nfa_state_set)
          state_id = @states.size
          @nfa_state_sets[nfa_state_set] = state_id
          @states << State.new
          @to_process << nfa_state_set
        end
      end
      # Fill in the accepted token and the outgoing transitions of the DFA
      # state that corresponds to the given set of NFA states.
      def process_nfa_state_set(nfa_state_set)
        state_id = @nfa_state_sets[nfa_state_set]
        state = @states[state_id]
        # State 0 (the start state) is never given an accepted token.
        if state_id > 0
          # Among the accepting NFA states, keep the token with the lowest
          # id.
          nfa_state_set.each do |nfa_state|
            if nfa_state.accepts
              if state.accepts
                if nfa_state.accepts.id < state.accepts.id
                  state.accepts = nfa_state.accepts
                end
              else
                state.accepts = nfa_state.accepts
              end
            end
          end
        end
        transitions = transitions_for(nfa_state_set)
        # Repeatedly take the first subrange of the remaining transitions,
        # add one DFA transition for it, then drop or trim the transitions
        # it covered.
        while transitions.size > 0
          subrange = CodePointRange.first_subrange(transitions.map(&:code_point_range))
          # NFA destinations of every transition that covers the subrange.
          dest_nfa_states = transitions.reduce(Set.new) do |result, transition|
            if transition.code_point_range.include?(subrange)
              result << transition.destination
            end
            result
          end
          # Extend the destinations with everything reachable from them by
          # nil transitions.
          dest_nfa_states = dest_nfa_states.reduce(Set.new) do |result, dest_nfa_state|
            result + dest_nfa_state.nil_transition_states
          end
          register_nfa_state_set(dest_nfa_states)
          dest_state = @states[@nfa_state_sets[dest_nfa_states]]
          state.add_transition(subrange, dest_state)
          # Remove transitions that end at or before the end of the
          # subrange.
          transitions.delete_if do |transition|
            transition.code_point_range.last <= subrange.last
          end
          # Trim the remaining transitions so they start after the subrange.
          transitions.map! do |transition|
            if transition.code_point_range.first <= subrange.last
              Regex::NFA::State::Transition.new(CodePointRange.new(subrange.last + 1, transition.code_point_range.last), transition.destination)
            else
              transition
            end
          end
        end
      end
      # Collect the code point transitions of every state in the set.
      #
      # @return [Array] Transitions, in set iteration order.
      def transitions_for(nfa_state_set)
        nfa_state_set.reduce([]) do |result, state|
          result + state.cp_transitions
        end
      end
    end
  end
 end
--- a/lib/propane/parser.rb
+++ b/lib/propane/parser.rb
@ -0,0 +1,84 @@
 class Propane
  # Builds the parser item sets for a grammar and the tables derived from
  # them.
  class Parser
    # Build every item set reachable from the "Start" rule and print a
    # description of each to standard output.
    #
    # @param tokens
    #   Grammar tokens. Not referenced by this method.
    # @param rules [Hash]
    #   Maps rule names to rules; must contain "Start". The EOF token is
    #   appended to the components of each "Start" pattern.
    def initialize(tokens, rules)
      @token_eof = Token.new("$", nil, TOKEN_EOF)
      @item_sets = []
      # Maps an item set to the registered item set equal to it.
      @item_sets_set = {}
      start_items = rules["Start"].patterns.map do |pattern|
        pattern.components << @token_eof
        Item.new(pattern, 0)
      end
      eval_item_sets = Set.new
      eval_item_sets << ItemSet.new(start_items)
      # Register item sets one generation at a time; each newly registered
      # set contributes its follow sets to the next generation.
      while eval_item_sets.size > 0
        this_eval_item_sets = eval_item_sets
        eval_item_sets = Set.new
        this_eval_item_sets.each do |item_set|
          unless @item_sets_set.include?(item_set)
            item_set.id = @item_sets.size
            @item_sets << item_set
            @item_sets_set[item_set] = item_set
            item_set.follow_symbols.each do |follow_symbol|
              unless follow_symbol == @token_eof
                follow_set = item_set.build_follow_set(follow_symbol)
                eval_item_sets << follow_set
              end
            end
          end
        end
      end
      @item_sets.each do |item_set|
        process_item_set(item_set)
        puts "Item set #{item_set.id}:"
        ids = item_set.in_sets.map(&:id)
        if ids.size > 0
          puts "    (in from #{ids.join(", ")})"
        end
        puts item_set
        item_set.follow_item_set.each do |follow_symbol, follow_item_set|
          puts " #{follow_symbol.name} => #{follow_item_set.id}"
        end
        puts
      end
    end
    # Flatten the item sets into tables.
    #
    # @return [Array]
    #   Two-element array [state_table, shift_table]. Each state table
    #   entry has :shift_index and :n_shifts; each shift table entry has
    #   :token_id and :state_id.
    def build_tables
      shift_table = []
      state_table = []
      @item_sets.each do |item_set|
        shift_entries = item_set.follow_symbols.select do |follow_symbol|
          # process_item_set records no follow item set for the EOF token,
          # so it cannot produce a shift entry; looking it up would yield
          # nil and fail on nil.id.
          follow_symbol.is_a?(Token) && follow_symbol != @token_eof
        end.map do |follow_symbol|
          {
            token_id: follow_symbol.id,
            state_id: item_set.follow_item_set[follow_symbol].id,
          }
        end
        state_table << {
          shift_index: shift_table.size,
          n_shifts: shift_entries.size,
        }
        shift_table += shift_entries
      end
      [state_table, shift_table]
    end
    private
    # Link an item set to the registered item set reached on each of its
    # follow symbols (except EOF), and record the reverse link.
    def process_item_set(item_set)
      item_set.follow_symbols.each do |follow_symbol|
        unless follow_symbol == @token_eof
          follow_set = @item_sets_set[item_set.build_follow_set(follow_symbol)]
          item_set.follow_item_set[follow_symbol] = follow_set
          follow_set.in_sets << item_set
        end
      end
    end
  end
 end
--- a/lib/propane/parser/item.rb
+++ b/lib/propane/parser/item.rb
@ -0,0 +1,69 @@
 class Propane
  class Parser
    # A rule pattern together with a position in its component list.
    # Items with equal pattern and position are equal and hash alike.
    class Item
      attr_reader :pattern
      attr_reader :position
      def initialize(pattern, position)
        @pattern, @position = pattern, position
      end
      # @return The component at the current position, or nil when the
      #   position is at the end of the pattern.
      def next_component
        @pattern.components[@position]
      end
      def hash
        [@pattern, @position].hash
      end
      def ==(other)
        @pattern == other.pattern && @position == other.position
      end
      def eql?(other)
        self == other
      end
      # @return [Array<Item>]
      #   When the component at the current position is a Rule, one item
      #   at position 0 for each of its patterns; otherwise an empty array.
      def closed_items
        symbol = next_component
        return [] unless symbol.is_a?(Rule)
        symbol.patterns.map { |rule_pattern| Item.new(rule_pattern, 0) }
      end
      # @return The component at the current position (same value as
      #   #next_component).
      def follow_symbol
        next_component
      end
      # @return [Boolean] Whether the follow symbol equals the given symbol.
      def followed_by?(symbol)
        follow_symbol == symbol
      end
      # @return [Item] Item for the same pattern, one position further.
      def next_position
        Item.new(@pattern, @position + 1)
      end
      # @return [String]
      #   "<rule name> -> <component names>", with "." marking the
      #   position.
      def to_s
        parts = @pattern.components.map(&:name)
        # The marker only appears for positions from 0 through the number
        # of components.
        parts.insert(@position, ".") if (0..parts.size).cover?(@position)
        "#{@pattern.rule.name} -> #{parts.join(" ")}"
      end
    end
  end
 end
--- a/lib/propane/parser/item_set.rb
+++ b/lib/propane/parser/item_set.rb
@ -0,0 +1,76 @@
 class Propane
  class Parser
    # A closed set of parser items. Item sets with equal items are equal
    # and hash alike.
    class ItemSet
      attr_reader :items
      attr_accessor :id
      # @return [Hash]
      #   Maps a follow symbol to its item set.
      attr_reader :follow_item_set
      # @return [Set]
      #   Item sets leading to this item set.
      attr_reader :in_sets
      # @param items [Enumerable]
      #   Initial items; the set is closed over them immediately.
      def initialize(items)
        @items = Set.new(items)
        @follow_item_set = {}
        @in_sets = Set.new
        close!
      end
      # @return [Set] Non-nil follow symbols of the items.
      def follow_symbols
        symbols = @items.map { |item| item.follow_symbol }
        Set.new(symbols.compact)
      end
      # @return [ItemSet]
      #   New item set made of the next positions of the items followed by
      #   the given symbol.
      def build_follow_set(symbol)
        advanced = items_followed_by(symbol).map { |item| item.next_position }
        ItemSet.new(advanced)
      end
      def hash
        @items.hash
      end
      def ==(other)
        @items.eql?(other.items)
      end
      def eql?(other)
        self == other
      end
      # @return [String] The items' string forms, one per line.
      def to_s
        @items.map { |item| item.to_s }.join("\n")
      end
      private
      # Repeatedly add the closed items of the most recently added items
      # until nothing new is found.
      def close!
        frontier = @items
        until frontier.empty?
          discovered = Set.new
          frontier.each do |item|
            item.closed_items.each do |candidate|
              discovered << candidate unless @items.include?(candidate)
            end
          end
          @items = @items + discovered
          frontier = discovered
        end
      end
      # @return Items whose follow symbol is the given symbol.
      def items_followed_by(symbol)
        @items.select { |item| item.followed_by?(symbol) }
      end
    end
  end
 end
--- a/lib/propane/regex.rb
+++ b/lib/propane/regex.rb
@ -0,0 +1,162 @@
 class Propane
  # Parses a token pattern into a tree of units and builds an NFA from it.
  class Regex
    attr_reader :unit
    attr_reader :nfa
    # @param pattern [String]
    #   Pattern text; a copy is consumed while parsing.
    #
    # @raise [Error] If the pattern is malformed.
    def initialize(pattern)
      @pattern = pattern.dup
      @unit = parse_alternates
      # parse_alternates stops early at an unmatched ")"; report whatever
      # is left before building the NFA.
      if @pattern != ""
        raise Error.new(%[Unexpected "#{@pattern}" in pattern])
      end
      @nfa = @unit.to_nfa
    end
    private
    # Parse alternates until the pattern is exhausted or a ")" is reached
    # (the ")" is left in the pattern for the caller).
    #
    # @return [AlternatesUnit]
    def parse_alternates
      au = AlternatesUnit.new
      while @pattern != ""
        c = @pattern[0]
        return au if c == ")"
        @pattern.slice!(0)
        case c
        when "["
          au << parse_character_class
        when "("
          au << parse_group
        when "*", "+", "?", "{"
          # A repetition operator wraps the unit parsed just before it.
          if last_unit = au.last_unit
            case c
            when "*"
              min_count, max_count = 0, nil
            when "+"
              min_count, max_count = 1, nil
            when "?"
              min_count, max_count = 0, 1
            when "{"
              min_count, max_count = parse_curly_count
            end
            mu = MultiplicityUnit.new(last_unit, min_count, max_count)
            au.replace_last!(mu)
          else
            raise Error.new("#{c} follows nothing")
          end
        when "|"
          au.new_alternate!
        when "\\"
          au << parse_backslash
        when "."
          au << period_character_class
        else
          au << CharacterRangeUnit.new(c)
        end
      end
      au
    end
    # Parse the inside of a group; the opening "(" has been consumed.
    #
    # @return [AlternatesUnit]
    def parse_group
      au = parse_alternates
      if @pattern[0] != ")"
        raise Error.new("Unterminated group in pattern")
      end
      @pattern.slice!(0)
      au
    end
    # Parse a character class; the opening "[" has been consumed.
    #
    # @return [CharacterClassUnit]
    #
    # @raise [Error] If the class is unterminated or a range is invalid.
    def parse_character_class
      ccu = CharacterClassUnit.new
      index = 0
      loop do
        if @pattern == ""
          raise Error.new("Unterminated character class")
        end
        c = @pattern.slice!(0)
        if c == "]"
          break
        elsif c == "^" && index == 0
          ccu.negate = true
        elsif c == "-" && (ccu.size == 0 || @pattern[0] == "]")
          # A "-" at the start or end of the class is a literal.
          ccu << CharacterRangeUnit.new(c)
        elsif c == "\\"
          ccu << parse_backslash
        elsif c == "-"
          ccu.replace_last!(parse_range_end(ccu.last_unit))
        else
          ccu << CharacterRangeUnit.new(c)
        end
        index += 1
      end
      ccu
    end
    # Parse the end of a character range; the "-" has been consumed.
    #
    # @param begin_cu
    #   The unit before the "-", which must be a single character.
    #
    # @return [CharacterRangeUnit] Unit covering the whole range.
    #
    # @raise [Error]
    #   If either end is not a single character, the class ends after the
    #   "-", or the end precedes the start.
    def parse_range_end(begin_cu)
      unless begin_cu.is_a?(CharacterRangeUnit) && begin_cu.code_point_range.size == 1
        raise Error.new("Character range must be between single characters")
      end
      if @pattern == ""
        raise Error.new("Unterminated character class")
      end
      if @pattern[0] == "\\"
        @pattern.slice!(0)
        end_cu = parse_backslash
        unless end_cu.is_a?(CharacterRangeUnit) && end_cu.code_point_range.size == 1
          raise Error.new("Character range must be between single characters")
        end
        max_code_point = end_cu.first
      else
        max_code_point = @pattern.slice!(0).ord
      end
      # Reject reversed ranges here so the failure is reported as a
      # grammar Error.
      if max_code_point < begin_cu.first
        raise Error.new("Character range end must not precede its start")
      end
      CharacterRangeUnit.new(begin_cu.first, max_code_point)
    end
    # Parse a repetition count; the opening "{" has been consumed.
    # Accepted forms are "n}", "n,}" and "n,m}".
    #
    # @return [Array] [min_count, max_count]; max_count is nil for "n,}".
    def parse_curly_count
      if @pattern =~ /^(\d+)(?:(,)(\d*))?\}(.*)$/
        min_count, comma, max_count, pattern = $1, $2, $3, $4
        min_count = min_count.to_i
        if comma.to_s == ""
          max_count = min_count
        elsif max_count.to_s != ""
          max_count = max_count.to_i
          if max_count < min_count
            raise Error.new("Maximum repetition count cannot be less than minimum repetition count")
          end
        else
          max_count = nil
        end
        @pattern = pattern
        [min_count, max_count]
      else
        raise Error.new("Unexpected match count at #{@pattern}")
      end
    end
    # Parse an escape; the backslash has been consumed. "d" yields the
    # digits, "s" yields a class of white space characters, and any other
    # character stands for itself.
    #
    # @return [CharacterRangeUnit, CharacterClassUnit]
    def parse_backslash
      if @pattern == ""
        raise Error.new("Error: unfollowed \\")
      else
        c = @pattern.slice!(0)
        case c
        when "d"
          CharacterRangeUnit.new("0", "9")
        when "s"
          ccu = CharacterClassUnit.new
          ccu << CharacterRangeUnit.new(" ")
          ccu << CharacterRangeUnit.new("\t")
          ccu << CharacterRangeUnit.new("\r")
          ccu << CharacterRangeUnit.new("\n")
          ccu << CharacterRangeUnit.new("\f")
          ccu << CharacterRangeUnit.new("\v")
          ccu
        else
          CharacterRangeUnit.new(c)
        end
      end
    end
    # @return [CharacterClassUnit]
    #   Class matching every code point except "\n".
    def period_character_class
      ccu = CharacterClassUnit.new
      ccu << CharacterRangeUnit.new(0, "\n".ord - 1)
      ccu << CharacterRangeUnit.new("\n".ord + 1, 0xFFFFFFFF)
      ccu
    end
  end
 end
--- a/lib/propane/regex/nfa.rb
+++ b/lib/propane/regex/nfa.rb
@ -0,0 +1,26 @@
 class Propane
  class Regex
    # Finite automaton with a designated end state in addition to the
    # start state provided by FA.
    class NFA < FA
      attr_reader :end_state
      def initialize
        super()
        @end_state = State.new
      end
      # Build an NFA whose start state has a nil transition to its end
      # state.
      #
      # @return [NFA]
      def self.empty
        NFA.new.tap do |nfa|
          nfa.start_state.add_transition(nil, nfa.end_state)
        end
      end
    end
  end
 end
--- a/lib/propane/regex/unit.rb
+++ b/lib/propane/regex/unit.rb
@ -0,0 +1,172 @@
 class Propane
  class Regex
    # Base class of the regex units.
    class Unit
    end
    # An ordered sequence of units that must match one after another.
    # Methods not defined here are forwarded to the underlying array of
    # units.
    class SequenceUnit < Unit
      attr_accessor :units
      def initialize
        @units = []
      end
      # Forward unknown methods, including any block, to the units array.
      def method_missing(*args, &block)
        @units.__send__(*args, &block)
      end
      # Build an NFA that chains the NFAs of the units with nil
      # transitions; an empty sequence yields NFA.empty.
      #
      # @return [NFA]
      def to_nfa
        if @units.empty?
          NFA.empty
        else
          nfa = NFA.new
          unit_nfas = @units.map do |unit|
            unit.to_nfa
          end
          nfa.start_state.add_transition(nil, unit_nfas[0].start_state)
          # Link each unit's end state to the next unit's start state, then
          # the last unit's end state to the end state of the sequence.
          unit_nfas.reduce do |prev_nfa, next_nfa|
            prev_nfa.end_state.add_transition(nil, next_nfa.start_state)
            next_nfa
          end.end_state.add_transition(nil, nfa.end_state)
          nfa
        end
      end
    end
    # A list of alternative sequences; new units go to the last one.
    class AlternatesUnit < Unit
      attr_accessor :alternates
      def initialize
        @alternates = []
        new_alternate!
      end
      # Start a new, empty alternate.
      def new_alternate!
        @alternates << SequenceUnit.new
      end
      # Append a unit to the current (last) alternate.
      def <<(unit)
        @alternates[-1] << unit
      end
      # @return The last unit of the current alternate, or nil if it is
      #   empty.
      def last_unit
        @alternates[-1][-1]
      end
      # Replace the last unit of the current alternate.
      def replace_last!(new_unit)
        @alternates[-1][-1] = new_unit
      end
      # Build an NFA with a nil transition into, and out of, the NFA of
      # each alternate. A single alternate's NFA is returned directly.
      #
      # @return [NFA]
      def to_nfa
        if @alternates.size == 0
          NFA.empty
        elsif @alternates.size == 1
          @alternates[0].to_nfa
        else
          nfa = NFA.new
          alternate_nfas = @alternates.map do |alternate|
            alternate.to_nfa
          end
          alternate_nfas.each do |alternate_nfa|
            nfa.start_state.add_transition(nil, alternate_nfa.start_state)
            alternate_nfa.end_state.add_transition(nil, nfa.end_state)
          end
          nfa
        end
      end
    end
    # A unit matching one code point from a CodePointRange.
    class CharacterRangeUnit < Unit
      attr_reader :code_point_range
      # @param c1 First code point (passed to CodePointRange.new).
      # @param c2 Last code point, or nil (passed to CodePointRange.new).
      def initialize(c1, c2 = nil)
        @code_point_range = CodePointRange.new(c1, c2)
      end
      def first
        @code_point_range.first
      end
      def last
        @code_point_range.last
      end
      # @return [NFA] NFA with one transition on the code point range.
      def to_nfa
        nfa = NFA.new
        nfa.start_state.add_transition(@code_point_range, nfa.end_state)
        nfa
      end
    end
    # A character class: a list of units, optionally negated. Methods not
    # defined here are forwarded to the underlying array of units.
    class CharacterClassUnit < Unit
      attr_accessor :units
      attr_accessor :negate
      def initialize
        @units = []
        @negate = false
      end
      # Forward unknown methods, including any block, to the units array.
      def method_missing(*args, &block)
        @units.__send__(*args, &block)
      end
      # Append a unit. Appending another CharacterClassUnit appends each of
      # its units instead of nesting the class.
      def <<(thing)
        if thing.is_a?(CharacterClassUnit)
          @units.concat(thing.units)
        else
          @units << thing
        end
      end
      def last_unit
        @units[-1]
      end
      def replace_last!(new_unit)
        @units[-1] = new_unit
      end
      # Build an NFA with one transition per code point range (the ranges
      # are inverted first when the class is negated). An empty class
      # yields a single nil transition.
      #
      # @return [NFA]
      def to_nfa
        nfa = NFA.new
        if @units.empty?
          nfa.start_state.add_transition(nil, nfa.end_state)
        else
          code_point_ranges = @units.map(&:code_point_range)
          if @negate
            code_point_ranges = CodePointRange.invert_ranges(code_point_ranges)
          end
          code_point_ranges.each do |code_point_range|
            nfa.start_state.add_transition(code_point_range, nfa.end_state)
          end
        end
        nfa
      end
    end
    # A unit repeated between min_count and max_count times; a nil
    # max_count means no upper bound.
    class MultiplicityUnit < Unit
      attr_accessor :unit
      attr_accessor :min_count
      attr_accessor :max_count
      def initialize(unit, min_count, max_count)
        @unit = unit
        @min_count = min_count
        @max_count = max_count
      end
      # @return [NFA]
      def to_nfa
        nfa = NFA.new
        last_state = nfa.start_state
        unit_nfa = nil
        # Chain the required copies of the unit.
        @min_count.times do
          unit_nfa = @unit.to_nfa
          last_state.add_transition(nil, unit_nfa.start_state)
          last_state = unit_nfa.end_state
        end
        last_state.add_transition(nil, nfa.end_state)
        if @max_count.nil?
          # Unbounded: loop on the last copy, adding one when no copies
          # were required.
          if @min_count == 0
            unit_nfa = @unit.to_nfa
            last_state.add_transition(nil, unit_nfa.start_state)
          end
          unit_nfa.end_state.add_transition(nil, unit_nfa.start_state)
          unit_nfa.end_state.add_transition(nil, nfa.end_state)
        else
          # Bounded: chain the optional copies, each of which may finish.
          (@max_count - @min_count).times do
            unit_nfa = @unit.to_nfa
            last_state.add_transition(nil, unit_nfa.start_state)
            unit_nfa.end_state.add_transition(nil, nfa.end_state)
            last_state = unit_nfa.end_state
          end
        end
        nfa
      end
    end
  end
 end
--- a/lib/propane/rule.rb
+++ b/lib/propane/rule.rb
@ -0,0 +1,39 @@
 class Propane
  # A named grammar rule with an ID and the list of patterns added to it.
  class Rule
    # One pattern of a rule: the owning rule, the pattern's components and
    # the code associated with it.
    class Pattern
      attr_reader :rule, :components, :code

      def initialize(rule, components, code)
        @rule, @components, @code = rule, components, code
      end
    end

    attr_reader :id, :name, :patterns

    def initialize(name, id)
      @name, @id = name, id
      @patterns = []
    end

    # Append a new Pattern owned by this rule. Evaluates to the patterns
    # array.
    def add_pattern(components, code)
      @patterns.push(Pattern.new(self, components, code))
    end
  end
 end
--- a/lib/propane/token.rb
+++ b/lib/propane/token.rb
@ -0,0 +1,42 @@
 class Propane
  # A lexer token: a name, an optional pattern and a numeric ID. When a
  # pattern is given, the token also carries the NFA built from it.
  class Token
    # @return [String]
    #   Token name.
    attr_reader :name
    # @return [String]
    #   Token pattern.
    attr_reader :pattern
    # @return [Integer]
    #   Token ID.
    attr_reader :id
    # @return [Regex::NFA]
    #   Regex NFA for matching the token.
    attr_reader :nfa

    def initialize(name, pattern, id)
      @name, @pattern, @id = name, pattern, id
      # Without a pattern there is nothing to compile; @nfa stays unset.
      return if pattern.nil?
      compiled = Regex.new(pattern)
      # Tag the end state so that reaching it identifies this token.
      compiled.nfa.end_state.accepts = self
      @nfa = compiled.nfa
    end

    # Upper-cased token name.
    def c_name
      @name.upcase
    end

    def to_s
      @name
    end
  end
 end
--- a/lib/propane/version.rb
+++ b/lib/propane/version.rb
@ -0,0 +1,3 @@
 class Propane
  # Propane version string.
  VERSION = "0.1.0"
 end
--- a/propane.sh
+++ b/propane.sh
@ -0,0 +1,2 @@
 #!/bin/sh
 # Run bin/propane under Bundler with lib/ on the Ruby load path, forwarding
 # all command-line arguments unchanged. The paths are relative, so this
 # presumably has to be invoked from the repository root (the specs call it
 # as ./propane.sh) - TODO confirm.
 exec bundle exec ruby -Ilib bin/propane "$@"
--- a/1
+++ b/1
@ -1 +0,0 @@
 Subproject commit e2c7e88824c18eb3b218f6308db0194edb422eef
--- a/spec/propane/code_point_range_spec.rb
+++ b/spec/propane/code_point_range_spec.rb
@ -0,0 +1,87 @@
 class Propane
  describe CodePointRange do
    describe "#<=>" do
      it "sorts ranges" do
        ranges = [
          CodePointRange.new(100,102),
          CodePointRange.new(65, 68),
          CodePointRange.new(65, 65),
          CodePointRange.new(100, 100),
          CodePointRange.new(68, 70),
        ]
        in_order = [[65, 65], [65, 68], [68, 70], [100, 100], [100, 102]]
        ranges.sort!
        in_order.each_with_index do |(first, last), position|
          expect(ranges[position]).to eq CodePointRange.new(first, last)
        end
      end
    end
    describe "#include?" do
      it "returns whether the code point is included in the range" do
        # [constructor arguments, probed code point, expected membership]
        [
          [[100], 100, true],
          [[100, 100], 99, false],
          [[100, 100], 101, false],
          [[100, 120], 99, false],
          [[100, 120], 100, true],
          [[100, 120], 110, true],
          [[100, 120], 120, true],
          [[100, 120], 121, false],
        ].each do |bounds, code_point, included|
          verdict = CodePointRange.new(*bounds).include?(code_point)
          expect(verdict).to(included ? be_truthy : be_falsey)
        end
      end
      it "returns whether the range is included in the range" do
        # [outer constructor arguments, inner constructor arguments, expected]
        [
          [[100], [100], true],
          [[100, 100], [99], false],
          [[100, 100], [99, 100], false],
          [[100, 120], [90, 110], false],
          [[100, 120], [110, 130], false],
          [[100, 120], [100, 120], true],
          [[100, 120], [100, 110], true],
          [[100, 120], [110, 120], true],
          [[100, 120], [102, 118], true],
        ].each do |outer, inner, included|
          verdict = CodePointRange.new(*outer).include?(CodePointRange.new(*inner))
          expect(verdict).to(included ? be_truthy : be_falsey)
        end
      end
    end
    describe ".invert_ranges" do
      it "inverts ranges" do
        overlapping = [
          CodePointRange.new(60, 90),
          CodePointRange.new(80, 85),
          CodePointRange.new(80, 100),
          CodePointRange.new(101),
          CodePointRange.new(200, 300),
        ]
        expect(CodePointRange.invert_ranges(overlapping)).to eq [
          CodePointRange.new(0, 59),
          CodePointRange.new(102, 199),
          CodePointRange.new(301, 0xFFFFFFFF),
        ]
        touching_both_ends = [
          CodePointRange.new(0, 500),
          CodePointRange.new(7000, 0xFFFFFFFF),
        ]
        expect(CodePointRange.invert_ranges(touching_both_ends)).to eq [
          CodePointRange.new(501, 6999),
        ]
      end
    end
    describe ".first_subrange" do
      it "returns the first subrange to split" do
        # [input ranges as [first, last] pairs, expected subrange arguments]
        [
          [[[65, 90], [66, 66], [80, 90]], [65]],
          [[[65, 90]], [65, 90]],
          [[[65, 90], [80, 90]], [65, 79]],
          [[[65, 90], [65, 100], [65, 95]], [65, 90]],
          [[[100, 120], [70, 90]], [70, 90]],
        ].each do |inputs, expected|
          ranges = inputs.map { |first, last| CodePointRange.new(first, last) }
          expect(CodePointRange.first_subrange(ranges)).to eq CodePointRange.new(*expected)
        end
      end
    end
  end
 end
--- a/spec/propane/lexer/dfa_spec.rb
+++ b/spec/propane/lexer/dfa_spec.rb
@ -0,0 +1,121 @@
 # Small lexer used by the specs to drive a token DFA over an input string.
 class TestLexer
  def initialize(token_dfa)
    @token_dfa = token_dfa
  end

  # Lex +input+ into an array of [token name, matched text] pairs.
  # Raises when some trailing input cannot be matched.
  def lex(input)
    remaining = input.chars
    tokens = []
    loop do
      token = lex_token(remaining)
      break unless token
      tokens << token
      # Drop the characters the token consumed.
      remaining.slice!(0, token[1].size)
    end
    raise "Unmatched input #{remaining.join(" ")}" unless remaining.empty?
    tokens
  end

  # Follow DFA transitions from the start state for as long as possible and
  # return [name, text] for the last accepting state seen, or nil when no
  # accepting state was reached.
  def lex_token(input_chars)
    return nil if input_chars.empty?
    consumed = ""
    state = @token_dfa.start_state
    best_accepts = nil
    best_text = nil
    input_chars.each do |char|
      following = transition(state, char)
      break unless following
      consumed += char
      state = following
      if state.accepts
        best_accepts = state.accepts
        best_text = consumed
      end
    end
    [best_accepts.name, best_text] if best_accepts
  end

  # Destination of the first transition of +state+ whose code point range
  # includes +input_char+, or nil when there is none.
  def transition(state, input_char)
    match = state.transitions.find do |candidate|
      candidate.code_point_range.include?(input_char.ord)
    end
    match ? match.destination : nil
  end
 end
 # Build a Propane instance from +grammar+, construct the token DFA from its
 # @tokens, and return the result of lexing +input+ with a TestLexer.
 def run(grammar, input)
  tokens = Propane.new(grammar).instance_variable_get(:@tokens)
  dfa = Propane::Lexer::DFA.new(tokens)
  TestLexer.new(dfa).lex(input)
 end
 # Each example passes a grammar (heredoc) and an input string to the
 # top-level run helper and compares the resulting [token name, matched text]
 # pairs.
 describe Propane::Lexer::DFA do
  # The grammar declares the token without a pattern; the expectation is that
  # the input "foo" lexes as token foo.
  it "lexes a simple token" do
    expect(run(<<EOF, "foo")).to eq [["foo", "foo"]]
 token foo
 EOF
  end
  it "lexes two tokens" do
    expected = [
      ["foo", "foo"],
      ["bar", "bar"],
    ]
    expect(run(<<EOF, "foobar")).to eq expected
 token foo
 token bar
 EOF
  end
  # Longest match wins: "foobar" is one identifier, and "+++" is "++" then "+".
  it "lexes the longer of multiple options" do
    expected = [
      ["identifier", "foobar"],
    ]
    expect(run(<<EOF, "foobar")).to eq expected
 token foo
 token bar
 token identifier [a-z]+
 EOF
    expected = [
      ["plusplus", "++"],
      ["plus", "+"],
    ]
    expect(run(<<EOF, "+++")).to eq expected
 token plus \\+
 token plusplus \\+\\+
 EOF
  end
  it "lexes whitespace" do
    expected = [
      ["foo", "foo"],
      ["WS", " \t"],
      ["bar", "bar"],
    ]
    expect(run(<<EOF, "foo \tbar")).to eq expected
 token foo
 token bar
 token WS \\s+
 EOF
  end
  # A "drop" pattern still produces an entry in the test lexer's output, but
  # with a nil token name.
  it "allows dropping a matched pattern" do
    expected = [
      ["foo", "foo"],
      [nil, " \t"],
      ["bar", "bar"],
    ]
    expect(run(<<EOF, "foo \tbar")).to eq expected
 token foo
 token bar
 drop \\s+
 EOF
  end
 end
--- a/spec/propane/parser/item_spec.rb
+++ b/spec/propane/parser/item_spec.rb
@ -0,0 +1,19 @@
 class Propane
  class Parser
    describe Item do
      # Two items built from the same rule and position must compare equal
      # and collapse to a single member of a Set.
      it "operates properly with a set" do
        shared_rule = Object.new
        first = Item.new(shared_rule, 2)
        second = Item.new(shared_rule, 2)
        expect(first).to eq second
        expect(first.eql?(second)).to be_truthy
        items = Set.new([first, second])
        expect(items.size).to eq 1
      end
    end
  end
 end
--- a/spec/propane/regex_spec.rb
+++ b/spec/propane/regex_spec.rb
@ -0,0 +1,333 @@
 class Propane
  RSpec.describe Regex do
    # Parse +pattern+, check that the result is an AlternatesUnit holding
    # exactly one SequenceUnit of +length+ units, and return that sequence.
    def sole_sequence(pattern, length)
      top = Regex.new(pattern).unit
      expect(top).to be_a Regex::AlternatesUnit
      expect(top.alternates.size).to eq 1
      expect(top.alternates[0]).to be_a Regex::SequenceUnit
      sequence = top.alternates[0]
      expect(sequence.size).to eq length
      sequence
    end

    # Check that +unit+ is a CharacterRangeUnit starting at +char+.
    def expect_range_starting(unit, char)
      expect(unit).to be_a Regex::CharacterRangeUnit
      expect(unit.first).to eq char.ord
    end

    # Parse +pattern+ as a single MultiplicityUnit over a character range,
    # check its counts (nil +max_count+ means "no maximum") and return it.
    def sole_multiplicity(pattern, min_count, max_count)
      m_unit = sole_sequence(pattern, 1)[0]
      expect(m_unit).to be_a Regex::MultiplicityUnit
      expect(m_unit.min_count).to eq min_count
      if max_count.nil?
        expect(m_unit.max_count).to be_nil
      else
        expect(m_unit.max_count).to eq max_count
      end
      expect(m_unit.unit).to be_a Regex::CharacterRangeUnit
      m_unit
    end

    # Parse +pattern+ as a single CharacterClassUnit and return it.
    def sole_character_class(pattern)
      ccu = sole_sequence(pattern, 1)[0]
      expect(ccu).to be_a Regex::CharacterClassUnit
      ccu
    end

    it "parses an empty expression" do
      top = Regex.new("").unit
      expect(top).to be_a Regex::AlternatesUnit
      expect(top.alternates.size).to eq 1
      expect(top.alternates[0].size).to eq 0
    end
    it "parses a single character unit expression" do
      expect(sole_sequence("a", 1)[0]).to be_a Regex::CharacterRangeUnit
    end
    it "parses a group with a single character unit expression" do
      group = sole_sequence("(a)", 1)[0]
      expect(group).to be_a Regex::AlternatesUnit
      expect(group.alternates.size).to eq 1
      expect(group.alternates[0]).to be_a Regex::SequenceUnit
      expect(group.alternates[0][0]).to be_a Regex::CharacterRangeUnit
    end
    it "parses a *" do
      sole_multiplicity("a*", 0, nil)
    end
    it "parses a +" do
      sole_multiplicity("a+", 1, nil)
    end
    it "parses a ?" do
      sole_multiplicity("a?", 0, 1)
    end
    it "parses a multiplicity count" do
      sole_multiplicity("a{5}", 5, 5)
    end
    it "parses a minimum-only multiplicity count" do
      sole_multiplicity("a{5,}", 5, nil)
    end
    it "parses a minimum and maximum multiplicity count" do
      m_unit = sole_multiplicity("a{5,8}", 5, 8)
      expect(m_unit.unit.first).to eq "a".ord
    end
    it "parses an escaped *" do
      sequence = sole_sequence("a\\*", 2)
      expect_range_starting(sequence[0], "a")
      expect_range_starting(sequence[1], "*")
    end
    it "parses an escaped +" do
      sequence = sole_sequence("a\\+", 2)
      expect_range_starting(sequence[0], "a")
      expect_range_starting(sequence[1], "+")
    end
    it "parses an escaped \\" do
      sequence = sole_sequence("\\\\d", 2)
      expect_range_starting(sequence[0], "\\")
      expect_range_starting(sequence[1], "d")
    end
    it "parses a character class" do
      ccu = sole_character_class("[a-z_]")
      expect(ccu.negate).to be_falsey
      expect(ccu.size).to eq 2
      expect_range_starting(ccu[0], "a")
      expect(ccu[0].last).to eq "z".ord
      expect_range_starting(ccu[1], "_")
    end
    it "parses a negated character class" do
      ccu = sole_character_class("[^xyz]")
      expect(ccu.negate).to be_truthy
      expect(ccu.size).to eq 3
      expect_range_starting(ccu[0], "x")
    end
    it "parses - as a plain character at beginning of a character class" do
      ccu = sole_character_class("[-9]")
      expect(ccu.size).to eq 2
      expect_range_starting(ccu[0], "-")
    end
    it "parses - as a plain character at end of a character class" do
      ccu = sole_character_class("[0-]")
      expect(ccu.size).to eq 2
      expect_range_starting(ccu[0], "0")
      expect_range_starting(ccu[1], "-")
    end
    it "parses - as a plain character at beginning of a negated character class" do
      ccu = sole_character_class("[^-9]")
      expect(ccu.negate).to be_truthy
      expect(ccu.size).to eq 2
      expect_range_starting(ccu[0], "-")
    end
    it "parses . as a plain character in a character class" do
      ccu = sole_character_class("[.]")
      expect(ccu.negate).to be_falsey
      expect(ccu.size).to eq 1
      expect_range_starting(ccu[0], ".")
    end
    it "parses - as a plain character when escaped in middle of character class" do
      ccu = sole_character_class("[0\\-9]")
      expect(ccu.negate).to be_falsey
      expect(ccu.size).to eq 3
      expect_range_starting(ccu[0], "0")
      expect_range_starting(ccu[1], "-")
      expect_range_starting(ccu[2], "9")
    end
    it "parses alternates" do
      top = Regex.new("ab|c").unit
      expect(top).to be_a Regex::AlternatesUnit
      expect(top.alternates.size).to eq 2
      expect(top.alternates[0]).to be_a Regex::SequenceUnit
      expect(top.alternates[1]).to be_a Regex::SequenceUnit
      expect(top.alternates[0].size).to eq 2
      expect(top.alternates[1].size).to eq 1
    end
    it "parses a ." do
      top = Regex.new("a.b").unit
      expect(top).to be_a Regex::AlternatesUnit
      expect(top.alternates.size).to eq 1
      expect(top.alternates[0]).to be_a Regex::SequenceUnit
      sequence = top.alternates[0]
      expect(sequence[0]).to be_a Regex::CharacterRangeUnit
      expect(sequence[1]).to be_a Regex::CharacterClassUnit
      expect(sequence[1].units.size).to eq 2
      expect(sequence[2]).to be_a Regex::CharacterRangeUnit
    end
    it "parses something complex" do
      top = Regex.new("(a|)*|[^^]|\\|v|[x-y]+").unit
      expect(top).to be_a Regex::AlternatesUnit
      expect(top.alternates.size).to eq 4

      # Alternate 0: (a|)*
      starred = top.alternates[0]
      expect(starred).to be_a Regex::SequenceUnit
      expect(starred.size).to eq 1
      expect(starred[0]).to be_a Regex::MultiplicityUnit
      expect(starred[0].min_count).to eq 0
      expect(starred[0].max_count).to be_nil
      expect(starred[0].unit).to be_a Regex::AlternatesUnit
      expect(starred[0].unit.alternates.size).to eq 2
      expect(starred[0].unit.alternates[0]).to be_a Regex::SequenceUnit
      expect(starred[0].unit.alternates[0].size).to eq 1
      expect(starred[0].unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit
      expect(starred[0].unit.alternates[1]).to be_a Regex::SequenceUnit
      expect(starred[0].unit.alternates[1].size).to eq 0

      # Alternate 1: [^^]
      negated = top.alternates[1]
      expect(negated).to be_a Regex::SequenceUnit
      expect(negated.size).to eq 1
      expect(negated[0]).to be_a Regex::CharacterClassUnit
      expect(negated[0].negate).to be_truthy
      expect(negated[0].size).to eq 1
      expect(negated[0][0]).to be_a Regex::CharacterRangeUnit

      # Alternate 2: \|v
      escaped = top.alternates[2]
      expect(escaped).to be_a Regex::SequenceUnit
      expect(escaped.size).to eq 2
      expect_range_starting(escaped[0], "|")
      expect_range_starting(escaped[1], "v")

      # Alternate 3: [x-y]+
      repeated = top.alternates[3]
      expect(repeated).to be_a Regex::SequenceUnit
      expect(repeated.size).to eq 1
      expect(repeated[0]).to be_a Regex::MultiplicityUnit
      expect(repeated[0].min_count).to eq 1
      expect(repeated[0].max_count).to be_nil
      expect(repeated[0].unit).to be_a Regex::CharacterClassUnit
      expect(repeated[0].unit.size).to eq 1
      expect_range_starting(repeated[0].unit[0], "x")
      expect(repeated[0].unit[0].last).to eq "y".ord
    end
  end
 end
--- a/spec/propane_spec.rb
+++ b/spec/propane_spec.rb
@ -0,0 +1,97 @@
 require "fileutils"
 # End-to-end specs: write a grammar file, run propane.sh on it, and for the
 # lexer example also compile and run the generated D code.
 describe Propane do
  # Write +grammar+ to the input file used by the other helpers.
  def write_grammar(grammar)
    File.write("spec/run/testparser.i", grammar)
  end
  # Run propane.sh to turn the grammar file into spec/run/testparser.d and
  # expect the command to succeed.
  def build_parser
    result = system(*%w[./propane.sh spec/run/testparser.i spec/run/testparser.d])
    expect(result).to be_truthy
  end
  # Compile the generated parser together with +test_file+ using gdc with
  # unit tests enabled, and expect the compiler to succeed.
  def compile(test_file)
    result = system(*%w[gdc -funittest -o spec/run/testparser spec/run/testparser.d], test_file)
    expect(result).to be_truthy
  end
  # Execute the compiled test binary and expect it to exit successfully.
  def run
    result = system("spec/run/testparser")
    expect(result).to be_truthy
  end
  # Start every example from an empty spec/run directory.
  before(:each) do
    FileUtils.rm_rf("spec/run")
    FileUtils.mkdir_p("spec/run")
  end
  it "generates a D lexer" do
    write_grammar <<EOF
 token int \\d+
 token plus \\+
 token times \\*
 drop \\s+
 Start: [Foo] <<
 >>
 Foo: [int] <<
 >>
 Foo: [plus] <<
 >>
 EOF
    build_parser
    compile("spec/test_d_lexer.d")
    run
  end
  # The remaining examples only check that propane.sh accepts the grammar;
  # the generated code is not compiled.
  it "generates a parser" do
    write_grammar <<EOF
 token plus \\+
 token times \\*
 token zero 0
 token one 1
 Start: [E] <<
 >>
 E: [E times B] <<
 >>
 E: [E plus B] <<
 >>
 E: [B] <<
 >>
 B: [zero] <<
 >>
 B: [one] <<
 >>
 EOF
    build_parser
  end
  it "distinguishes between multiple identical rules with lookahead symbol" do
    write_grammar <<EOF
 token a
 token b
 Start: [R1 a] <<
 >>
 Start: [R2 b] <<
 >>
 R1: [a b] <<
 >>
 R2: [a b] <<
 >>
 EOF
    build_parser
  end
  it "handles reducing a rule that could be arrived at from multiple states" do
    write_grammar <<EOF
 token a
 token b
 Start: [a R1] <<
 >>
 Start: [b R1] <<
 >>
 R1: [b] <<
 >>
 EOF
    build_parser
  end
 end
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@ -0,0 +1,11 @@
 require "bundler/setup"
 require "propane"
 # RSpec configuration shared by all specs.
 RSpec.configure do |config|
  # Enable flags like --only-failures and --next-failure
  config.example_status_persistence_file_path = ".rspec_status"
  # Allow only the expect(...) expectation syntax.
  config.expect_with :rspec do |c|
    c.syntax = :expect
  end
 end
--- a/spec/test_d_lexer.d
+++ b/spec/test_d_lexer.d
@ -0,0 +1,66 @@
 import testparser;
 import std.stdio;
 // Entry point that does nothing itself; the actual checks live in the
 // unittest blocks below (the spec compiles this file with -funittest).
 int main()
 {
    return 0;
 }
 // Decoder test: walks "5+\n 66" one code point at a time, expecting each to
 // decode with a length of 1, then expects CODE_POINT_EOF with length 0 once
 // the input is exhausted. Finally checks that a 4-byte UTF-8 sequence decodes
 // to a single code point of length 4.
 unittest
 {
    alias DCP = Testparser.Decoder.DecodedCodePoint;
    string inputstring = "5+\n 66";
    const(ubyte) * input = cast(const(ubyte) *)inputstring.ptr;
    size_t input_length = inputstring.length;
    DCP dcp;
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP('5', 1u));
    // Advance past the decoded code point before decoding the next one.
    input += dcp.code_point_length;
    input_length -= dcp.code_point_length;
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP('+', 1u));
    input += dcp.code_point_length;
    input_length -= dcp.code_point_length;
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP('\n', 1u));
    input += dcp.code_point_length;
    input_length -= dcp.code_point_length;
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP(' ', 1u));
    input += dcp.code_point_length;
    input_length -= dcp.code_point_length;
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP('6', 1u));
    input += dcp.code_point_length;
    input_length -= dcp.code_point_length;
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP('6', 1u));
    input += dcp.code_point_length;
    input_length -= dcp.code_point_length;
    // No input left: the decoder is expected to report EOF.
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP(Testparser.Decoder.CODE_POINT_EOF, 0u));
    // Multi-byte case: F0 9F A7 A1 is the UTF-8 encoding of U+1F9E1.
    inputstring = "\xf0\x9f\xa7\xa1";
    input = cast(const(ubyte) *)inputstring.ptr;
    input_length = inputstring.length;
    dcp = Testparser.Decoder.decode_code_point(input, input_length);
    assert(dcp == DCP(0x1F9E1, 4u));
 }
 // Lexer test: lexes a two-line arithmetic input token by token and checks
 // each LexedToken, then checks that a lexer over empty input yields EOF.
 // NOTE(review): the LT fields look like (row, column, length, token id),
 // judging by the values below - confirm against the generated LexedToken.
 unittest
 {
    alias LT = Testparser.Lexer.LexedToken;
    string input = "5 + 4 * \n677 + 567";
    Testparser.Lexer lexer = new Testparser.Lexer(cast(const(ubyte) *)input.ptr, input.length);
    assert(lexer.lex_token() == LT(0, 0, 1, Testparser.TOKEN_INT));
    assert(lexer.lex_token() == LT(0, 2, 1, Testparser.TOKEN_PLUS));
    assert(lexer.lex_token() == LT(0, 4, 1, Testparser.TOKEN_INT));
    assert(lexer.lex_token() == LT(0, 6, 1, Testparser.TOKEN_TIMES));
    assert(lexer.lex_token() == LT(1, 0, 3, Testparser.TOKEN_INT));
    assert(lexer.lex_token() == LT(1, 4, 1, Testparser.TOKEN_PLUS));
    assert(lexer.lex_token() == LT(1, 6, 3, Testparser.TOKEN_INT));
    assert(lexer.lex_token() == LT(1, 9, 0, Testparser.TOKEN_EOF));
    // Null input of length 0 must immediately produce EOF.
    lexer = new Testparser.Lexer(null, 0u);
    assert(lexer.lex_token() == LT(0, 0, 0, Testparser.TOKEN_EOF));
 }
--- a/tests/Makefile
+++ b/tests/Makefile
@ -1,14 +0,0 @@
 # Run "make" (or "make clean") in every subdirectory of tests/.
 # $(MAKE) is used instead of a literal "make" so that command-line flags
 # (-n, -k, -j and the jobserver) are passed on to the sub-makes.
 # Neither target produces a file of its own name.
 .PHONY: all clean
 all:
 	for d in *; do \
 		if [ -d "$$d" ]; then \
 			$(MAKE) -C "$$d" || exit 1; \
 		fi; \
 	done
 # Cleaning stays best-effort: a failure in one directory does not stop
 # the remaining directories from being cleaned.
 clean:
 	for d in *; do \
 		if [ -d "$$d" ]; then \
 			$(MAKE) -C "$$d" clean; \
 		fi; \
 	done
--- a/tests/build/Makefile
+++ b/tests/build/Makefile
@ -1,15 +0,0 @@
 # Build the test program from the imbecile-generated parser plus the
 # hand-written *.cc files, then run it.
 TARGET   := test
 I_SOURCE := itest
 CXXFLAGS := -O2
 LDFLAGS  := -lpcre
 # "all" and "clean" do not name files.
 .PHONY: all clean
 all: $(TARGET)
 	./$(TARGET)
 # Depending on the imbecile executable rebuilds the test whenever the
 # generator itself changes.
 $(TARGET): $(shell which imbecile) $(I_SOURCE).I $(wildcard *.cc)
 	imbecile $(I_SOURCE).I
 	$(CXX) $(CXXFLAGS) -o $@ *.cc $(LDFLAGS)
 clean:
 	-rm -f $(TARGET) *.o $(I_SOURCE).cc $(I_SOURCE).h
--- a/tests/build/itest.I
+++ b/tests/build/itest.I
@ -1,37 +0,0 @@
 [tokens]
 AND         and
 OR          or
 NOT         not
 LPAREN      \(
 RPAREN      \)
 WS          \s+
 EQUALS      = %{ cout << "Saw '='" << endl; %}
 IDENTIFIER  [a-zA-Z_][a-zA-Z_0-9]* %{
    cout << "Identify: '" << matches[0] << "'" << endl;
 %}
 DEC_INT     [1-9]\d*\b
 ${
    uint64_t value;
 $}
 %{
    sscanf(matches[0].c_str(), "%lld", &value);
    cout << "value: " << value << endl;
 %}
 HEX_INT     0x([0-9a-fA-F]+)\b ${ uint64_t value; $} %{
    sscanf(matches[1].c_str(), "%llx", &value);
    cout << "value: " << value << endl;
 %}
 OCT_INT     0([0-7]*)\b
 BIN_INT     0b([01]+)\b
 [rules]
 Assignment := IDENTIFIER ASSIGN Expression
 Expression := IDENTIFIER \
            | Assignment
--- a/tests/build/main.cc
+++ b/tests/build/main.cc
@ -1,17 +0,0 @@
 #include <sstream>
 #include <string>
 #include "itest.h"
 using namespace std;
 int main(int argc, char * argv[])
 {
    Parser p;
    stringstream t(string(
                "hi there (one and two and three and four) or (two = nine)\n"
                "0x42 12345 0 011 0b0011\n"
                ));
    p.parse(t);
 }
--- a/tmpl/parser.cc
+++ b/tmpl/parser.cc
@ -1,202 +0,0 @@
 #include <string.h>                 /* memcpy() */
 #include <pcre.h>
 #include <iostream>
 #include <vector>
 #include {%header_name%}
 using namespace std;
 #ifdef I_NAMESPACE
 namespace I_NAMESPACE {
 #endif
 /* Construct a parser with no error message recorded (m_errstr is NULL). */
 I_CLASSNAME::I_CLASSNAME()
    : m_errstr(NULL)
 {
 }
 /* Create the token object for the token type with index typeindex.
  * The switch body comes from the {%buildToken%} template placeholder. When
  * it leaves the token non-null, the token's type is set to typeindex;
  * otherwise a null TokenRef is returned. */
 static TokenRef buildToken(int typeindex)
 {
    TokenRef token;
    switch (typeindex)
    {
        {%buildToken%}
    }
    if (!token.isNull())
    {
        token->setType(typeindex);
    }
    return token;
 }
 /* Read everything that is left in stream i and append it to buff.
  *
  * i     stream to drain
  * buff  destination; the bytes read are appended to its current contents
  * size  set to the number of bytes appended by this call
  *
  * The loop ends on the first read that delivers no bytes rather than on
  * eof(): a stream that has failed without reaching EOF never reports eof(),
  * so testing eof() alone would spin forever. */
 static void read_istream(std::istream & i, std::vector<char> & buff, int & size)
 {
    size = 0;
    char read_buff[1000];
    for (;;)
    {
        i.read(read_buff, sizeof(read_buff));
        const std::streamsize bytes_read = i.gcount();
        if (bytes_read <= 0)
            break;
        buff.insert(buff.end(), read_buff, read_buff + bytes_read);
        size += static_cast<int>(bytes_read);
    }
 }
 /* Tokenize the whole contents of stream i.
  *
  * Every token pattern is compiled with PCRE. At each input position all
  * patterns are tried anchored at that position, and the longest non-empty
  * match wins (on a tie the pattern listed first is kept). The winning token
  * is built, processed and appended to m_tokens.
  *
  * Returns true when the entire input was consumed. Returns false, with
  * m_errstr describing the problem, when there are no tokens, the input is
  * empty, a pattern fails to compile, no pattern matches at some position,
  * or a token object cannot be built.
  *
  * NOTE(review): the compiled pcre / pcre_extra objects are never released
  * here; whether anything outside this function still refers to them is not
  * visible from this block. */
 bool I_CLASSNAME::parse(istream & i)
 {
    struct {
        const char * name;
        const char * definition;
        bool process;
        pcre * re;
        pcre_extra * re_extra;
    } tokens[] = {
        {%token_list%}
    };
    if (sizeof(tokens)/sizeof(tokens[0]) == 0)
    {
        m_errstr = "No tokens defined";
        return false;
    }
    vector<char> buff;
    int buff_size;
    read_istream(i, buff, buff_size);
    if (buff_size <= 0)
    {
        m_errstr = "0-length input string";
        return false;
    }
    /* append trailing NUL byte for pcre functions */
    buff.push_back('\0');
    /* compile all token regular expressions */
    for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++)
    {
        const char * errptr;
        int erroffset;
        tokens[i].re = pcre_compile(tokens[i].definition, 0,
                &errptr, &erroffset, NULL);
        if (tokens[i].re == NULL)
        {
            cerr << "Error compiling token '" << tokens[i].name
                << "' regular expression at position " << erroffset
                << ": " << errptr << endl;
            m_errstr = "Error in token regular expression";
            return false;
        }
        tokens[i].re_extra = pcre_study(tokens[i].re, 0, &errptr);
    }
    int buff_pos = 0;
    const int ovector_num_matches = 16;
    const int ovector_size = 3 * (ovector_num_matches + 1);
    int ovector[ovector_size];
    while (buff_pos < buff_size)
    {
        int longest_match_length = 0;
        int longest_match_index = -1;
        int longest_match_ovector[ovector_size];
        for (int i = 0; i < sizeof(tokens)/sizeof(tokens[0]); i++)
        {
            int rc = pcre_exec(tokens[i].re, tokens[i].re_extra,
                    &buff[0], buff_size, buff_pos,
                    PCRE_ANCHORED | PCRE_NOTEMPTY,
                    ovector, ovector_size);
            if (rc > 0)
            {
                /* this pattern matched some of the input */
                int len = ovector[1] - ovector[0];
                if (len > longest_match_length)
                {
                    longest_match_length = len;
                    longest_match_index = i;
                    memcpy(longest_match_ovector, ovector, sizeof(ovector));
                }
            }
        }
        if (longest_match_index < 0)
        {
            /* no pattern matched the input at the current position */
            cerr << "Parse error" << endl;
            m_errstr = "Parse error";
            return false;
        }
        Matches matches(tokens[longest_match_index].re,
                &buff[0], longest_match_ovector, ovector_size);
        TokenRef token = buildToken(longest_match_index);
        if (token.isNull())
        {
            cerr << "Internal Error: null token" << endl;
            m_errstr = "Internal Error: null token";
            return false;
        }
        token->process(matches);
        m_tokens.push_back(token);
        buff_pos += longest_match_length;
    }
    /* All input consumed. Without this the function would run off its end,
     * which is undefined behaviour for a function returning bool. */
    return true;
 }
 /* Child at position index, or a refptr wrapping NULL when index is out of
  * range. */
 refptr<Node> Node::operator[](int index)
 {
    if (index < 0 || index >= m_indexed_children.size())
    {
        return NULL;
    }
    return m_indexed_children[index];
 }
 /* Child stored under the name index, or a refptr wrapping NULL when there
  * is no child of that name. The lookup does not insert a new entry. */
 refptr<Node> Node::operator[](const std::string & index)
 {
    if (m_named_children.find(index) == m_named_children.end())
    {
        return NULL;
    }
    return m_named_children[index];
 }
 /* Process a freshly matched token. The body comes from the {%token_code%}
  * template placeholder; matches gives it access to the captured
  * substrings. */
 void Token::process(const Matches & matches)
 {
    {%token_code%}
 }
 /* Record the compiled pattern, the subject buffer and the offset vector of
  * one match. Only the pointers are stored; nothing is copied here. */
 Matches::Matches(pcre * re, const char * data, int * ovector, int ovec_size)
    : m_re(re), m_data(data), m_ovector(ovector), m_ovec_size(ovec_size)
 {
 }
 /* Text captured by group number index (0 is the whole match). Returns an
  * empty string when index is out of range or the group has a negative
  * offset. */
 std::string Matches::operator[](int index) const
 {
    if (index < 0 || index >= (m_ovec_size / 3))
    {
        return "";
    }
    /* Offsets for group n are stored at 2n (start) and 2n + 1 (end). */
    const int start = m_ovector[2 * index];
    const int end = m_ovector[2 * index + 1];
    if (start < 0 || end < 0)
    {
        return "";
    }
    return string(m_data, start, end - start);
 }
 /* Text captured by the named group index. Returns an empty string when the
  * pattern has no group of that name, the group number is out of range, or
  * the group has a negative offset.
  *
  * pcre_get_stringnumber() yields the group NUMBER (or a negative error
  * code). The offsets of group n are stored at 2n and 2n + 1, so the number
  * has to be doubled before indexing m_ovector, exactly as the int overload
  * does. */
 std::string Matches::operator[](const std::string & index) const
 {
    int group = pcre_get_stringnumber(m_re, index.c_str());
    if (group > 0 && group < (m_ovec_size / 3))
    {
        int idx = 2 * group;
        if (m_ovector[idx] >= 0 && m_ovector[idx + 1] >= 0)
        {
            return string(m_data, m_ovector[idx],
                    m_ovector[idx + 1] - m_ovector[idx]);
        }
    }
    return "";
 }
 {%token_classes_code%}
 #ifdef I_NAMESPACE
 };
 #endif
--- a/tmpl/parser.h
+++ b/tmpl/parser.h
@ -1,181 +0,0 @@
 #ifndef IMBECILE_PARSER_HEADER
 #define IMBECILE_PARSER_HEADER
 #include <pcre.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <iostream>
 #include <map>
 #include <vector>
 #include <list>
 {%user_includes%}
 {%defines%}
 #ifdef I_NAMESPACE
 namespace I_NAMESPACE {
 #endif
 #ifndef REFPTR_H
 #define REFPTR_H REFPTR_H
 /* Author: Josh Holtrop
 * Purpose: Provide a reference-counting, pointer-like first-class
 *   C++ object that frees the object it points to once
 *   all references to it have been destroyed.
 * This implementation does not solve the circular-reference problem;
 * that was not a concern when this class was developed.
 */
 #include <stdlib.h>             /* NULL */
 /* Reference-counted handle to a heap object of type T.
  *
  * Every handle that refers to the same object shares one heap-allocated
  * counter.  The object and the counter are deleted when the last handle
  * referring to them is destroyed or re-assigned.  A default-constructed
  * handle refers to nothing and has no counter.
  *
  * The counter is a plain int; no synchronization is performed. */
 template <typename T>
 class refptr
 {
    public:
        /* The constructors and destructor are declared with the injected
         * class name (refptr, not refptr<T>): C++20 no longer accepts a
         * template-id as the declarator name of a constructor/destructor. */
        refptr();
        /* Take ownership of ptr (which may be NULL).  Intentionally not
         * explicit so that a raw pointer or NULL converts to a handle. */
        refptr(T * ptr);
        refptr(const refptr<T> & orig);
        refptr<T> & operator=(const refptr<T> & orig);
        refptr<T> & operator=(T * ptr);
        ~refptr();
        T & operator*() const { return *m_ptr; }
        T * operator->() const { return m_ptr; }
        bool isNull() const { return m_ptr == NULL; }
    private:
        void cloneFrom(const refptr<T> & orig);
        void destroy();
        void exchange(refptr<T> & other);
        T * m_ptr;
        int * m_refCount;
 };

 /* Create an empty handle: no object, no counter. */
 template <typename T> refptr<T>::refptr()
    : m_ptr(NULL), m_refCount(NULL)
 {
 }

 /* Create the first handle to ptr with a fresh counter set to 1. */
 template <typename T> refptr<T>::refptr(T * ptr)
    : m_ptr(ptr), m_refCount(new int(1))
 {
 }

 /* Create another handle to whatever orig refers to. */
 template <typename T> refptr<T>::refptr(const refptr<T> & orig)
 {
    cloneFrom(orig);
 }

 /* Make this handle refer to whatever orig refers to.
  *
  * The new reference is acquired BEFORE the old one is released.  Doing it
  * the other way round frees the object on self-assignment, and reads
  * freed memory when orig lives inside the object this handle currently
  * owns (for example `head = head->next`). */
 template <typename T> refptr<T> & refptr<T>::operator=(const refptr<T> & orig)
 {
    refptr<T> incoming(orig);   /* acquire first */
    exchange(incoming);         /* incoming now holds the old reference */
    return *this;               /* ...and releases it when it goes away */
 }

 /* Make this handle the first owner of ptr.
  * Re-assigning the pointer that is already held is a no-op; releasing it
  * first would leave the handle pointing at a deleted object. */
 template <typename T> refptr<T> & refptr<T>::operator=(T * ptr)
 {
    if (ptr != m_ptr)
    {
        refptr<T> incoming(ptr);
        exchange(incoming);
    }
    return *this;
 }

 /* Copy orig's pointer and counter and count this handle as a new owner.
  * Does not release anything this handle referred to before. */
 template <typename T> void refptr<T>::cloneFrom(const refptr<T> & orig)
 {
    this->m_ptr = orig.m_ptr;
    this->m_refCount = orig.m_refCount;
    if (m_refCount != NULL)
        (*m_refCount)++;
 }

 template <typename T> refptr<T>::~refptr()
 {
    destroy();
 }

 /* Give up this handle's reference and leave the handle empty.
  * The members are detached before anything is deleted so that the handle
  * never exposes dangling pointers, even while T's destructor runs. */
 template <typename T> void refptr<T>::destroy()
 {
    T * ptr = m_ptr;
    int * count = m_refCount;
    m_ptr = NULL;
    m_refCount = NULL;
    if (count != NULL)
    {
        if (*count <= 1)
        {
            /* last owner: free the object and its counter */
            delete ptr;
            delete count;
        }
        else
        {
            (*count)--;
        }
    }
 }

 /* Swap the pointer and counter of this handle with those of other. */
 template <typename T> void refptr<T>::exchange(refptr<T> & other)
 {
    T * ptr = m_ptr;
    int * count = m_refCount;
    m_ptr = other.m_ptr;
    m_refCount = other.m_refCount;
    other.m_ptr = ptr;
    other.m_refCount = count;
 }
 #endif
 /* View over the result of one regular-expression match.
  * Stores the compiled pattern, the subject data and the output vector it
  * is given and exposes the captured substrings through operator[]. */
 class Matches
 {
    public:
        /* Store the four arguments as given; nothing is copied. */
        Matches(pcre * re, const char * data, int * ovector, int ovec_size);
        /* Text of the capture group with the given number, or an empty
         * string when the number is out of range or the group is unset. */
        std::string operator[](int index) const;
        /* Text of the capture group with the given name, or an empty
         * string when no text can be returned for that name. */
        std::string operator[](const std::string & index) const;
    protected:
        pcre * m_re;            /* compiled pattern, used to resolve group names */
        const char * m_data;    /* subject data the offsets refer to */
        int * m_ovector;        /* match offsets, a (start, end) pair per group */
        int m_ovec_size;        /* number of ints in m_ovector */
 };
 /* Base class of the parse tree: a node owning children that can be looked
  * up either by position or by name. */
 class Node
 {
    public:
        /* Node is used as a base class (Token derives from it) and nodes
         * are released by refptr with a plain `delete`.  Without a virtual
         * destructor, deleting a derived object through a Node pointer is
         * undefined behavior. */
        virtual ~Node() {}
        /* Child at position `index`, or a null reference if out of range. */
        refptr<Node> operator[](int index);
        /* Child named `index`, or a null reference if there is none. */
        refptr<Node> operator[](const std::string & index);
    protected:
        std::map< std::string, refptr<Node> > m_named_children;
        std::vector< refptr<Node> > m_indexed_children;
 };
 typedef refptr<Node> NodeRef;
 /* A lexical token: a Node that additionally carries an integer type. */
 class Token : public Node
 {
    public:
        /* Hook called with the capture groups of the match that produced
         * this token; virtual so that token classes can override it. */
        virtual void process(const Matches & matches);
        void setType(int type) { m_type = type; }
        int getType() const { return m_type; }
    protected:
        /* Token type; not initialized by this class, so it holds no
         * meaningful value until setType() has been called. */
        int m_type;
        /* Template placeholder -- presumably replaced by the generator
         * with extra data members; confirm against the generator. */
        {%token_data%}
 };
 typedef refptr<Token> TokenRef;
 {%token_classes%}
 /* The generated parser class.  I_CLASSNAME is a preprocessor name that is
  * expected to be defined before this point (NOTE(review): presumably by
  * the {%defines%} section of this template -- confirm). */
 class I_CLASSNAME
 {
    public:
        I_CLASSNAME();
        /* Read input from `in`; returns a bool success indicator. */
        bool parse(std::istream & in);
        /* Return the stored error string pointer (m_errstr). */
        const char * getError() { return m_errstr; }
    protected:
        const char * m_errstr;          /* error string reported by getError() */
        std::list<TokenRef> m_tokens;   /* list of token references */
 };
 #ifdef I_NAMESPACE
 };
 #endif
 #endif /* IMBECILE_PARSER_HEADER */
Author	SHA1	Message	Date
Josh Holtrop	164a4854fb	Update README	2022-05-30 15:40:31 -04:00
Josh Holtrop	ddadc2008b	Rename to propane	2022-05-28 20:20:03 -04:00
Josh Holtrop	fbd215098b	Update license years	2022-05-27 21:49:54 -04:00
Josh Holtrop	bfe2916165	Update bundler	2022-05-27 00:15:03 -04:00
Josh Holtrop	c9bc4832f4	bundle update	2022-05-27 00:14:26 -04:00
Josh Holtrop	6dfef8573f	Fix ERB constructor call for Ruby 3.2 warnings	2022-05-27 00:12:40 -04:00
Josh Holtrop	f3ed678fe1	Store tokens in Hash by name	2021-09-27 21:40:12 -04:00
Josh Holtrop	280b749e38	Track Rule IDs	2021-09-27 21:29:44 -04:00
Josh Holtrop	d6779aef00	Start on Parser#build_tables	2021-09-22 23:26:36 -04:00
Josh Holtrop	746ec89be8	Add test for a rule that can be arrived at from multiple states	2021-09-21 21:40:11 -04:00
Josh Holtrop	997f34a1e4	Keep track of item set in-links	2021-09-21 21:32:18 -04:00
Josh Holtrop	a2795bb531	Keep track of follow item sets by symbol for each item set	2021-09-21 17:09:53 -04:00
Josh Holtrop	850e639e3a	update identical rule spec to use lookahead symbol	2021-09-06 20:18:17 -04:00
Josh Holtrop	5f7e548fe3	Remove Rule::Pattern, Item stores a Rule reference	2021-09-06 19:41:29 -04:00
Josh Holtrop	bdb10e7afc	test duplicate rules	2021-09-05 09:50:04 -04:00
Josh Holtrop	7bdaf7cdbc	Do not create item set following EOF token	2021-09-05 07:51:59 -04:00
Josh Holtrop	08e3516ad9	Add wikipedia LR(0) parser example test	2021-09-04 22:33:34 -04:00
Josh Holtrop	2c8f3c6e9a	Avoid infinite loop with self-referential rules	2021-09-04 22:29:10 -04:00
Josh Holtrop	9dffa3c41a	Recursively build item sets	2021-08-29 12:38:44 -04:00
Josh Holtrop	ceb7e9ee32	Add EOF token to Start rule patterns	2021-08-29 11:48:49 -04:00
Josh Holtrop	6026bf1514	Start building following item sets	2021-08-29 09:41:00 -04:00
Josh Holtrop	9cc1890ddc	One Rule object stores all alternative patterns	2021-08-28 10:28:50 -04:00
Josh Holtrop	e4f2fffe50	add Item#closed_items	2021-08-28 09:47:01 -04:00
Josh Holtrop	d931bcb513	Do not expand rules	2021-08-28 09:23:08 -04:00
Josh Holtrop	2e16b0bd6e	Start on Item and ItemSet	2021-08-28 09:02:19 -04:00
Josh Holtrop	6ce94e15af	Expand rules	2021-08-28 08:11:06 -04:00
Josh Holtrop	3f92ae46c4	Map rule components to Token/Rule references	2021-08-22 21:21:41 -04:00
Josh Holtrop	00016f16b3	Combine Grammar and Generator into top-level Imbecile class	2021-08-22 21:04:46 -04:00
Josh Holtrop	9273bfccf6	Move Token/Rule out of Grammar class	2021-08-19 20:00:40 -04:00
Josh Holtrop	f295acb593	Generator builds a Lexer, not a Lexer::DFA	2021-08-19 13:11:12 -04:00
Josh Holtrop	51a31317a6	Move FA#build_tables to Lexer::DFA	2021-08-19 11:55:34 -04:00
Josh Holtrop	9459883e74	Add Lexer class; Move LexerDFA to Lexer::DFA	2021-08-18 17:09:45 -04:00
Josh Holtrop	28591907c1	Move FA class out of Regex class	2021-08-18 17:05:03 -04:00
Josh Holtrop	37d6917b49	Rework Rule constructor	2021-07-27 21:22:46 -04:00
Josh Holtrop	2685c05360	Change rule syntax	2021-07-19 21:55:08 -04:00
Josh Holtrop	c0c3353fd7	Test lexing empty null string returns EOF	2021-07-06 12:06:07 -04:00
Josh Holtrop	3158e51059	Add length field to LexedToken	2021-07-06 11:59:35 -04:00
Josh Holtrop	d9e4f64d2e	Fix returning TOKEN_EOF when lexing at EOF	2021-07-06 11:55:44 -04:00
Josh Holtrop	ec2dcf9a72	Fix not progressing through input while lexing a token	2021-07-06 11:47:33 -04:00
Josh Holtrop	578e165e2d	Fix off-by-one error in state IDs	2021-07-06 11:44:03 -04:00
Josh Holtrop	e8df4296cc	Begin testing lexer	2021-07-06 11:09:39 -04:00
Josh Holtrop	230c324209	Fix iterating through all transitions in a state	2021-07-06 11:09:13 -04:00
Josh Holtrop	1271e19b50	Test multi-byte code point decoding	2021-07-06 11:02:43 -04:00
Josh Holtrop	12e11399af	Add decoder tests	2021-07-06 10:57:06 -04:00
Josh Holtrop	24fab8515d	Decoder.decode_code_point returns struct with code point and length together	2021-07-06 10:50:32 -04:00
Josh Holtrop	1dcdd87a28	Generate token constants and names to top-level parser class	2021-07-06 10:28:35 -04:00
Josh Holtrop	8aec7ec0de	Lexer class can be used standalone	2021-07-06 10:15:07 -04:00
Josh Holtrop	c96d55b031	Fix class name	2021-07-06 10:14:14 -04:00
Josh Holtrop	ca7d4862f9	Run test executable; build with unit tests	2021-07-06 10:03:42 -04:00
Josh Holtrop	3c874ae4c1	Compile generated parser with a test file	2021-07-05 23:05:55 -04:00
Josh Holtrop	748c219625	Do not return dropped tokens from Lexer.lex_token()	2021-07-05 22:53:58 -04:00
Josh Holtrop	71ee7de9f9	Remove obsolete lex() and lex_token() methods	2021-07-05 22:49:50 -04:00
Josh Holtrop	2121acc87e	Complete Lexer.lex_token()	2021-07-05 22:41:09 -04:00
Josh Holtrop	f2563cf255	Work on Lexer.lex_token()	2021-07-05 22:02:27 -04:00
Josh Holtrop	24d12be3b9	Add TOKEN enum entries for EOF, decode error, drop, and none	2021-07-05 20:11:55 -04:00
Josh Holtrop	91d6ee25ea	Add Lexer class	2021-07-05 19:13:41 -04:00
Josh Holtrop	2f1cb47bea	Add Decoder class to decode code points	2021-07-05 18:47:10 -04:00
Josh Holtrop	651461c570	Start on decode_code_point()	2021-06-29 23:17:44 -04:00
Josh Holtrop	3ce54bd303	Start on lex()/lex_token()	2021-06-29 23:10:40 -04:00
Josh Holtrop	15454f926a	Add TokenNames array	2021-06-29 22:54:24 -04:00
Josh Holtrop	4beb3d2016	Add some token constants	2021-06-27 23:09:42 -04:00
Josh Holtrop	aae7bc188c	Use unsigned literals	2021-06-26 18:11:20 -04:00
Josh Holtrop	a716dedeb6	Start on test framework to compile and run generated parser	2021-06-26 16:17:24 -04:00
Josh Holtrop	93cb25df62	Do not generate token names for drop tokens	2021-06-26 16:16:18 -04:00
Josh Holtrop	61dd5bc5a0	Move imbecile_spec to lexer_dfa_spec	2021-06-26 16:01:49 -04:00
Josh Holtrop	10a8ef5eb4	Update generated lexer state and transition tables	2021-06-26 15:58:36 -04:00
Josh Holtrop	98584ce07a	Add FA#build_tables	2021-06-24 15:06:10 -04:00
Josh Holtrop	2122ca02fe	Start generating lexer states and transitions	2021-06-23 23:15:02 -04:00
Josh Holtrop	5881f13380	Generate enum of token identifiers	2021-06-23 22:22:45 -04:00
Josh Holtrop	ebc1d8f001	Fix FA#to_s to show correct destination state	2021-06-23 22:21:53 -04:00
Josh Holtrop	5fecd5c6a2	Refactor into FA#enumerate	2021-06-22 22:01:39 -04:00
Josh Holtrop	5b688b090d	Add some attr_readers	2021-06-21 22:52:27 -04:00
Josh Holtrop	f77218801f	Error if Start rule not found	2021-06-21 22:48:17 -04:00
Josh Holtrop	70118dd019	Check for duplicate token/rule names in Generator	2021-06-21 22:34:43 -04:00
Josh Holtrop	d552f2a540	CLI: accept --log option	2021-06-19 12:06:02 -04:00
Josh Holtrop	d2fac07249	Add Generator class	2021-06-15 16:51:36 -04:00
Josh Holtrop	a34272dfd6	Add Grammar::Rule class	2021-06-14 22:49:43 -04:00
Josh Holtrop	9d05861819	Parse grammar input by multiline regex	2021-06-12 22:57:32 -04:00
Josh Holtrop	03035a25a5	Update spec task to accept an example pattern	2021-06-12 22:46:13 -04:00
Josh Holtrop	db70f8b94d	Add "drop" grammar keyword to drop patterns	2021-06-09 22:48:30 -04:00
Josh Holtrop	f67dd62b20	Add \s to expand to whitespace characters	2021-06-09 22:37:00 -04:00
Josh Holtrop	c6bac6d3a1	Rename TokenDFA -> LexerDFA	2021-06-08 13:54:46 -04:00
Josh Holtrop	aa92970c31	Add some lexer tests	2021-06-07 22:21:52 -04:00
Josh Holtrop	b8282e748e	Start on a test lexer for lexer specs	2021-06-07 17:17:37 -04:00
Josh Holtrop	930ac56148	Do not accept 0-length tokens	2021-06-06 15:29:30 -04:00
Josh Holtrop	7f54778ba8	Rename Regex::DFA to TokenDFA	2021-06-06 15:18:21 -04:00
Josh Holtrop	701903def2	Token should build its own NFA	2021-06-06 14:09:28 -04:00
Josh Holtrop	afea886ecb	Add Grammar::Token class	2021-06-06 14:04:33 -04:00
Josh Holtrop	03b2e87186	Grammar takes in input string instead of file name	2021-06-06 10:09:53 -04:00
Josh Holtrop	e4370cac62	Print accepting token in FA#to_s	2021-06-06 09:59:28 -04:00
Josh Holtrop	ed3f599e25	Create common FA/State/Transition classes across NFA/DFA	2021-06-06 09:41:23 -04:00
Josh Holtrop	1228a76c55	Fix MultiplicityUnit#to_nfa again	2021-05-26 10:17:03 -04:00
Josh Holtrop	538e360cb3	Fix MultiplicityUnit#to_nfa	2021-05-25 16:59:22 -04:00
Josh Holtrop	e7f8c3726c	Fix NFA#to_s	2021-05-25 16:14:19 -04:00
Josh Holtrop	b6e3a5c151	Record accepting token in DFA state	2021-05-25 16:00:25 -04:00
Josh Holtrop	35ef94dbd3	Print out DFA to test	2021-05-25 15:52:47 -04:00
Josh Holtrop	37e1252ded	Continue building DFA	2021-05-25 15:44:23 -04:00
Josh Holtrop	214ece7d90	Add NFA::Transition, start on DFA construction	2021-05-23 21:41:50 -04:00
Josh Holtrop	8473df421a	Add specs for CodePointRange	2021-05-23 20:41:40 -04:00
Josh Holtrop	3987f08cd7	Add CodePointRange class	2021-05-23 17:52:20 -04:00
Josh Holtrop	3a1650906e	Show non-printable characters better in NFA#to_s	2021-05-21 14:39:02 -04:00
Josh Holtrop	952bffc33c	Move DFA#nil_transition_states to NFA::State	2021-05-21 14:27:42 -04:00
Josh Holtrop	f64f3683c6	Add NFA#to_s	2021-05-21 14:24:16 -04:00
Josh Holtrop	43f5caf449	Fix some NFA creation	2021-05-20 17:34:18 -04:00
Josh Holtrop	f38a7456e9	Add DFA#nil_transition_states	2021-05-20 17:08:34 -04:00
Josh Holtrop	c77c81bf25	Mark regex NFA end state as accepting the token	2021-05-18 16:34:26 -04:00
Josh Holtrop	7196a0605a	Add DFA class	2021-05-18 16:31:16 -04:00
Josh Holtrop	24054461a2	Merge Regex::Parser into Regex, move Unit to its own file	2021-05-18 16:14:42 -04:00
Josh Holtrop	89a5976064	Make Regex::Parser build a NFA after parsing	2021-05-18 16:07:39 -04:00
Josh Holtrop	d3df67be1e	Update rake	2021-05-18 16:03:14 -04:00
Josh Holtrop	791340b292	Build NFA for each token pattern	2021-05-17 22:57:18 -04:00
Josh Holtrop	cf8718b69c	Allow token definition with no pattern	2021-05-17 22:40:23 -04:00
Josh Holtrop	39f164a7db	Parse . in a regex	2021-05-17 17:20:56 -04:00
Josh Holtrop	70b3e56de2	Store all characters as ranges; add CharacterClassUnit#to_nfa	2021-05-14 13:52:03 -04:00
Josh Holtrop	2e8e72a1e8	Add CharacterClassUnit and use it instead of AlternatesUnit	2021-05-14 12:32:53 -04:00
Josh Holtrop	ea27baa630	Add #to_nfa for other regex unit types	2021-05-13 15:57:09 -04:00
Josh Holtrop	d8dd64d860	Add NFA class Start converting units to NFAs	2021-05-13 00:01:12 -04:00
Josh Holtrop	54cefda186	Use Parser	2021-05-11 16:52:28 -04:00
Josh Holtrop	201a38fb51	Add Parser specs	2021-05-11 15:29:40 -04:00
Josh Holtrop	33f9d01883	Rename start/end to min/max for CharacterRangeUnit	2021-05-11 15:28:45 -04:00
Josh Holtrop	9b09625c8a	Fix parsing - at beginning of negated character class	2021-05-11 14:57:16 -04:00
Josh Holtrop	6119d860bc	Fix character class parsing into an AlternatesUnit	2021-05-11 14:57:01 -04:00
Josh Holtrop	611ebeeddd	Fix max multiplicity count parsing	2021-05-11 11:37:46 -04:00
Josh Holtrop	449eec4982	Fix multiplicity count parsing	2021-05-11 11:33:10 -04:00
Josh Holtrop	8cd648fc8f	Create spec file for Parser	2021-05-07 16:58:38 -04:00
Josh Holtrop	885ef6c151	Rename Regex::Unit -> Regex::Parser	2021-05-07 16:57:05 -04:00
Josh Holtrop	60adffbbab	Add rspec	2021-05-07 15:16:01 -04:00
Josh Holtrop	b8c01ca1d1	Move Unit stuff from Imbecile::Regex to Imbecile::Regex::Unit	2021-05-07 15:10:51 -04:00
Josh Holtrop	b04ff56308	Add Regex class	2021-05-02 15:22:45 -04:00
Josh Holtrop	ca1d2d1e5c	Fix class name determination from output file name	2021-05-01 17:01:15 -04:00
Josh Holtrop	13403405b0	Add Error class to handle grammar loading errors	2021-05-01 16:54:24 -04:00
Josh Holtrop	07dd68e367	Write output file from ERB template	2021-05-01 16:44:01 -04:00
Josh Holtrop	c1666a1e74	Require output file on command line	2021-05-01 14:52:16 -04:00
Josh Holtrop	768a0ef17f	Extract class name from grammar file	2021-05-01 14:34:00 -04:00
Josh Holtrop	9e865d1982	Throw error on unexpected grammar input line	2021-05-01 09:40:22 -04:00
Josh Holtrop	9884047090	Skip blank lines	2021-05-01 09:39:19 -04:00
Josh Holtrop	04393dcc51	Check for duplicate token names; skip comment lines	2021-05-01 09:38:08 -04:00
Josh Holtrop	7f27b3fd6f	Exit with CLI exit code	2021-05-01 09:34:38 -04:00
Josh Holtrop	37ad87d602	Rename GrammarParser -> Grammar	2021-05-01 09:33:35 -04:00
Josh Holtrop	23b7782a5d	Begin parsing grammar	2021-05-01 09:31:12 -04:00
Josh Holtrop	0cc4516c0e	Add GrammarParser class to parse input file	2021-05-01 08:22:14 -04:00
Josh Holtrop	75a1049040	Parse command-line options	2021-05-01 08:16:09 -04:00
Josh Holtrop	a9ff93dda4	Add script to test run	2021-05-01 08:16:01 -04:00
Josh Holtrop	d879a93d09	Add bin/imbecile and Imbecile::CLI module	2021-04-29 23:26:52 -04:00
Josh Holtrop	ee27c5e9b1	Add Gemfile.lock	2021-04-29 23:26:37 -04:00
Josh Holtrop	989e5f47de	Edit some gemspec fields	2021-04-29 23:26:29 -04:00
Josh Holtrop	04e17cde30	Add "bundle gem"-generated files	2021-04-29 23:22:23 -04:00
Josh Holtrop	bc217e7ddb	Start on ruby branch	2021-04-29 23:18:22 -04:00
		`@ -0,0 +1,2 @@`
							`#!/bin/sh`
							`exec bundle exec ruby -Ilib bin/propane "$@"`
		`@ -1 +0,0 @@`
			`Subproject commit e2c7e88824c18eb3b218f6308db0194edb422eef`