initial content added with Unicode deserializer

This commit is contained in:
Josh Holtrop 2010-03-29 15:05:57 -04:00
parent a6dd466148
commit 9b838e93a4
6 changed files with 242 additions and 0 deletions

28
Makefile Normal file
View File

@ -0,0 +1,28 @@
# Build rules for the imbecile executable.
# Every *.cc file in this directory is compiled and linked into $(TARGET).
TARGET := imbecile
CXXOBJS := $(patsubst %.cc,%.o,$(wildcard *.cc))
CXXDEPS := $(CXXOBJS:.o=.dep)
CXXFLAGS := -O2
DEPS := $(CXXDEPS)
OBJS := $(CXXOBJS)

# These goals do not name files; never let a stray file called "all" or
# "clean" satisfy them.
.PHONY: all clean

all: $(TARGET)

# LDLIBS is empty by default; set it (e.g. LDLIBS=-liconv) on platforms
# where iconv is not part of the C library.
$(TARGET): $(OBJS)
	$(CXX) -o $@ $^ $(LDFLAGS) $(LDLIBS)

# Object file rules
%.o: %.cc
	$(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $<

# Make dependency files
# The sed expression rewrites "foo.o: ..." into "foo.o foo.dep : ..." so the
# dependency file itself is regenerated when any listed header changes.
%.dep: %.c
	@set -e; rm -f $@; \
	$(CC) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@

%.dep: %.cc
	@set -e; rm -f $@; \
	$(CXX) -MM $(CPPFLAGS) $< | sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' > $@

clean:
	-rm -f $(TARGET) *.o *.dep

# Pull in the generated header dependencies so that editing a header rebuilds
# the objects that include it. Skipped for "make clean" so that cleaning does
# not first regenerate the files it is about to delete.
ifneq ($(MAKECMDGOALS),clean)
-include $(DEPS)
endif

42
imbecile.cc Normal file
View File

@ -0,0 +1,42 @@
#include <iostream>
#include <fstream>
#include <getopt.h>
#include <iconv.h>
#include "refptr/refptr.h"
#include "serialize.h"
#include "unicode.h"
using namespace std;
int main(int argc, char * argv[])
{
int longind = 1;
int opt;
const char * encoding = "UTF-8";
static struct option longopts[] = {
/* name, has_arg, flag, val */
{ "encoding", required_argument, NULL, 'e' },
{ NULL, 0, NULL, 0 }
};
while ((opt = getopt_long(argc, argv, "", longopts, &longind)) != -1)
{
switch (opt)
{
case 'e': /* encoding */
encoding = optarg;
break;
}
}
ifstream ifs(optarg);
refptr< vector<unichar_t> > ucs_str = deserialize(encoding, ifs);
if (ucs_str.isNull())
{
cerr << "Error deserializing input file." << endl;
return 1;
}
return 0;
}

99
refptr/refptr.h Normal file
View File

@ -0,0 +1,99 @@
#ifndef REFPTR_H
#define REFPTR_H REFPTR_H
/* Author: Josh Holtrop
* Purpose: Provide a reference-counting pointer-like first order
* C++ object that will free the object it is pointing to when
* all references to it have been destroyed.
* This implementation does not solve the circular reference problem.
* I was not concerned with that when developing this class.
*/
#include <stdlib.h> /* NULL */
/* Reference-counting pointer wrapper.
 *
 * A refptr constructed from (or assigned) a raw pointer takes ownership of
 * it and starts a counter at 1.  Copies share the pointer and the counter;
 * the pointee is deleted when the last refptr sharing it is destroyed or
 * reassigned.  A default-constructed refptr holds NULL and has no counter.
 *
 * The counter is a plain int and is not synchronized.
 */
template <typename T>
class refptr
{
    public:
        /* Construct a null refptr. */
        refptr();
        /* Take ownership of ptr (may be NULL).  Intentionally not explicit:
         * callers rely on implicit conversion from T*. */
        refptr(T * ptr);
        /* Share ownership with orig. */
        refptr(const refptr<T> & orig);
        /* Drop the current reference and share ownership with orig. */
        refptr<T> & operator=(const refptr<T> & orig);
        /* Drop the current reference and take ownership of ptr. */
        refptr<T> & operator=(T * ptr);
        ~refptr();

        T & operator*() const { return *m_ptr; }
        T * operator->() const { return m_ptr; }
        bool isNull() const { return m_ptr == NULL; }

    private:
        /* Copy pointer and counter from orig and count the new reference. */
        void cloneFrom(const refptr<T> & orig);
        /* Release this reference; delete pointee and counter if it was the
         * last one. */
        void destroy();

        T * m_ptr;
        int * m_refCount;
};

template <typename T> refptr<T>::refptr()
    : m_ptr(NULL), m_refCount(NULL)
{
}

template <typename T> refptr<T>::refptr(T * ptr)
    : m_ptr(ptr), m_refCount(new int(1))
{
}

template <typename T> refptr<T>::refptr(const refptr<T> & orig)
    : m_ptr(NULL), m_refCount(NULL)
{
    cloneFrom(orig);
}

template <typename T> refptr<T> & refptr<T>::operator=(const refptr<T> & orig)
{
    /* Hold an extra reference to orig's target before releasing ours.
     * This keeps the target alive across destroy() both for
     * self-assignment and when orig lives inside the object we own. */
    refptr<T> keep(orig);
    destroy();
    cloneFrom(keep);
    return *this;
}

template <typename T> refptr<T> & refptr<T>::operator=(T * ptr)
{
    /* Re-assigning the pointer we already own must not delete it. */
    if (ptr == m_ptr && m_refCount != NULL)
        return *this;

    /* Allocate the new counter first so that a failed allocation leaves
     * this refptr unchanged. */
    int * count = new int(1);
    destroy();
    m_ptr = ptr;
    m_refCount = count;
    return *this;
}

template <typename T> void refptr<T>::cloneFrom(const refptr<T> & orig)
{
    this->m_ptr = orig.m_ptr;
    this->m_refCount = orig.m_refCount;
    if (m_refCount != NULL)
        (*m_refCount)++;
}

template <typename T> refptr<T>::~refptr()
{
    destroy();
}

template <typename T> void refptr<T>::destroy()
{
    if (m_refCount != NULL)
    {
        if (*m_refCount <= 1)
        {
            delete m_ptr;
            delete m_refCount;
        }
        else
        {
            (*m_refCount)--;
        }
    }
    /* Never keep pointers to storage we no longer hold a reference to. */
    m_ptr = NULL;
    m_refCount = NULL;
}
#endif

52
serialize.cc Normal file
View File

@ -0,0 +1,52 @@
#include "serialize.h"
#include <errno.h>
#include <string.h>
using namespace std;
refptr< vector<unichar_t> > deserialize(const char * encoding, istream & in)
{
const int buf_size = 200;
int num_read;
char inbuf[buf_size];
char * inbuf_ptr = (char *) &inbuf[0];
unichar_t outbuf[buf_size];
char * outbuf_ptr;
size_t bytes_converted, inbytesleft = 0, outbytesleft;
refptr< vector<unichar_t> > ucs = new vector<unichar_t>();
iconv_t cd = iconv_open(encoding, "UTF-32");
if (cd == (iconv_t) -1)
{
cerr << "iconv_open() error" << endl;
return NULL;
}
for (;;)
{
in.read(&inbuf[0], sizeof(inbuf) - inbytesleft);
num_read = in.gcount();
if (num_read <= 0)
break;
outbuf_ptr = (char *) &outbuf[0];
outbytesleft = sizeof(outbuf);
bytes_converted = iconv(cd, &inbuf_ptr, &inbytesleft,
&outbuf_ptr, &outbytesleft);
if (inbytesleft > 0)
{
memmove(&inbuf[0], inbuf_ptr, inbytesleft);
inbuf_ptr = (char *) &inbuf[0];
}
for (int i = 0; i < (bytes_converted / sizeof(outbuf[0])); i++)
{
ucs->push_back(outbuf[i]);
}
if (bytes_converted & 0x3)
cerr << "Warning: bytes_converted = " << bytes_converted << endl;
if (in.eof())
break;
}
iconv_close(cd);
return ucs;
}

13
serialize.h Normal file
View File

@ -0,0 +1,13 @@
#ifndef SERIALIZE_H
#define SERIALIZE_H /* without this define the guard never takes effect */

#include <iostream>
#include <iconv.h>
#include <vector>
#include "refptr/refptr.h"
#include "unicode.h"

/* Kept because files including this header may rely on it. */
using namespace std;

/* Read the contents of 'in' as text in the encoding named by 'encoding'
 * (passed to iconv) and return it as a vector of unichar_t values.
 * The returned refptr is null when deserialization fails. */
refptr< std::vector<unichar_t> > deserialize(const char * encoding,
                                             std::istream & in);

#endif

8
unicode.h Normal file
View File

@ -0,0 +1,8 @@
#ifndef UNICODE_H
#define UNICODE_H /* without this define the guard never takes effect */

#include <stdint.h>

/* One 32-bit unsigned unit, used to hold a single Unicode character. */
typedef uint32_t unichar_t;

#endif