FileReader: read lines from file buffer, detect line endings

This commit is contained in:
Josh Holtrop 2014-06-05 15:36:55 -04:00
parent 85f2ca91c3
commit 346dbaa783
2 changed files with 82 additions and 10 deletions

View File

@ -4,22 +4,37 @@
#include "jes/Ref.h"
#include "jes/Text.h"
#include <stdint.h>
#include <vector>
namespace jes
{
// Loads a file into a heap buffer and records where each line starts and how
// long it is, along with the line-ending convention detected in the data.
//
// NOTE(review): the class owns m_buf (the destructor releases it with
// delete[]) but declares no copy constructor or copy assignment, so copying
// an instance would release the same buffer twice. Presumably instances are
// only shared through a Ref<FileReader>; confirm before copying by value.
class FileReader
{
public:
    // Line-ending conventions reported by get_line_endings().
    enum
    {
        LINE_ENDING_LF,     // lines are terminated by '\n'
        LINE_ENDING_CR,     // lines are terminated by '\r'
        LINE_ENDING_CRLF,   // lines are terminated by "\r\n"
        LINE_ENDING_COUNT   // number of conventions; also the value stored
                            // in m_line_endings by the constructor
    };

    FileReader();
    ~FileReader();

    // Reads the file named fname; returns true on success, false on failure.
    bool load(const char * fname);

    void close();

    // Value of m_num_lines. const: only reads a member.
    unsigned int num_lines() const { return m_num_lines; }

    // Defined outside this declaration; whether line_no is zero-based is not
    // visible here. TODO confirm against the definition.
    TextRef get_line(unsigned int line_no);

    // One of the LINE_ENDING_* values. const: only reads a member.
    int get_line_endings() const { return m_line_endings; }

protected:
    // (pointer to the first byte of a line, line length without terminator)
    typedef std::pair<const uint8_t *, size_t> LineIndexPair;
    typedef std::vector<LineIndexPair> LineIndexPairVector;
    typedef Ref<LineIndexPairVector> LineIndexPairVectorRef;

    // Indexes the first `size` bytes of m_buf into lines and selects the
    // line-ending convention.
    void load_buf(size_t size);

    int m_fd;                       // file descriptor; 0 is used as "none"
    unsigned int m_num_lines;       // set to 0 by the constructor
    uint8_t * m_buf;                // file contents; NULL when nothing is loaded
    int m_line_endings;             // LINE_ENDING_* chosen by load_buf()
    LineIndexPairVectorRef m_lines; // line table chosen by load_buf()
};
typedef Ref<FileReader> FileReaderRef;
}

View File

@ -13,11 +13,17 @@ namespace jes
m_fd = 0;
m_num_lines = 0u;
m_buf = NULL;
m_line_endings = LINE_ENDING_COUNT;
m_lines = NULL;
}
// Tears the reader down: runs close() and then makes sure the file buffer
// has been released.
FileReader::~FileReader()
{
    close();

    // delete[] on a null pointer does nothing, so the buffer can be released
    // without testing it first; the pointer ends up NULL either way.
    delete[] m_buf;
    m_buf = NULL;
}
bool FileReader::load(const char * fname)
@ -48,25 +54,76 @@ namespace jes
if (read(m_fd, m_buf, st.st_size) != st.st_size)
{
m_buf = NULL;
delete[] m_buf;
m_buf = NULL;
close(m_fd);
m_fd = 0;
return false;
}
close(m_fd);
m_fd = 0;
load_buf(st.st_size);
return true;
}
// NOTE(review): this span reads like a rendered diff with the +/- markers
// stripped. The header of FileReader::close() and the statements that free
// m_buf and close m_fd appear to be leftovers of a removed close() body,
// interleaved with the new load_buf() body. Confirm against the real file;
// the comments below describe load_buf() only.
void FileReader::close()
// Scans the first `size` bytes of m_buf once and builds three candidate line
// tables in parallel, one per line-ending convention (LF, CR, CRLF). Each
// entry is (pointer to first byte of the line, length without terminator).
// After the scan one table is stored in m_lines and the matching
// LINE_ENDING_* value in m_line_endings.
//
// NOTE(review): bytes after the last terminator are never added to any
// table, so an unterminated final line is not recorded, and m_num_lines is
// not assigned anywhere in this function. Confirm whether that is intended.
void FileReader::load_buf(size_t size)
{
if (m_buf != NULL)
LineIndexPairVectorRef lines[LINE_ENDING_COUNT];
// Offset in m_buf where the current, not yet terminated line begins under
// each convention; every entry starts at 0.
size_t line_start[LINE_ENDING_COUNT] = {0};
// n_cr counts every '\r'; n_lf counts every '\n', including the ones that
// are consumed as the second half of a "\r\n" pair.
unsigned int n_cr = 0;
unsigned int n_lf = 0;
// Stays true only while every '\r' seen so far was directly followed by
// '\n' and no bare '\n' has been seen.
bool crlf = true;
for (size_t i = 0; i < LINE_ENDING_COUNT; i++)
{
delete[] m_buf;
m_buf = NULL;
// Presumably the Ref takes ownership of the new vector; verify in jes/Ref.h.
lines[i] = new LineIndexPairVector();
}
if (m_fd != 0)
for (size_t i = 0; i < size; i++)
{
::close(m_fd);
m_fd = 0;
if (m_buf[i] == '\r')
{
// A '\r' always ends a line in the CR table.
lines[LINE_ENDING_CR]->push_back(LineIndexPair(&m_buf[line_start[LINE_ENDING_CR]], i - line_start[LINE_ENDING_CR]));
n_cr++;
line_start[LINE_ENDING_CR] = i + 1;
if (crlf)
{
// The bounds check keeps m_buf[i + 1] inside the first `size` bytes.
if ((i < (size - 1)) && (m_buf[i + 1] == '\n'))
{
lines[LINE_ENDING_CRLF]->push_back(LineIndexPair(&m_buf[line_start[LINE_ENDING_CRLF]], i - line_start[LINE_ENDING_CRLF]));
n_lf++;
// Step over the '\n' so the next iteration starts after the pair.
// NOTE(review): because the '\n' is skipped, the LF table gets no entry
// for this break; if crlf later turns false and the LF table is chosen,
// that table lacks the breaks consumed here.
i++;
line_start[LINE_ENDING_CRLF] = i + 1;
}
else
{
// A '\r' not followed by '\n' rules out a pure CRLF file.
crlf = false;
}
}
}
else if (m_buf[i] == '\n')
{
// A '\n' reached here was not consumed as part of a "\r\n" pair above.
lines[LINE_ENDING_LF]->push_back(LineIndexPair(&m_buf[line_start[LINE_ENDING_LF]], i - line_start[LINE_ENDING_LF]));
crlf = false;
n_lf++;
line_start[LINE_ENDING_LF] = i + 1;
}
}
// CRLF: at least one break was seen and every break was a "\r\n" pair.
if (crlf && (n_lf > 0u))
{
m_line_endings = LINE_ENDING_CRLF;
m_lines = lines[LINE_ENDING_CRLF];
}
// CR: at least one '\r' and no '\n' anywhere in the scanned bytes.
else if ((n_cr > 0u) && (n_lf == 0u))
{
m_line_endings = LINE_ENDING_CR;
m_lines = lines[LINE_ENDING_CR];
}
// Everything else, including data with no terminators at all, is LF.
else
{
m_line_endings = LINE_ENDING_LF;
m_lines = lines[LINE_ENDING_LF];
}
}