FileReader: read lines from file buffer, detect line endings
This commit is contained in:
parent
85f2ca91c3
commit
346dbaa783
@ -4,22 +4,37 @@
|
||||
#include "jes/Ref.h"
|
||||
#include "jes/Text.h"
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
|
||||
namespace jes
|
||||
{
|
||||
class FileReader
|
||||
{
|
||||
public:
|
||||
enum
|
||||
{
|
||||
LINE_ENDING_LF,
|
||||
LINE_ENDING_CR,
|
||||
LINE_ENDING_CRLF,
|
||||
LINE_ENDING_COUNT
|
||||
};
|
||||
|
||||
FileReader();
|
||||
~FileReader();
|
||||
bool load(const char * fname);
|
||||
void close();
|
||||
unsigned int num_lines() { return m_num_lines; }
|
||||
TextRef get_line(unsigned int line_no);
|
||||
int get_line_endings() { return m_line_endings; }
|
||||
protected:
|
||||
typedef std::pair<const uint8_t *, size_t> LineIndexPair;
|
||||
typedef std::vector<LineIndexPair> LineIndexPairVector;
|
||||
typedef Ref<LineIndexPairVector> LineIndexPairVectorRef;
|
||||
void load_buf(size_t size);
|
||||
int m_fd;
|
||||
unsigned int m_num_lines;
|
||||
uint8_t * m_buf;
|
||||
int m_line_endings;
|
||||
LineIndexPairVectorRef m_lines;
|
||||
};
|
||||
typedef Ref<FileReader> FileReaderRef;
|
||||
}
|
||||
|
@ -13,11 +13,17 @@ namespace jes
|
||||
m_fd = 0;
|
||||
m_num_lines = 0u;
|
||||
m_buf = NULL;
|
||||
m_line_endings = LINE_ENDING_COUNT;
|
||||
m_lines = NULL;
|
||||
}
|
||||
|
||||
/**
 * Destructor: invoke close(), then release the file buffer if one is still
 * held and reset the pointer.
 */
FileReader::~FileReader()
{
    close();

    /* delete[] on a null pointer has no effect, so no explicit guard is
     * required before releasing the buffer. */
    delete[] m_buf;
    m_buf = NULL;
}
|
||||
|
||||
bool FileReader::load(const char * fname)
|
||||
@ -48,25 +54,76 @@ namespace jes
|
||||
|
||||
if (read(m_fd, m_buf, st.st_size) != st.st_size)
|
||||
{
|
||||
m_buf = NULL;
|
||||
delete[] m_buf;
|
||||
m_buf = NULL;
|
||||
close(m_fd);
|
||||
m_fd = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
close(m_fd);
|
||||
m_fd = 0;
|
||||
|
||||
load_buf(st.st_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
void FileReader::close()
|
||||
void FileReader::load_buf(size_t size)
|
||||
{
|
||||
if (m_buf != NULL)
|
||||
LineIndexPairVectorRef lines[LINE_ENDING_COUNT];
|
||||
size_t line_start[LINE_ENDING_COUNT] = {0};
|
||||
unsigned int n_cr = 0;
|
||||
unsigned int n_lf = 0;
|
||||
bool crlf = true;
|
||||
for (size_t i = 0; i < LINE_ENDING_COUNT; i++)
|
||||
{
|
||||
delete[] m_buf;
|
||||
m_buf = NULL;
|
||||
lines[i] = new LineIndexPairVector();
|
||||
}
|
||||
if (m_fd != 0)
|
||||
for (size_t i = 0; i < size; i++)
|
||||
{
|
||||
::close(m_fd);
|
||||
m_fd = 0;
|
||||
if (m_buf[i] == '\r')
|
||||
{
|
||||
lines[LINE_ENDING_CR]->push_back(LineIndexPair(&m_buf[line_start[LINE_ENDING_CR]], i - line_start[LINE_ENDING_CR]));
|
||||
n_cr++;
|
||||
line_start[LINE_ENDING_CR] = i + 1;
|
||||
if (crlf)
|
||||
{
|
||||
if ((i < (size - 1)) && (m_buf[i + 1] == '\n'))
|
||||
{
|
||||
lines[LINE_ENDING_CRLF]->push_back(LineIndexPair(&m_buf[line_start[LINE_ENDING_CRLF]], i - line_start[LINE_ENDING_CRLF]));
|
||||
n_lf++;
|
||||
i++;
|
||||
line_start[LINE_ENDING_CRLF] = i + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
crlf = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (m_buf[i] == '\n')
|
||||
{
|
||||
lines[LINE_ENDING_LF]->push_back(LineIndexPair(&m_buf[line_start[LINE_ENDING_LF]], i - line_start[LINE_ENDING_LF]));
|
||||
crlf = false;
|
||||
n_lf++;
|
||||
line_start[LINE_ENDING_LF] = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (crlf && (n_lf > 0u))
|
||||
{
|
||||
m_line_endings = LINE_ENDING_CRLF;
|
||||
m_lines = lines[LINE_ENDING_CRLF];
|
||||
}
|
||||
else if ((n_cr > 0u) && (n_lf == 0u))
|
||||
{
|
||||
m_line_endings = LINE_ENDING_CR;
|
||||
m_lines = lines[LINE_ENDING_CR];
|
||||
}
|
||||
else
|
||||
{
|
||||
m_line_endings = LINE_ENDING_LF;
|
||||
m_lines = lines[LINE_ENDING_LF];
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user