diff --git a/src/lib/include/jes/FileReader.h b/src/lib/include/jes/FileReader.h index 188fd44..657cd5e 100644 --- a/src/lib/include/jes/FileReader.h +++ b/src/lib/include/jes/FileReader.h @@ -4,22 +4,37 @@ #include "jes/Ref.h" #include "jes/Text.h" #include +#include namespace jes { class FileReader { public: + enum + { + LINE_ENDING_LF, + LINE_ENDING_CR, + LINE_ENDING_CRLF, + LINE_ENDING_COUNT + }; + FileReader(); ~FileReader(); bool load(const char * fname); - void close(); unsigned int num_lines() { return m_num_lines; } TextRef get_line(unsigned int line_no); + int get_line_endings() { return m_line_endings; } protected: + typedef std::pair LineIndexPair; + typedef std::vector LineIndexPairVector; + typedef Ref LineIndexPairVectorRef; + void load_buf(size_t size); int m_fd; unsigned int m_num_lines; uint8_t * m_buf; + int m_line_endings; + LineIndexPairVectorRef m_lines; }; typedef Ref FileReaderRef; } diff --git a/src/lib/src/FileReader.cc b/src/lib/src/FileReader.cc index c4c7b9c..40bccb2 100644 --- a/src/lib/src/FileReader.cc +++ b/src/lib/src/FileReader.cc @@ -13,11 +13,17 @@ namespace jes m_fd = 0; m_num_lines = 0u; m_buf = NULL; + m_line_endings = LINE_ENDING_COUNT; + m_lines = NULL; } FileReader::~FileReader() { - close(); + if (m_buf != NULL) + { + delete[] m_buf; + m_buf = NULL; + } } bool FileReader::load(const char * fname) @@ -48,25 +54,76 @@ namespace jes if (read(m_fd, m_buf, st.st_size) != st.st_size) { - m_buf = NULL; delete[] m_buf; + m_buf = NULL; + close(m_fd); + m_fd = 0; return false; } + close(m_fd); + m_fd = 0; + + load_buf(st.st_size); return true; } - void FileReader::close() + void FileReader::load_buf(size_t size) { - if (m_buf != NULL) + LineIndexPairVectorRef lines[LINE_ENDING_COUNT]; + size_t line_start[LINE_ENDING_COUNT] = {0}; + unsigned int n_cr = 0; + unsigned int n_lf = 0; + bool crlf = true; + for (size_t i = 0; i < LINE_ENDING_COUNT; i++) { - delete[] m_buf; - m_buf = NULL; + lines[i] = new LineIndexPairVector(); } - if (m_fd != 0) + for (size_t i = 0; i < size; i++) { - ::close(m_fd); - m_fd = 0; + if (m_buf[i] == '\r') + { + lines[LINE_ENDING_CR]->push_back(LineIndexPair(&m_buf[line_start[LINE_ENDING_CR]], i - line_start[LINE_ENDING_CR])); + n_cr++; + line_start[LINE_ENDING_CR] = i + 1; + if (crlf) + { + if ((i < (size - 1)) && (m_buf[i + 1] == '\n')) + { + lines[LINE_ENDING_CRLF]->push_back(LineIndexPair(&m_buf[line_start[LINE_ENDING_CRLF]], i - line_start[LINE_ENDING_CRLF])); + n_lf++; + i++; + line_start[LINE_ENDING_CRLF] = i + 1; + } + else + { + crlf = false; + } + } + } + else if (m_buf[i] == '\n') + { + lines[LINE_ENDING_LF]->push_back(LineIndexPair(&m_buf[line_start[LINE_ENDING_LF]], i - line_start[LINE_ENDING_LF])); + crlf = false; + n_lf++; + line_start[LINE_ENDING_LF] = i + 1; + } + } + + if (crlf && (n_lf > 0u)) + { + m_line_endings = LINE_ENDING_CRLF; + m_lines = lines[LINE_ENDING_CRLF]; + } + else if ((n_cr > 0u) && (n_lf == 0u)) + { + m_line_endings = LINE_ENDING_CR; + m_lines = lines[LINE_ENDING_CR]; + } + else + { + m_line_endings = LINE_ENDING_LF; + m_lines = lines[LINE_ENDING_LF]; } }