121 lines
3.0 KiB
C++
121 lines
3.0 KiB
C++
#include "TextLoader.h"
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
|
|
/** Create a TextLoader. */
|
|
TextLoader::TextLoader()
|
|
{
|
|
m_line_endings = LineEndings::LF;
|
|
m_encoding = Encoding::UTF_8;
|
|
m_eol_at_eof = true;
|
|
m_num_lines = 0u;
|
|
}
|
|
|
|
/**
|
|
* Scan text to detect line endings and record their positions.
|
|
*
|
|
* @param buffer Buffer containing the text to import.
|
|
* @param size Size of the text to load. The buffer must be at least one
|
|
* byte larger than this.
|
|
* @param out_size Size of the loaded buffer.
|
|
*/
|
|
void TextLoader::load_buffer(uint8_t * buffer, size_t size, size_t * out_size)
|
|
{
|
|
std::list<size_t> cr_indexes;
|
|
size_t n_lines[LineEndings::COUNT] = {0};
|
|
bool crlf = true;
|
|
size_t next_start_of_line_index = 0u;
|
|
size_t out_size_local = size;
|
|
for (size_t i = 0; i < size; i++)
|
|
{
|
|
if (buffer[i] == '\r')
|
|
{
|
|
cr_indexes.push_back(i);
|
|
n_lines[LineEndings::CR]++;
|
|
if (crlf)
|
|
{
|
|
if ((i < (size - 1)) && (buffer[i + 1] == '\n'))
|
|
{
|
|
n_lines[LineEndings::LF]++;
|
|
n_lines[LineEndings::CRLF]++;
|
|
i++;
|
|
}
|
|
else
|
|
{
|
|
crlf = false;
|
|
}
|
|
}
|
|
next_start_of_line_index = i + 1;
|
|
}
|
|
else if (buffer[i] == '\n')
|
|
{
|
|
crlf = false;
|
|
n_lines[LineEndings::LF]++;
|
|
next_start_of_line_index = i + 1;
|
|
}
|
|
}
|
|
|
|
if (crlf && (n_lines[LineEndings::LF] > 0u))
|
|
{
|
|
m_line_endings = LineEndings::CRLF;
|
|
}
|
|
else if ((n_lines[LineEndings::CR] > 0u) && (n_lines[LineEndings::LF] == 0u))
|
|
{
|
|
m_line_endings = LineEndings::CR;
|
|
}
|
|
else
|
|
{
|
|
m_line_endings = LineEndings::LF;
|
|
}
|
|
|
|
m_num_lines = n_lines[m_line_endings];
|
|
|
|
/* Check if there is a line that was not terminated by a EOL sequence at
|
|
* the end of the file. */
|
|
if (next_start_of_line_index < size)
|
|
{
|
|
m_num_lines++;
|
|
m_eol_at_eof = false;
|
|
if (crlf)
|
|
{
|
|
cr_indexes.push_back(size);
|
|
}
|
|
else
|
|
{
|
|
buffer[size] = '\n';
|
|
out_size_local++;
|
|
}
|
|
}
|
|
|
|
m_encoding = Encoding::detect_encoding(buffer, size);
|
|
|
|
if (m_line_endings == LineEndings::CRLF)
|
|
{
|
|
/* Compress all CRLF sequences to LF in memory. */
|
|
size_t dest = 0u;
|
|
size_t src = 0u;
|
|
for (auto cr_index : cr_indexes)
|
|
{
|
|
size_t size = cr_index - src;
|
|
if ((src != dest) && (size > 0u))
|
|
{
|
|
memmove(&buffer[dest], &buffer[src], size);
|
|
}
|
|
dest += size;
|
|
src = cr_index + 2u;
|
|
buffer[dest++] = '\n';
|
|
}
|
|
out_size_local = dest;
|
|
}
|
|
else if (m_line_endings == LineEndings::CR)
|
|
{
|
|
/* Convert all \r to \n */
|
|
for (auto cr_index : cr_indexes)
|
|
{
|
|
buffer[cr_index] = '\n';
|
|
}
|
|
}
|
|
|
|
*out_size = out_size_local;
|
|
}
|