add Encoding::decode()

This commit is contained in:
Josh Holtrop 2016-08-15 22:14:47 -04:00
parent b2bed71ee4
commit f659c14242
4 changed files with 66 additions and 2 deletions

View File

@ -13,6 +13,7 @@ Buffer::Buffer()
m_eol_at_eof = true;
m_line_endings = LineEndings::LF;
m_encoding = Encoding::UTF_8;
piece_table->encoding = m_encoding;
}
Buffer::~Buffer()
@ -65,6 +66,7 @@ bool Buffer::load_from_file(const char * filename)
m_eol_at_eof = text_loader.get_eol_at_eof();
m_line_endings = text_loader.get_line_endings();
m_encoding = text_loader.get_encoding();
piece_table->encoding = m_encoding;
return true;
}

View File

@ -96,3 +96,63 @@ const uint8_t * Encoding::beginning_of_code_point(Type type, const uint8_t * enc
return nullptr;
}
/**
 * Decode the single code point whose encoded form begins at @p encoded.
 *
 * No buffer length is passed in, so the caller must guarantee that every
 * byte of the encoded code point is readable.
 *
 * @param type
 *   Encoding used by the bytes at @p encoded.
 * @param encoded
 *   Pointer to the first byte of the encoded code point.
 *
 * @return
 *   The decoded code point. 0 is returned when the lead byte is not a valid
 *   UTF-8 lead byte, when a UTF-8 trailing byte is not a continuation byte
 *   (10xxxxxx), or when @p type is not handled.
 */
uint32_t Encoding::decode(Type type, const uint8_t * encoded)
{
    switch (type)
    {
    case UTF_8:
        {
            const uint8_t lead = *encoded;
            uint8_t following_bytes = 0u;
            uint32_t v = 0u;

            if ((lead & 0x80u) == 0u)
            {
                /* Single-byte (ASCII) code point. */
                return lead;
            }
            else if ((lead & 0xE0u) == 0xC0u)
            {
                v = lead & 0x1Fu;
                following_bytes = 1u;
            }
            else if ((lead & 0xF0u) == 0xE0u)
            {
                v = lead & 0x0Fu;
                following_bytes = 2u;
            }
            else if ((lead & 0xF8u) == 0xF0u)
            {
                v = lead & 0x07u;
                following_bytes = 3u;
            }
            else if ((lead & 0xFCu) == 0xF8u)
            {
                /* Legacy 5-byte form; still accepted here. */
                v = lead & 0x03u;
                following_bytes = 4u;
            }
            else if ((lead & 0xFEu) == 0xFCu)
            {
                /* Legacy 6-byte form; still accepted here. */
                v = lead & 0x01u;
                following_bytes = 5u;
            }
            else
            {
                /* Stray continuation byte or 0xFE/0xFF. */
                return 0u;
            }

            for (; following_bytes > 0u; following_bytes--)
            {
                encoded++;
                if ((*encoded & 0xC0u) != 0x80u)
                {
                    /* Malformed or truncated sequence: stop here instead of
                     * folding unrelated bytes into the result and reading
                     * further past the bad byte. */
                    return 0u;
                }
                v = (v << 6u) | (*encoded & 0x3Fu);
            }
            return v;
        }

    case CP_1252:
        {
            /* Windows-1252 matches ISO-8859-1 (byte value == code point)
             * everywhere except 0x80..0x9F. The five bytes that have no
             * assigned character (0x81, 0x8D, 0x8F, 0x90, 0x9D) map to the
             * C1 control with the same value. */
            static const uint16_t high_map[32] = {
                0x20ACu, 0x0081u, 0x201Au, 0x0192u,
                0x201Eu, 0x2026u, 0x2020u, 0x2021u,
                0x02C6u, 0x2030u, 0x0160u, 0x2039u,
                0x0152u, 0x008Du, 0x017Du, 0x008Fu,
                0x0090u, 0x2018u, 0x2019u, 0x201Cu,
                0x201Du, 0x2022u, 0x2013u, 0x2014u,
                0x02DCu, 0x2122u, 0x0161u, 0x203Au,
                0x0153u, 0x009Du, 0x017Eu, 0x0178u,
            };
            const uint8_t c = *encoded;
            if ((c >= 0x80u) && (c <= 0x9Fu))
            {
                return high_map[c - 0x80u];
            }
            return c;
        }
    }

    /* Unhandled encoding type. */
    return 0u;
}

View File

@ -16,6 +16,7 @@ public:
static Type detect_encoding(const uint8_t * buffer, size_t length);
static uint8_t num_bytes_in_code_point(Type type, const uint8_t * encoded);
static const uint8_t * beginning_of_code_point(Type type, const uint8_t * encoded);
static uint32_t decode(Type type, const uint8_t * encoded);
};
#endif

View File

@ -6,6 +6,7 @@
#include <utility>
#include <memory>
#include <list>
#include "Encoding.h"
class PieceTable
{
@ -64,8 +65,7 @@ public:
/** Get the character pointed to by the cursor. */
uint32_t operator*() const
{
/* TODO: Use Encoding */
return piece->start[offset];
return Encoding::decode(piece_table->encoding, &piece->start[offset]);
}
/**
@ -92,6 +92,7 @@ public:
Piece * start_piece;
Piece * end_piece;
uint8_t tabstop;
Encoding::Type encoding;
PieceTable(const uint8_t * file_buffer, unsigned long file_buffer_size);