add Encoding::decode()
This commit is contained in:
parent
b2bed71ee4
commit
f659c14242
@ -13,6 +13,7 @@ Buffer::Buffer()
|
||||
m_eol_at_eof = true;
|
||||
m_line_endings = LineEndings::LF;
|
||||
m_encoding = Encoding::UTF_8;
|
||||
piece_table->encoding = m_encoding;
|
||||
}
|
||||
|
||||
Buffer::~Buffer()
|
||||
@ -65,6 +66,7 @@ bool Buffer::load_from_file(const char * filename)
|
||||
m_eol_at_eof = text_loader.get_eol_at_eof();
|
||||
m_line_endings = text_loader.get_line_endings();
|
||||
m_encoding = text_loader.get_encoding();
|
||||
piece_table->encoding = m_encoding;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -96,3 +96,63 @@ const uint8_t * Encoding::beginning_of_code_point(Type type, const uint8_t * enc
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
 * Decode the code point that begins at the given encoded byte.
 *
 * This function takes no length argument, so it cannot bounds-check the
 * input. For UTF-8 it stops reading at the first byte that is not a
 * continuation byte.
 *
 * @param type
 *   Encoding of the data pointed to by encoded.
 * @param encoded
 *   Pointer to the first byte of the encoded code point.
 *
 * @return
 *   The decoded code point value. 0 is returned when the UTF-8 lead byte is
 *   not a recognized lead byte, when a following byte is not a UTF-8
 *   continuation byte, or when the encoding type is not handled.
 */
uint32_t Encoding::decode(Type type, const uint8_t * encoded)
{
    switch (type)
    {
    case UTF_8:
        {
            const uint8_t c = *encoded;
            uint8_t following_bytes = 0u;
            uint32_t v;

            /* The lead byte determines how many continuation bytes follow
             * and contributes the most significant payload bits. */
            if ((c & 0x80u) == 0u)
            {
                /* 0xxxxxxx: single-byte code point. */
                return c;
            }
            else if ((c & 0xE0u) == 0xC0u)
            {
                /* 110xxxxx */
                v = c & 0x1Fu;
                following_bytes = 1u;
            }
            else if ((c & 0xF0u) == 0xE0u)
            {
                /* 1110xxxx */
                v = c & 0x0Fu;
                following_bytes = 2u;
            }
            else if ((c & 0xF8u) == 0xF0u)
            {
                /* 11110xxx */
                v = c & 0x07u;
                following_bytes = 3u;
            }
            else if ((c & 0xFCu) == 0xF8u)
            {
                /* 111110xx */
                v = c & 0x03u;
                following_bytes = 4u;
            }
            else if ((c & 0xFEu) == 0xFCu)
            {
                /* 1111110x */
                v = c & 0x01u;
                following_bytes = 5u;
            }
            else
            {
                /* 10xxxxxx (stray continuation byte), 0xFE or 0xFF. */
                return 0u;
            }

            while (following_bytes-- > 0u)
            {
                encoded++;
                if ((*encoded & 0xC0u) != 0x80u)
                {
                    /* Not a 10xxxxxx continuation byte: the sequence is
                     * truncated or malformed, so do not read any further. */
                    return 0u;
                }
                /* Each continuation byte carries 6 payload bits. */
                v = (v << 6u) | (*encoded & 0x3Fu);
            }
            return v;
        }

    case CP_1252:
        /* TODO: map byte to code point */
        return *encoded;
    }

    return 0u;
}
|
||||
|
@ -16,6 +16,7 @@ public:
|
||||
static Type detect_encoding(const uint8_t * buffer, size_t length);
|
||||
static uint8_t num_bytes_in_code_point(Type type, const uint8_t * encoded);
|
||||
static const uint8_t * beginning_of_code_point(Type type, const uint8_t * encoded);
|
||||
static uint32_t decode(Type type, const uint8_t * encoded);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
#include <list>
|
||||
#include "Encoding.h"
|
||||
|
||||
class PieceTable
|
||||
{
|
||||
@ -64,8 +65,7 @@ public:
|
||||
/** Get the character pointed to by the cursor. */
|
||||
uint32_t operator*() const
|
||||
{
|
||||
/* TODO: Use Encoding */
|
||||
return piece->start[offset];
|
||||
return Encoding::decode(piece_table->encoding, &piece->start[offset]);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -92,6 +92,7 @@ public:
|
||||
Piece * start_piece;
|
||||
Piece * end_piece;
|
||||
uint8_t tabstop;
|
||||
Encoding::Type encoding;
|
||||
|
||||
PieceTable(const uint8_t * file_buffer, unsigned long file_buffer_size);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user