Add output parameter to Encoding::decode() to get encoded size
This commit is contained in:
parent
ee37616f82
commit
39088f6518
@@ -97,64 +97,71 @@ const uint8_t * Encoding::beginning_of_code_point(Type type, const uint8_t * enc
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
uint32_t Encoding::decode(Type type, const uint8_t * encoded)
|
||||
uint32_t Encoding::decode(Type type, const uint8_t * encoded, uint8_t * encoded_size)
|
||||
{
|
||||
uint32_t result = 0u;
|
||||
uint8_t size = 0u;
|
||||
switch (type)
|
||||
{
|
||||
case UTF_8:
|
||||
{
|
||||
const uint8_t c = *encoded;
|
||||
uint8_t following_bytes = 0u;
|
||||
uint32_t v;
|
||||
const uint_fast8_t c = *encoded;
|
||||
if ((c & 0x80u) == 0u)
|
||||
{
|
||||
return c;
|
||||
}
|
||||
else if ((c & 0xE0u) == 0xC0u)
|
||||
{
|
||||
v = c & 0x1Fu;
|
||||
following_bytes = 1u;
|
||||
}
|
||||
else if ((c & 0xF0u) == 0xE0u)
|
||||
{
|
||||
v = c & 0x0Fu;
|
||||
following_bytes = 2u;
|
||||
}
|
||||
else if ((c & 0xF8u) == 0xF0u)
|
||||
{
|
||||
v = c & 0x07u;
|
||||
following_bytes = 3u;
|
||||
}
|
||||
else if ((c & 0xFCu) == 0xF8u)
|
||||
{
|
||||
v = c & 0x03u;
|
||||
following_bytes = 4u;
|
||||
}
|
||||
else if ((c & 0xFEu) == 0xFCu)
|
||||
{
|
||||
v = c & 0x01u;
|
||||
following_bytes = 5u;
|
||||
result = c;
|
||||
size = 1u;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0u;
|
||||
uint_fast8_t following_bytes = 0u;
|
||||
if ((c & 0xE0u) == 0xC0u)
|
||||
{
|
||||
result = c & 0x1Fu;
|
||||
following_bytes = 1u;
|
||||
}
|
||||
else if ((c & 0xF0u) == 0xE0u)
|
||||
{
|
||||
result = c & 0x0Fu;
|
||||
following_bytes = 2u;
|
||||
}
|
||||
else if ((c & 0xF8u) == 0xF0u)
|
||||
{
|
||||
result = c & 0x07u;
|
||||
following_bytes = 3u;
|
||||
}
|
||||
else if ((c & 0xFCu) == 0xF8u)
|
||||
{
|
||||
result = c & 0x03u;
|
||||
following_bytes = 4u;
|
||||
}
|
||||
else if ((c & 0xFEu) == 0xFCu)
|
||||
{
|
||||
result = c & 0x01u;
|
||||
following_bytes = 5u;
|
||||
}
|
||||
size = following_bytes + 1u;
|
||||
while (following_bytes-- > 0u)
|
||||
{
|
||||
encoded++;
|
||||
result <<= 6u;
|
||||
result |= *encoded & 0x3Fu;
|
||||
}
|
||||
}
|
||||
while (following_bytes-- > 0u)
|
||||
{
|
||||
encoded++;
|
||||
v <<= 6u;
|
||||
v |= *encoded & 0x3Fu;
|
||||
}
|
||||
return v;
|
||||
}
|
||||
break;
|
||||
case CP_1252:
|
||||
/* TODO: map byte to code point */
|
||||
return *encoded;
|
||||
result = *encoded;
|
||||
size = 1u;
|
||||
break;
|
||||
}
|
||||
|
||||
return 0u;
|
||||
if (encoded_size != nullptr)
|
||||
{
|
||||
*encoded_size = size;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
uint8_t Encoding::num_bytes_to_encode_code_point(uint32_t code_point, Type type)
|
||||
|
@@ -21,7 +21,7 @@ public:
|
||||
static Type detect_encoding(const uint8_t * buffer, size_t length);
|
||||
static uint8_t num_bytes_in_code_point(Type type, const uint8_t * encoded);
|
||||
static const uint8_t * beginning_of_code_point(Type type, const uint8_t * encoded);
|
||||
static uint32_t decode(Type type, const uint8_t * encoded);
|
||||
static uint32_t decode(Type type, const uint8_t * encoded, uint8_t * encoded_size = nullptr);
|
||||
static uint8_t num_bytes_to_encode_code_point(uint32_t code_point, Type type);
|
||||
static uint8_t encode(uint32_t code_point, Type type, uint8_t * buffer);
|
||||
};
|
||||
|
@@ -55,17 +55,26 @@ TEST(Encoding_beginning_of_code_point, returns_pointer_to_beginning_of_code_poin
|
||||
|
||||
TEST(Encoding_decode, decodes_UTF_8_correctly)
|
||||
{
|
||||
EXPECT_EQ((uint32_t)'%', Encoding::decode(Encoding::UTF_8, (const uint8_t *)"%"));
|
||||
EXPECT_EQ(0x42u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xC1\x82"));
|
||||
EXPECT_EQ(0x1083u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xE1\x82\x83"));
|
||||
EXPECT_EQ(0x420C4u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xF1\x82\x83\x84"));
|
||||
EXPECT_EQ(0x1083105u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xF9\x82\x83\x84\x85"));
|
||||
EXPECT_EQ(0x420C4146u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xFD\x82\x83\x84\x85\x86"));
|
||||
uint8_t size = 0xFFu;
|
||||
EXPECT_EQ((uint32_t)'%', Encoding::decode(Encoding::UTF_8, (const uint8_t *)"%", &size));
|
||||
EXPECT_EQ(1u, size);
|
||||
EXPECT_EQ(0x42u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xC1\x82", &size));
|
||||
EXPECT_EQ(2u, size);
|
||||
EXPECT_EQ(0x1083u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xE1\x82\x83", &size));
|
||||
EXPECT_EQ(3u, size);
|
||||
EXPECT_EQ(0x420C4u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xF1\x82\x83\x84", &size));
|
||||
EXPECT_EQ(4u, size);
|
||||
EXPECT_EQ(0x1083105u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xF9\x82\x83\x84\x85", &size));
|
||||
EXPECT_EQ(5u, size);
|
||||
EXPECT_EQ(0x420C4146u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xFD\x82\x83\x84\x85\x86", &size));
|
||||
EXPECT_EQ(6u, size);
|
||||
}
|
||||
|
||||
TEST(Encoding_decode, decodes_CP_1252_correctly)
|
||||
{
|
||||
EXPECT_EQ(0x99u, Encoding::decode(Encoding::CP_1252, (const uint8_t *)"\x99"));
|
||||
uint8_t size;
|
||||
EXPECT_EQ(0x99u, Encoding::decode(Encoding::CP_1252, (const uint8_t *)"\x99", &size));
|
||||
EXPECT_EQ(1u, size);
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user