Add output parameter to Encoding::decode() to get encoded size

This commit is contained in:
Josh Holtrop 2016-12-17 10:54:50 -05:00
parent ee37616f82
commit 39088f6518
3 changed files with 64 additions and 48 deletions

View File

@ -97,64 +97,71 @@ const uint8_t * Encoding::beginning_of_code_point(Type type, const uint8_t * enc
return nullptr;
}
uint32_t Encoding::decode(Type type, const uint8_t * encoded)
uint32_t Encoding::decode(Type type, const uint8_t * encoded, uint8_t * encoded_size)
{
uint32_t result = 0u;
uint8_t size = 0u;
switch (type)
{
case UTF_8:
{
const uint8_t c = *encoded;
uint8_t following_bytes = 0u;
uint32_t v;
const uint_fast8_t c = *encoded;
if ((c & 0x80u) == 0u)
{
return c;
}
else if ((c & 0xE0u) == 0xC0u)
{
v = c & 0x1Fu;
following_bytes = 1u;
}
else if ((c & 0xF0u) == 0xE0u)
{
v = c & 0x0Fu;
following_bytes = 2u;
}
else if ((c & 0xF8u) == 0xF0u)
{
v = c & 0x07u;
following_bytes = 3u;
}
else if ((c & 0xFCu) == 0xF8u)
{
v = c & 0x03u;
following_bytes = 4u;
}
else if ((c & 0xFEu) == 0xFCu)
{
v = c & 0x01u;
following_bytes = 5u;
result = c;
size = 1u;
}
else
{
return 0u;
uint_fast8_t following_bytes = 0u;
if ((c & 0xE0u) == 0xC0u)
{
result = c & 0x1Fu;
following_bytes = 1u;
}
else if ((c & 0xF0u) == 0xE0u)
{
result = c & 0x0Fu;
following_bytes = 2u;
}
else if ((c & 0xF8u) == 0xF0u)
{
result = c & 0x07u;
following_bytes = 3u;
}
else if ((c & 0xFCu) == 0xF8u)
{
result = c & 0x03u;
following_bytes = 4u;
}
else if ((c & 0xFEu) == 0xFCu)
{
result = c & 0x01u;
following_bytes = 5u;
}
size = following_bytes + 1u;
while (following_bytes-- > 0u)
{
encoded++;
result <<= 6u;
result |= *encoded & 0x3Fu;
}
}
while (following_bytes-- > 0u)
{
encoded++;
v <<= 6u;
v |= *encoded & 0x3Fu;
}
return v;
}
break;
case CP_1252:
/* TODO: map byte to code point */
return *encoded;
result = *encoded;
size = 1u;
break;
}
return 0u;
if (encoded_size != nullptr)
{
*encoded_size = size;
}
return result;
}
uint8_t Encoding::num_bytes_to_encode_code_point(uint32_t code_point, Type type)

View File

@ -21,7 +21,7 @@ public:
static Type detect_encoding(const uint8_t * buffer, size_t length);
static uint8_t num_bytes_in_code_point(Type type, const uint8_t * encoded);
static const uint8_t * beginning_of_code_point(Type type, const uint8_t * encoded);
/**
 * Decode the code point whose encoded form begins at encoded.
 *
 * encoded_size is optional: when it is not null it receives the number of
 * bytes occupied by the encoded code point. It defaults to nullptr so that
 * two-argument calls keep working; a separate two-argument overload must not
 * be declared alongside this one, or such calls become ambiguous.
 */
static uint32_t decode(Type type, const uint8_t * encoded, uint8_t * encoded_size = nullptr);
static uint8_t num_bytes_to_encode_code_point(uint32_t code_point, Type type);
static uint8_t encode(uint32_t code_point, Type type, uint8_t * buffer);
};

View File

@ -55,17 +55,26 @@ TEST(Encoding_beginning_of_code_point, returns_pointer_to_beginning_of_code_poin
TEST(Encoding_decode, decodes_UTF_8_correctly)
{
    /* One entry per sequence length (1 to 6 bytes): the encoded bytes, the
     * code point they decode to, and the number of bytes consumed. */
    struct Sample
    {
        const char * bytes;
        uint32_t code_point;
        uint8_t length;
    };
    static const Sample samples[] = {
        {"%", static_cast<uint32_t>('%'), 1u},
        {"\xC1\x82", 0x42u, 2u},
        {"\xE1\x82\x83", 0x1083u, 3u},
        {"\xF1\x82\x83\x84", 0x420C4u, 4u},
        {"\xF9\x82\x83\x84\x85", 0x1083105u, 5u},
        {"\xFD\x82\x83\x84\x85\x86", 0x420C4146u, 6u},
    };

    /* First pass: two-argument calls, without the size output. */
    for (const Sample & sample : samples)
    {
        const uint8_t * const input = reinterpret_cast<const uint8_t *>(sample.bytes);
        EXPECT_EQ(sample.code_point, Encoding::decode(Encoding::UTF_8, input));
    }

    /* Second pass: the same inputs, also checking the reported size. */
    uint8_t size = 0xFFu;
    for (const Sample & sample : samples)
    {
        const uint8_t * const input = reinterpret_cast<const uint8_t *>(sample.bytes);
        EXPECT_EQ(sample.code_point, Encoding::decode(Encoding::UTF_8, input, &size));
        EXPECT_EQ(sample.length, size);
    }
}
TEST(Encoding_decode, decodes_CP_1252_correctly)
{
    /* Two-argument call: the byte value comes back unchanged. */
    EXPECT_EQ(0x99u, Encoding::decode(Encoding::CP_1252, (const uint8_t *)"\x99"));

    /* Start from a sentinel that is not a valid size, so the size check
     * below fails deterministically if decode() never writes the output
     * (instead of reading an uninitialised variable). */
    uint8_t size = 0xFFu;
    EXPECT_EQ(0x99u, Encoding::decode(Encoding::CP_1252, (const uint8_t *)"\x99", &size));
    EXPECT_EQ(1u, size);
}