Add output parameter to Encoding::decode() to get encoded size
This commit is contained in:
parent
ee37616f82
commit
39088f6518
@@ -97,64 +97,71 @@ const uint8_t * Encoding::beginning_of_code_point(Type type, const uint8_t * enc
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t Encoding::decode(Type type, const uint8_t * encoded)
|
uint32_t Encoding::decode(Type type, const uint8_t * encoded, uint8_t * encoded_size)
|
||||||
{
|
{
|
||||||
|
uint32_t result = 0u;
|
||||||
|
uint8_t size = 0u;
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
case UTF_8:
|
case UTF_8:
|
||||||
{
|
{
|
||||||
const uint8_t c = *encoded;
|
const uint_fast8_t c = *encoded;
|
||||||
uint8_t following_bytes = 0u;
|
|
||||||
uint32_t v;
|
|
||||||
if ((c & 0x80u) == 0u)
|
if ((c & 0x80u) == 0u)
|
||||||
{
|
{
|
||||||
return c;
|
result = c;
|
||||||
}
|
size = 1u;
|
||||||
else if ((c & 0xE0u) == 0xC0u)
|
|
||||||
{
|
|
||||||
v = c & 0x1Fu;
|
|
||||||
following_bytes = 1u;
|
|
||||||
}
|
|
||||||
else if ((c & 0xF0u) == 0xE0u)
|
|
||||||
{
|
|
||||||
v = c & 0x0Fu;
|
|
||||||
following_bytes = 2u;
|
|
||||||
}
|
|
||||||
else if ((c & 0xF8u) == 0xF0u)
|
|
||||||
{
|
|
||||||
v = c & 0x07u;
|
|
||||||
following_bytes = 3u;
|
|
||||||
}
|
|
||||||
else if ((c & 0xFCu) == 0xF8u)
|
|
||||||
{
|
|
||||||
v = c & 0x03u;
|
|
||||||
following_bytes = 4u;
|
|
||||||
}
|
|
||||||
else if ((c & 0xFEu) == 0xFCu)
|
|
||||||
{
|
|
||||||
v = c & 0x01u;
|
|
||||||
following_bytes = 5u;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return 0u;
|
uint_fast8_t following_bytes = 0u;
|
||||||
|
if ((c & 0xE0u) == 0xC0u)
|
||||||
|
{
|
||||||
|
result = c & 0x1Fu;
|
||||||
|
following_bytes = 1u;
|
||||||
|
}
|
||||||
|
else if ((c & 0xF0u) == 0xE0u)
|
||||||
|
{
|
||||||
|
result = c & 0x0Fu;
|
||||||
|
following_bytes = 2u;
|
||||||
|
}
|
||||||
|
else if ((c & 0xF8u) == 0xF0u)
|
||||||
|
{
|
||||||
|
result = c & 0x07u;
|
||||||
|
following_bytes = 3u;
|
||||||
|
}
|
||||||
|
else if ((c & 0xFCu) == 0xF8u)
|
||||||
|
{
|
||||||
|
result = c & 0x03u;
|
||||||
|
following_bytes = 4u;
|
||||||
|
}
|
||||||
|
else if ((c & 0xFEu) == 0xFCu)
|
||||||
|
{
|
||||||
|
result = c & 0x01u;
|
||||||
|
following_bytes = 5u;
|
||||||
|
}
|
||||||
|
size = following_bytes + 1u;
|
||||||
|
while (following_bytes-- > 0u)
|
||||||
|
{
|
||||||
|
encoded++;
|
||||||
|
result <<= 6u;
|
||||||
|
result |= *encoded & 0x3Fu;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
while (following_bytes-- > 0u)
|
|
||||||
{
|
|
||||||
encoded++;
|
|
||||||
v <<= 6u;
|
|
||||||
v |= *encoded & 0x3Fu;
|
|
||||||
}
|
|
||||||
return v;
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case CP_1252:
|
case CP_1252:
|
||||||
/* TODO: map byte to code point */
|
/* TODO: map byte to code point */
|
||||||
return *encoded;
|
result = *encoded;
|
||||||
|
size = 1u;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0u;
|
if (encoded_size != nullptr)
|
||||||
|
{
|
||||||
|
*encoded_size = size;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t Encoding::num_bytes_to_encode_code_point(uint32_t code_point, Type type)
|
uint8_t Encoding::num_bytes_to_encode_code_point(uint32_t code_point, Type type)
|
||||||
|
@@ -21,7 +21,7 @@ public:
|
|||||||
static Type detect_encoding(const uint8_t * buffer, size_t length);
|
static Type detect_encoding(const uint8_t * buffer, size_t length);
|
||||||
static uint8_t num_bytes_in_code_point(Type type, const uint8_t * encoded);
|
static uint8_t num_bytes_in_code_point(Type type, const uint8_t * encoded);
|
||||||
static const uint8_t * beginning_of_code_point(Type type, const uint8_t * encoded);
|
static const uint8_t * beginning_of_code_point(Type type, const uint8_t * encoded);
|
||||||
static uint32_t decode(Type type, const uint8_t * encoded);
|
static uint32_t decode(Type type, const uint8_t * encoded, uint8_t * encoded_size = nullptr);
|
||||||
static uint8_t num_bytes_to_encode_code_point(uint32_t code_point, Type type);
|
static uint8_t num_bytes_to_encode_code_point(uint32_t code_point, Type type);
|
||||||
static uint8_t encode(uint32_t code_point, Type type, uint8_t * buffer);
|
static uint8_t encode(uint32_t code_point, Type type, uint8_t * buffer);
|
||||||
};
|
};
|
||||||
|
@@ -55,17 +55,26 @@ TEST(Encoding_beginning_of_code_point, returns_pointer_to_beginning_of_code_poin
|
|||||||
|
|
||||||
TEST(Encoding_decode, decodes_UTF_8_correctly)
|
TEST(Encoding_decode, decodes_UTF_8_correctly)
|
||||||
{
|
{
|
||||||
EXPECT_EQ((uint32_t)'%', Encoding::decode(Encoding::UTF_8, (const uint8_t *)"%"));
|
uint8_t size = 0xFFu;
|
||||||
EXPECT_EQ(0x42u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xC1\x82"));
|
EXPECT_EQ((uint32_t)'%', Encoding::decode(Encoding::UTF_8, (const uint8_t *)"%", &size));
|
||||||
EXPECT_EQ(0x1083u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xE1\x82\x83"));
|
EXPECT_EQ(1u, size);
|
||||||
EXPECT_EQ(0x420C4u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xF1\x82\x83\x84"));
|
EXPECT_EQ(0x42u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xC1\x82", &size));
|
||||||
EXPECT_EQ(0x1083105u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xF9\x82\x83\x84\x85"));
|
EXPECT_EQ(2u, size);
|
||||||
EXPECT_EQ(0x420C4146u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xFD\x82\x83\x84\x85\x86"));
|
EXPECT_EQ(0x1083u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xE1\x82\x83", &size));
|
||||||
|
EXPECT_EQ(3u, size);
|
||||||
|
EXPECT_EQ(0x420C4u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xF1\x82\x83\x84", &size));
|
||||||
|
EXPECT_EQ(4u, size);
|
||||||
|
EXPECT_EQ(0x1083105u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xF9\x82\x83\x84\x85", &size));
|
||||||
|
EXPECT_EQ(5u, size);
|
||||||
|
EXPECT_EQ(0x420C4146u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xFD\x82\x83\x84\x85\x86", &size));
|
||||||
|
EXPECT_EQ(6u, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Encoding_decode, decodes_CP_1252_correctly)
|
TEST(Encoding_decode, decodes_CP_1252_correctly)
|
||||||
{
|
{
|
||||||
EXPECT_EQ(0x99u, Encoding::decode(Encoding::CP_1252, (const uint8_t *)"\x99"));
|
uint8_t size;
|
||||||
|
EXPECT_EQ(0x99u, Encoding::decode(Encoding::CP_1252, (const uint8_t *)"\x99", &size));
|
||||||
|
EXPECT_EQ(1u, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user