// Encoding unit tests (C++ source, 109 lines, 3.6 KiB).
#include "gtest/gtest.h"

#include "Encoding.h"

#include <cstdint>
#include <cstring>
#include <string>
|
|
|
|
// A buffer made only of 7-bit ASCII bytes is expected to be reported as UTF-8.
TEST(Encoding_detect_encoding, returns_UTF_8_for_all_ascii)
{
    uint8_t ascii_text[] = "ABCDEFG 12345";
    // The string literal appends a terminating NUL; leave it out of the scan.
    const size_t length = sizeof(ascii_text) - 1;

    EXPECT_EQ(Encoding::UTF_8, Encoding::detect_encoding(ascii_text, length));
}
|
|
|
|
// A buffer mixing ASCII with multi-byte sequences of every length from two to
// six bytes is expected to be reported as UTF-8.
TEST(Encoding_detect_encoding, returns_UTF_8_for_valid_encodings)
{
    // Adjacent literals are concatenated by the compiler, so the array holds
    // the sequences back to back, one literal per sequence.
    uint8_t mixed_text[] =
        "%"                         // ASCII
        "\xC1\x82"                  // 2-byte sequence
        "\xE1\x82\x83"              // 3-byte sequence
        "\xF1\x82\x83\x84"          // 4-byte sequence
        "\xF9\x82\x83\x84\x85"      // 5-byte sequence
        "\xFD\x82\x83\x84\x85\x86"  // 6-byte sequence
        "yay";                      // trailing ASCII
    // The string literal appends a terminating NUL; leave it out of the scan.
    const size_t length = sizeof(mixed_text) - 1;

    EXPECT_EQ(Encoding::UTF_8, Encoding::detect_encoding(mixed_text, length));
}
|
|
|
|
// A buffer whose high bytes do not form a valid UTF-8 sequence is expected to
// be reported as CP-1252.
TEST(Encoding_detect_encoding, returns_CP_1252_if_not_valid_utf_8)
{
    // ASCII prefix followed by two bytes with the high bit set (0xEE, 0xCC).
    uint8_t non_utf8_text[] = "ABC\xEE\xCC";
    // The string literal appends a terminating NUL; leave it out of the scan.
    const size_t length = sizeof(non_utf8_text) - 1;

    EXPECT_EQ(Encoding::CP_1252, Encoding::detect_encoding(non_utf8_text, length));
}
|
|
|
|
|
|
// An ASCII character interpreted as UTF-8 is expected to occupy one byte.
TEST(Encoding_num_bytes_in_code_point, returns_1_for_UTF_8_ascii_character)
{
    uint8_t ascii_char[] = "Z";

    EXPECT_EQ(1, Encoding::num_bytes_in_code_point(Encoding::UTF_8, ascii_char));
}
|
|
|
|
// A high-bit byte interpreted as CP-1252 is expected to occupy one byte.
TEST(Encoding_num_bytes_in_code_point, returns_1_for_CP_1252_character)
{
    uint8_t high_byte_char[] = "\x96";

    EXPECT_EQ(1, Encoding::num_bytes_in_code_point(Encoding::CP_1252, high_byte_char));
}
|
|
|
|
// A sequence starting with 0xE1 interpreted as UTF-8 is expected to occupy
// three bytes.
TEST(Encoding_num_bytes_in_code_point, returns_3_for_UTF_8_multi_byte)
{
    uint8_t three_byte_sequence[] = "\xE1\x82\x83";

    EXPECT_EQ(3, Encoding::num_bytes_in_code_point(Encoding::UTF_8, three_byte_sequence));
}
|
|
|
|
|
|
// Starting from the last byte of a three-byte UTF-8 sequence, the function is
// expected to return a pointer to the first byte of that sequence.
TEST(Encoding_beginning_of_code_point, returns_pointer_to_beginning_of_code_point_for_UTF_8)
{
    // Layout: '1', '2', then the three-byte sequence at indices 2..4.
    uint8_t text[] = "12\xE1\x82\x83";
    uint8_t * sequence_start = text + 2;
    uint8_t * sequence_end = text + 4;

    EXPECT_EQ(sequence_start, Encoding::beginning_of_code_point(Encoding::UTF_8, sequence_end));
}
|
|
|
|
// With CP-1252 the function is expected to return the very pointer it was
// given, even when the surrounding bytes would form a UTF-8 sequence.
TEST(Encoding_beginning_of_code_point, returns_pointer_to_beginning_of_code_point_for_CP_1252)
{
    // Same bytes as the UTF-8 variant of this test; only the encoding differs.
    uint8_t text[] = "12\xE1\x82\x83";
    uint8_t * position = text + 4;

    EXPECT_EQ(position, Encoding::beginning_of_code_point(Encoding::CP_1252, position));
}
|
|
|
|
|
|
// Decodes one UTF-8 sequence of each length from one to six bytes and checks
// both the resulting code point and the size reported through the out-param.
TEST(Encoding_decode, decodes_UTF_8_correctly)
{
    // Starts at a value no case below expects, so the first size check can
    // only pass if decode() actually wrote the out-param.
    uint8_t consumed = 0xFFu;

    // 1 byte
    EXPECT_EQ(static_cast<uint32_t>('%'),
              Encoding::decode(Encoding::UTF_8, reinterpret_cast<const uint8_t *>("%"), &consumed));
    EXPECT_EQ(1u, consumed);

    // 2 bytes
    EXPECT_EQ(0x42u,
              Encoding::decode(Encoding::UTF_8, reinterpret_cast<const uint8_t *>("\xC1\x82"), &consumed));
    EXPECT_EQ(2u, consumed);

    // 3 bytes
    EXPECT_EQ(0x1083u,
              Encoding::decode(Encoding::UTF_8, reinterpret_cast<const uint8_t *>("\xE1\x82\x83"), &consumed));
    EXPECT_EQ(3u, consumed);

    // 4 bytes
    EXPECT_EQ(0x420C4u,
              Encoding::decode(Encoding::UTF_8, reinterpret_cast<const uint8_t *>("\xF1\x82\x83\x84"), &consumed));
    EXPECT_EQ(4u, consumed);

    // 5 bytes
    EXPECT_EQ(0x1083105u,
              Encoding::decode(Encoding::UTF_8, reinterpret_cast<const uint8_t *>("\xF9\x82\x83\x84\x85"), &consumed));
    EXPECT_EQ(5u, consumed);

    // 6 bytes
    EXPECT_EQ(0x420C4146u,
              Encoding::decode(Encoding::UTF_8, reinterpret_cast<const uint8_t *>("\xFD\x82\x83\x84\x85\x86"), &consumed));
    EXPECT_EQ(6u, consumed);
}
|
|
|
|
// Decodes a single CP-1252 byte and checks both the returned value and the
// size reported through the out-param.
TEST(Encoding_decode, decodes_CP_1252_correctly)
{
    // Initialized to a sentinel that differs from the expected size, as in the
    // UTF-8 decode test. Without it, a decode() that never writes the
    // out-param would leave the size check reading an indeterminate value,
    // which could pass by accident.
    uint8_t size = 0xFFu;

    EXPECT_EQ(0x99u, Encoding::decode(Encoding::CP_1252, reinterpret_cast<const uint8_t *>("\x99"), &size));
    EXPECT_EQ(1u, size);
}
|
|
|
|
|
|
// Table-driven check that encode() produces the expected UTF-8 bytes, and
// returns the expected byte count, for code points needing one to six bytes.
TEST(Encoding_encode, encodes_UTF_8_correctly)
{
    struct
    {
        uint32_t code_point;    // value handed to encode()
        const char * expected;  // NUL-terminated expected byte sequence
    } tests[] = {
        {static_cast<uint32_t>('%'), "%"},           // 1 byte
        {0x567u, "\xD5\xA7"},                        // 2 bytes
        {0x9876u, "\xE9\xA1\xB6"},                   // 3 bytes
        {0x12345u, "\xF0\x92\x8D\x85"},              // 4 bytes
        {0x1234567u, "\xF9\x88\xB4\x95\xA7"},        // 5 bytes
        {0x12345678u, "\xFC\x92\x8D\x85\x99\xB8"},   // 6 bytes
    };
    const size_t test_count = sizeof(tests) / sizeof(tests[0]);

    for (size_t i = 0; i < test_count; i++)
    {
        // Zero-filled for every case so the bytes written by encode() are
        // always followed by a NUL and can be compared as a C string.
        char buffer[10] = {0};

        // The case index is streamed into each failure message; otherwise a
        // failure inside the loop would not say which table row broke.
        EXPECT_EQ(std::strlen(tests[i].expected),
                  Encoding::encode(tests[i].code_point, Encoding::UTF_8, reinterpret_cast<uint8_t *>(buffer)))
            << "wrong encoded length for test case " << i;
        EXPECT_EQ(std::string(tests[i].expected), buffer)
            << "wrong encoded bytes for test case " << i;
    }
}
|
|
|
|
// Encoding a value as CP-1252 is expected to write exactly one byte holding
// that value.
TEST(Encoding_encode, encodes_CP_1252_correctly)
{
    // Zero-filled so the written byte is followed by a NUL and the result can
    // be compared as a C string.
    char encoded[10] = {0};
    uint8_t * destination = reinterpret_cast<uint8_t *>(encoded);

    EXPECT_EQ(1, Encoding::encode(0x89u, Encoding::CP_1252, destination));
    EXPECT_EQ(std::string("\x89"), encoded);
}
|