diff --git a/test/src/test_Encoding.cc b/test/src/test_Encoding.cc new file mode 100644 index 0000000..acb79cc --- /dev/null +++ b/test/src/test_Encoding.cc @@ -0,0 +1,69 @@ +#include "gtest/gtest.h" +#include "Encoding.h" +#include + +TEST(Encoding_detect_encoding, returns_UTF_8_for_all_ascii) +{ + uint8_t buffer[] = "ABCDEFG 12345"; + EXPECT_EQ(Encoding::UTF_8, Encoding::detect_encoding(buffer, sizeof(buffer) - 1)); +} + +TEST(Encoding_detect_encoding, returns_UTF_8_for_valid_encodings) +{ + uint8_t buffer[] = "%\xC1\x82\xE1\x82\x83\xF1\x82\x83\x84\xF9\x82\x83\x84\x85\xFD\x82\x83\x84\x85\x86yay"; + EXPECT_EQ(Encoding::UTF_8, Encoding::detect_encoding(buffer, sizeof(buffer) - 1)); +} + +TEST(Encoding_detect_encoding, returns_CP_1252_if_not_valid_utf_8) +{ + uint8_t buffer[] = "ABC\xEE\xCC"; + EXPECT_EQ(Encoding::CP_1252, Encoding::detect_encoding(buffer, sizeof(buffer) - 1)); +} + + +TEST(Encoding_num_bytes_in_code_point, returns_1_for_UTF_8_ascii_character) +{ + uint8_t buffer[] = "Z"; + EXPECT_EQ(1, Encoding::num_bytes_in_code_point(Encoding::UTF_8, buffer)); +} + +TEST(Encoding_num_bytes_in_code_point, returns_1_for_CP_1252_character) +{ + uint8_t buffer[] = "\x96"; + EXPECT_EQ(1, Encoding::num_bytes_in_code_point(Encoding::CP_1252, buffer)); +} + +TEST(Encoding_num_bytes_in_code_point, returns_3_for_UTF_8_multi_byte) +{ + uint8_t buffer[] = "\xE1\x82\x83"; + EXPECT_EQ(3, Encoding::num_bytes_in_code_point(Encoding::UTF_8, buffer)); +} + + +TEST(Encoding_beginning_of_code_point, returns_pointer_to_beginning_of_code_point_for_UTF_8) +{ + uint8_t buffer[] = "12\xE1\x82\x83"; + EXPECT_EQ(&buffer[2], Encoding::beginning_of_code_point(Encoding::UTF_8, &buffer[4])); +} + +TEST(Encoding_beginning_of_code_point, returns_pointer_to_beginning_of_code_point_for_CP_1252) +{ + uint8_t buffer[] = "12\xE1\x82\x83"; + EXPECT_EQ(&buffer[4], Encoding::beginning_of_code_point(Encoding::CP_1252, &buffer[4])); +} + + +TEST(Encoding_decode, decodes_UTF_8_correctly) +{ + EXPECT_EQ((uint32_t)'%', Encoding::decode(Encoding::UTF_8, (const uint8_t *)"%")); + EXPECT_EQ(0x42u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xC1\x82")); + EXPECT_EQ(0x1083u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xE1\x82\x83")); + EXPECT_EQ(0x420C4u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xF1\x82\x83\x84")); + EXPECT_EQ(0x1083105u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xF9\x82\x83\x84\x85")); + EXPECT_EQ(0x420C4146u, Encoding::decode(Encoding::UTF_8, (const uint8_t *)"\xFD\x82\x83\x84\x85\x86")); +} + +TEST(Encoding_decode, decodes_CP_1252_correctly) +{ + EXPECT_EQ(0x99u, Encoding::decode(Encoding::CP_1252, (const uint8_t *)"\x99")); +}