add Encoding class

This commit is contained in:
Josh Holtrop 2016-08-11 20:03:53 -04:00
parent 3c5a7b7341
commit c4253078a9
2 changed files with 81 additions and 0 deletions

62
src/core/Encoding.cc Normal file
View File

@ -0,0 +1,62 @@
#include "Encoding.h"
/**
 * Consume a run of UTF-8 continuation bytes (bit pattern 10xxxxxx).
 *
 * @param buffer
 *   In/out cursor. It is advanced past every byte that passes the check;
 *   when a byte fails, the cursor is left pointing at that byte.
 * @param continuation_bytes_to_check
 *   Number of bytes that must match the continuation pattern.
 *
 * @return true when all requested bytes are continuation bytes (trivially
 *   true for a count of zero), false as soon as one is not.
 *
 * No bounds checking is done here: the caller must ensure that
 * continuation_bytes_to_check bytes are readable starting at *buffer.
 */
static inline bool
check_continuation_bytes(const uint8_t ** buffer,
                         uint8_t continuation_bytes_to_check)
{
    uint8_t remaining = continuation_bytes_to_check;
    while (remaining > 0u)
    {
        const uint8_t byte = **buffer;
        /* The top two bits of a continuation byte are exactly "10". */
        const bool is_continuation = (byte >> 6) == 0x2u;
        if (!is_continuation)
        {
            return false;
        }
        ++(*buffer);
        --remaining;
    }
    return true;
}
/**
 * Guess whether a byte buffer holds UTF-8 or CP-1252 text.
 *
 * The buffer is scanned from start to end and validated as UTF-8. The first
 * byte sequence that is not well-formed UTF-8 causes CP_1252 to be returned;
 * if the whole buffer validates, UTF_8 is returned. Consequently an empty
 * buffer and a pure 7-bit ASCII buffer are both reported as UTF_8.
 *
 * Validation follows the well-formedness rules of RFC 3629 / the Unicode
 * Standard (Table 3-7):
 *   - lead bytes 0xC0 and 0xC1 (overlong 2-byte forms) are rejected,
 *   - lead bytes 0xF5..0xFF (values above U+10FFFF and the obsolete 5- and
 *     6-byte forms) are rejected,
 *   - overlong 3- and 4-byte forms, UTF-16 surrogates (U+D800..U+DFFF) and
 *     values above U+10FFFF are rejected via the second-byte range,
 *   - a multi-byte sequence cut off by the end of the buffer is rejected.
 *
 * @param buffer Start of the data to examine; must have at least @p length
 *   readable bytes.
 * @param length Number of bytes to examine.
 *
 * @return UTF_8 if the entire buffer is well-formed UTF-8, else CP_1252.
 */
Encoding::Type Encoding::detect_encoding(const uint8_t * buffer, size_t length)
{
    const uint8_t * const end = buffer + length;

    while (buffer < end)
    {
        const uint8_t lead = *buffer++;

        /* 7-bit ASCII is valid in both encodings; keep scanning. */
        if (lead < 0x80u)
        {
            continue;
        }

        uint8_t continuation_bytes_to_check;
        /* Allowed range of the byte right after the lead byte. It is
         * narrowed for the lead bytes that could otherwise start an overlong
         * form, a surrogate, or a value beyond U+10FFFF. */
        uint8_t second_min = 0x80u;
        uint8_t second_max = 0xBFu;

        if ((lead >= 0xC2u) && (lead <= 0xDFu))
        {
            continuation_bytes_to_check = 1u;
        }
        else if ((lead & 0xF0u) == 0xE0u)
        {
            continuation_bytes_to_check = 2u;
            if (lead == 0xE0u)
            {
                second_min = 0xA0u;     /* below this is an overlong form */
            }
            else if (lead == 0xEDu)
            {
                second_max = 0x9Fu;     /* above this is a surrogate */
            }
        }
        else if ((lead >= 0xF0u) && (lead <= 0xF4u))
        {
            continuation_bytes_to_check = 3u;
            if (lead == 0xF0u)
            {
                second_min = 0x90u;     /* below this is an overlong form */
            }
            else if (lead == 0xF4u)
            {
                second_max = 0x8Fu;     /* above this exceeds U+10FFFF */
            }
        }
        else
        {
            /* Stray continuation byte (0x80..0xBF) or a byte that can never
             * appear in well-formed UTF-8 (0xC0, 0xC1, 0xF5..0xFF). */
            return CP_1252;
        }

        /* The whole sequence must fit in what is left of the buffer. */
        if (static_cast<size_t>(end - buffer) < continuation_bytes_to_check)
        {
            return CP_1252;
        }

        if ((*buffer < second_min) || (*buffer > second_max))
        {
            return CP_1252;
        }

        /* Advances buffer past the continuation bytes on success. */
        if (!check_continuation_bytes(&buffer, continuation_bytes_to_check))
        {
            return CP_1252;
        }
    }

    return UTF_8;
}

19
src/core/Encoding.h Normal file
View File

@ -0,0 +1,19 @@
#ifndef ENCODING_H
#define ENCODING_H
#include <stdint.h>
#include <stdlib.h>
/**
 * Text encoding detection helpers.
 *
 * The class only groups the encoding enumeration with a static detection
 * routine; it declares no data members.
 */
class Encoding
{
public:
    /** Encodings that detect_encoding() can report. */
    enum Type
    {
        UTF_8 = 0,
        CP_1252 = 1
    };

    /**
     * Classify the encoding of a block of bytes.
     *
     * @param buffer Pointer to the first byte to examine.
     * @param length Number of bytes available at @p buffer.
     *
     * @return One of the Type enumerators.
     */
    static Type detect_encoding(const uint8_t * buffer, size_t length);
};
#endif