diff --git a/src/core/Encoding.cc b/src/core/Encoding.cc
index ce428cc..b0033c6 100644
--- a/src/core/Encoding.cc
+++ b/src/core/Encoding.cc
@@ -60,3 +60,44 @@ Encoding::Type Encoding::detect_encoding(const uint8_t * buffer, size_t length)
 
     return UTF_8;
 }
+
+/**
+ * Determine how many bytes the code point starting at `encoded` occupies.
+ *
+ * For UTF_8 the length announced by the lead byte (1-4, per RFC 3629) is used,
+ * shortened if a non-continuation byte appears early. At most that many bytes
+ * are read, so a well-formed code point at the very end of a buffer is never
+ * read past, and runs of stray continuation bytes cannot inflate the count.
+ *
+ * @param type    Encoding of the bytes at `encoded`.
+ * @param encoded Pointer to the first byte of the code point.
+ * @return Number of bytes in the code point; 1 for a byte that cannot start a
+ *         multi-byte UTF_8 sequence; 0 if `type` is not handled.
+ */
+uint8_t Encoding::num_bytes_in_code_point(Type type, const uint8_t * encoded)
+{
+    switch (type)
+    {
+    case UTF_8:
+        {
+            const uint8_t lead = *encoded;
+            uint8_t expected;
+            if ((lead & 0xE0u) == 0xC0u)
+                expected = 2u; /* 110xxxxx */
+            else if ((lead & 0xF0u) == 0xE0u)
+                expected = 3u; /* 1110xxxx */
+            else if ((lead & 0xF8u) == 0xF0u)
+                expected = 4u; /* 11110xxx */
+            else
+                return 1u; /* ASCII, stray continuation or invalid lead byte */
+            /* Count continuation bytes (10xxxxxx), never beyond the announced length. */
+            uint8_t n = 1u;
+            while (n < expected && (encoded[n] & 0xC0u) == 0x80u)
+                n++;
+            return n;
+        }
+    case CP_1252:
+        return 1u; /* single-byte encoding */
+    }
+    return 0u;
+}
diff --git a/src/core/Encoding.h b/src/core/Encoding.h
index 21e11f0..f673dbd 100644
--- a/src/core/Encoding.h
+++ b/src/core/Encoding.h
@@ -14,6 +14,7 @@ public:
     };
 
     static Type detect_encoding(const uint8_t * buffer, size_t length);
+    static uint8_t num_bytes_in_code_point(Type type, const uint8_t * encoded);
 };
 
 #endif