From b2bed71ee433c9186856d7d849ab97ecd59f2b5a Mon Sep 17 00:00:00 2001
From: Josh Holtrop
Date: Thu, 11 Aug 2016 23:13:32 -0400
Subject: [PATCH] add Encoding::beginning_of_code_point()

---
 src/core/Encoding.cc | 15 +++++++++++++++
 src/core/Encoding.h  |  1 +
 2 files changed, 16 insertions(+)

diff --git a/src/core/Encoding.cc b/src/core/Encoding.cc
index b0033c6..774d2b4 100644
--- a/src/core/Encoding.cc
+++ b/src/core/Encoding.cc
@@ -81,3 +81,18 @@ uint8_t Encoding::num_bytes_in_code_point(Type type, const uint8_t * encoded)
     }
     return 0u;
 }
+
+const uint8_t * Encoding::beginning_of_code_point(Type type, const uint8_t * encoded) // Back up from `encoded` to the first byte of the code point it lies in.
+{
+    switch (type)
+    {
+    case UTF_8:
+        while ((*encoded & 0xC0u) == 0x80u) // Top two bits 10xxxxxx mark a UTF-8 continuation byte.
+            encoded--; // NOTE(review): no lower bound is checked; the caller must guarantee a non-continuation byte at or before `encoded`.
+        return encoded; // First byte at or before the input that is not a continuation byte.
+    case CP_1252:
+        return encoded; // Returned unchanged: CP-1252 is single-byte, so every byte already starts a code point.
+    }
+
+    return nullptr; // Reached only when `type` matches none of the cases above.
+}
diff --git a/src/core/Encoding.h b/src/core/Encoding.h
index f673dbd..5be2ca4 100644
--- a/src/core/Encoding.h
+++ b/src/core/Encoding.h
@@ -15,6 +15,7 @@ public:
 
     static Type detect_encoding(const uint8_t * buffer, size_t length);
     static uint8_t num_bytes_in_code_point(Type type, const uint8_t * encoded);
+    static const uint8_t * beginning_of_code_point(Type type, const uint8_t * encoded); // Pointer to the first byte of the code point containing `encoded`; nullptr for an unhandled `type`.
 };
 
 #endif