From fce9f872655f40b9d8ccbcfbb0b9356c6719a3e2 Mon Sep 17 00:00:00 2001
From: Josh Holtrop
Date: Sat, 21 Jan 2017 13:28:20 -0500
Subject: [PATCH] add EncodedString module

---
Review notes: the file contents below were revised during review
(m_size is initialised, the buffer is owned and released, iterator
advance is bounded). The blob ids on the "index" lines predate those
revisions.

 src/core/EncodedString.cc | 122 +++++++++++++++++++++++++++++++++++++
 src/core/EncodedString.h  |  83 +++++++++++++++++++++++++
 2 files changed, 205 insertions(+)
 create mode 100644 src/core/EncodedString.cc
 create mode 100644 src/core/EncodedString.h

diff --git a/src/core/EncodedString.cc b/src/core/EncodedString.cc
new file mode 100644
index 0000000..8e75268
--- /dev/null
+++ b/src/core/EncodedString.cc
@@ -0,0 +1,122 @@
+#include "EncodedString.h"
+#include <string.h>
+#include <utility>
+
+/**
+ * Decode the code point at the current offset.
+ *
+ * @return Result of Encoding::decode() for the bytes at the current offset,
+ *         or INVALID_CODE_POINT when the offset is at or past the end.
+ */
+uint32_t EncodedString::iterator::operator*() const
+{
+    if (m_offset >= m_encoded_string.size())
+    {
+        return INVALID_CODE_POINT;
+    }
+    return Encoding::decode(m_encoded_string.encoding(), &m_encoded_string[m_offset]);
+}
+
+/**
+ * Advance past the code point at the current offset.
+ *
+ * Does nothing at end(). The step is clamped so the offset lands exactly on
+ * size() instead of skipping past it; otherwise a truncated trailing
+ * sequence would make `it != end()` stay true forever.
+ */
+EncodedString::iterator & EncodedString::iterator::operator++()
+{
+    const size_t size = m_encoded_string.size();
+    if (m_offset < size)
+    {
+        const size_t remaining = size - m_offset;
+        size_t step = Encoding::num_bytes_in_code_point(m_encoded_string.encoding(), &m_encoded_string[m_offset]);
+        if (step == 0u)
+        {
+            /* NOTE(review): assumes a zero length can be reported for a
+             * malformed lead byte - confirm against Encoding. Skip one byte
+             * so iteration always makes progress. */
+            step = 1u;
+        }
+        if (step > remaining)
+        {
+            step = remaining;
+        }
+        m_offset += step;
+    }
+    return *this;
+}
+
+/** Construct an empty string tagged with @p encoding. */
+EncodedString::EncodedString(Encoding::Type encoding)
+    : m_encoding(encoding),
+      m_data(nullptr),
+      m_size(0u)
+{
+}
+
+/**
+ * Construct from a copy of @p size bytes starting at @p data.
+ *
+ * A null @p data or a zero @p size produces an empty string.
+ */
+EncodedString::EncodedString(const uint8_t * data, size_t size, Encoding::Type encoding)
+    : m_encoding(encoding),
+      m_data(nullptr),
+      m_size(0u)
+{
+    if ((data != nullptr) && (size > 0u))
+    {
+        m_data = new uint8_t[size];
+        memcpy(m_data, data, size);
+        m_size = size;
+    }
+}
+
+/** Deep-copy @p other so both strings own separate buffers. */
+EncodedString::EncodedString(const EncodedString & other)
+    : EncodedString(other.m_data, other.m_size, other.m_encoding)
+{
+}
+
+/** Take over the buffer of @p other, leaving it empty. */
+EncodedString::EncodedString(EncodedString && other) noexcept
+    : m_encoding(other.m_encoding),
+      m_data(other.m_data),
+      m_size(other.m_size)
+{
+    other.m_data = nullptr;
+    other.m_size = 0u;
+}
+
+/**
+ * Copy- or move-assign via copy-and-swap.
+ *
+ * @p other is taken by value, so it already owns its own buffer; swapping
+ * hands the previous buffer to it, and its destructor releases that buffer.
+ */
+EncodedString & EncodedString::operator=(EncodedString other) noexcept
+{
+    std::swap(m_encoding, other.m_encoding);
+    std::swap(m_data, other.m_data);
+    std::swap(m_size, other.m_size);
+    return *this;
+}
+
+/** Release the owned buffer. */
+EncodedString::~EncodedString()
+{
+    delete[] m_data;
+}
+
+/** @return Iterator positioned at byte offset 0. */
+EncodedString::iterator EncodedString::begin() const
+{
+    return iterator(*this, 0u);
+}
+
+/** @return Iterator positioned at byte offset size(). */
+EncodedString::iterator EncodedString::end() const
+{
+    return iterator(*this, m_size);
+}
diff --git a/src/core/EncodedString.h b/src/core/EncodedString.h
new file mode 100644
index 0000000..f57f8f7
--- /dev/null
+++ b/src/core/EncodedString.h
@@ -0,0 +1,83 @@
+#ifndef ENCODEDSTRING_H
+#define ENCODEDSTRING_H
+
+#include "Encoding.h"
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * Owning byte buffer tagged with the Encoding::Type used to interpret it.
+ *
+ * Iteration yields one code point per step, using Encoding::decode() and
+ * Encoding::num_bytes_in_code_point().
+ */
+class EncodedString
+{
+public:
+    enum : uint32_t
+    {
+        /** Returned when dereferencing an iterator at or past the end. */
+        INVALID_CODE_POINT = 0xFFFFFFFFu,
+    };
+
+    /**
+     * Forward cursor over the code points of an EncodedString.
+     *
+     * Holds a reference to the string it walks, so it must not outlive that
+     * string. Equality compares byte offsets only.
+     */
+    class iterator
+    {
+    public:
+        iterator(const EncodedString & encoded_string, size_t offset)
+            : m_encoded_string(encoded_string),
+              m_offset(offset)
+        {
+        }
+        uint32_t operator*() const;
+        iterator & operator++();
+        bool operator==(const iterator & other) const
+        {
+            return m_offset == other.m_offset;
+        }
+        bool operator!=(const iterator & other) const
+        {
+            return m_offset != other.m_offset;
+        }
+
+    protected:
+        const EncodedString & m_encoded_string;
+        size_t m_offset;
+    };
+
+    EncodedString(Encoding::Type encoding = Encoding::UTF_8);
+    EncodedString(const uint8_t * data, size_t size, Encoding::Type encoding = Encoding::UTF_8);
+    EncodedString(const EncodedString & other);
+    EncodedString(EncodedString && other) noexcept;
+    EncodedString & operator=(EncodedString other) noexcept;
+    ~EncodedString();
+
+    iterator begin() const;
+    iterator end() const;
+    /** @return Number of bytes held (not the number of code points). */
+    size_t size() const
+    {
+        return m_size;
+    }
+    Encoding::Type encoding() const
+    {
+        return m_encoding;
+    }
+    /** Unchecked access to the byte at @p index; requires index < size(). */
+    const uint8_t & operator[](size_t index) const
+    {
+        return m_data[index];
+    }
+
+protected:
+    Encoding::Type m_encoding;
+    uint8_t * m_data;
+    size_t m_size;
+};
+
+#endif