Detect other invalid UTF-8 encodings

This commit is contained in:
Josh Holtrop 2022-05-31 22:26:09 -04:00
parent a0af8b0d7c
commit 7598c589fe
2 changed files with 29 additions and 3 deletions

View File

@@ -83,6 +83,10 @@ class <%= classname %>
code_point = c & 0x01u; code_point = c & 0x01u;
following_bytes = 5u; following_bytes = 5u;
} }
else
{
return DecodedCodePoint(CODE_POINT_INVALID, 0u);
}
if (input_length <= following_bytes) if (input_length <= following_bytes)
{ {
return DecodedCodePoint(CODE_POINT_INVALID, 0u); return DecodedCodePoint(CODE_POINT_INVALID, 0u);
@@ -91,8 +95,12 @@ class <%= classname %>
while (following_bytes-- > 0u) while (following_bytes-- > 0u)
{ {
input++; input++;
code_point <<= 6u; ubyte b = *input;
code_point |= *input & 0x3Fu; if ((b & 0xC0u) != 0u)
{
return DecodedCodePoint(CODE_POINT_INVALID, 0u);
}
code_point = (code_point << 6u) | b;
} }
} }
return DecodedCodePoint(code_point, code_point_length); return DecodedCodePoint(code_point, code_point_length);

View File

@@ -40,11 +40,29 @@ unittest
dcp = Testparser.Decoder.decode_code_point(input, input_length); dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP(Testparser.Decoder.CODE_POINT_EOF, 0u)); assert(dcp == DCP(Testparser.Decoder.CODE_POINT_EOF, 0u));
inputstring = "\xf0\x9f\xa7\xa1"; inputstring = "\xf0\x1f\x27\x21";
input = cast(const(ubyte) *)inputstring.ptr; input = cast(const(ubyte) *)inputstring.ptr;
input_length = inputstring.length; input_length = inputstring.length;
dcp = Testparser.Decoder.decode_code_point(input, input_length); dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP(0x1F9E1, 4u)); assert(dcp == DCP(0x1F9E1, 4u));
inputstring = "\xf0\x1f\x27";
input = cast(const(ubyte) *)inputstring.ptr;
input_length = inputstring.length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP(Testparser.Decoder.CODE_POINT_INVALID, 0u));
inputstring = "\xf0\x1f\x27\xFF";
input = cast(const(ubyte) *)inputstring.ptr;
input_length = inputstring.length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP(Testparser.Decoder.CODE_POINT_INVALID, 0u));
inputstring = "\xfe";
input = cast(const(ubyte) *)inputstring.ptr;
input_length = inputstring.length;
dcp = Testparser.Decoder.decode_code_point(input, input_length);
assert(dcp == DCP(Testparser.Decoder.CODE_POINT_INVALID, 0u));
} }
unittest unittest