Compare commits

..

No commits in common. "b00f53050b17233315e57824b4ffe2f1b67673f7" and "7f844711d95148a87e9f5f68ba640b7e4e736e99" have entirely different histories.

2 changed files with 34 additions and 51 deletions

View File

@ -11,7 +11,7 @@ import std.stdio;
class <%= @classname %> class <%= @classname %>
{ {
enum : uint enum
{ {
<% @grammar.tokens.each_with_index do |token, index| %> <% @grammar.tokens.each_with_index do |token, index| %>
TOKEN_<%= token.code_name %> = <%= index %>, TOKEN_<%= token.code_name %> = <%= index %>,
@ -36,40 +36,23 @@ class <%= @classname %>
static class Decoder static class Decoder
{ {
struct Result enum
{ {
enum : ubyte CODE_POINT_INVALID = 0xFFFFFFFE,
{ CODE_POINT_EOF = 0xFFFFFFFF,
SUCCESS,
EOF,
DECODE_ERROR,
} }
ubyte result;
alias result this; struct DecodedCodePoint
{
uint code_point; uint code_point;
uint code_point_length; uint code_point_length;
static Result success(uint code_point, uint code_point_length)
{
return Result(SUCCESS, code_point, code_point_length);
} }
static Result eof() static DecodedCodePoint decode_code_point(string input)
{
return Result(EOF);
}
static Result decode_error()
{
return Result(DECODE_ERROR);
}
}
static Result decode_code_point(string input)
{ {
if (input.length == 0u) if (input.length == 0u)
{ {
return Result.eof(); return DecodedCodePoint(CODE_POINT_EOF, 0u);
} }
char c = input[0]; char c = input[0];
uint code_point; uint code_point;
@ -109,11 +92,11 @@ class <%= @classname %>
} }
else else
{ {
return Result.decode_error(); return DecodedCodePoint(CODE_POINT_INVALID, 0u);
} }
if (input.length <= following_bytes) if (input.length <= following_bytes)
{ {
return Result.decode_error(); return DecodedCodePoint(CODE_POINT_INVALID, 0u);
} }
code_point_length = following_bytes + 1u; code_point_length = following_bytes + 1u;
for (size_t i = 0u; i < following_bytes; i++) for (size_t i = 0u; i < following_bytes; i++)
@ -121,12 +104,12 @@ class <%= @classname %>
char b = input[i + 1u]; char b = input[i + 1u];
if ((b & 0xC0u) != 0x80u) if ((b & 0xC0u) != 0x80u)
{ {
return Result.decode_error(); return DecodedCodePoint(CODE_POINT_INVALID, 0u);
} }
code_point = (code_point << 6u) | (b & 0x3Fu); code_point = (code_point << 6u) | (b & 0x3Fu);
} }
} }
return Result.success(code_point, code_point_length); return DecodedCodePoint(code_point, code_point_length);
} }
} }
@ -255,13 +238,13 @@ class <%= @classname %>
for (;;) for (;;)
{ {
auto decoded = Decoder.decode_code_point(m_input[(m_input_position + attempt_match_info.length)..(m_input.length)]); auto decoded = Decoder.decode_code_point(m_input[(m_input_position + attempt_match_info.length)..(m_input.length)]);
if (decoded == Decoder.Result.DECODE_ERROR) if (decoded.code_point == Decoder.CODE_POINT_INVALID)
{ {
lt.token = _TOKEN_DECODE_ERROR; lt.token = _TOKEN_DECODE_ERROR;
return lt; return lt;
} }
bool lex_continue = false; bool lex_continue = false;
if (decoded != Decoder.Result.EOF) if (decoded.code_point != Decoder.CODE_POINT_EOF)
{ {
uint dest = transition(current_state, decoded.code_point); uint dest = transition(current_state, decoded.code_point);
if (dest != cast(uint)-1) if (dest != cast(uint)-1)

View File

@ -8,29 +8,29 @@ int main()
unittest unittest
{ {
alias Result = Testparser.Decoder.Result; alias DCP = Testparser.Decoder.DecodedCodePoint;
Result result; DCP dcp;
result = Testparser.Decoder.decode_code_point("5"); dcp = Testparser.Decoder.decode_code_point("5");
assert(result == Result.success('5', 1u)); assert(dcp == DCP('5', 1u));
result = Testparser.Decoder.decode_code_point(""); dcp = Testparser.Decoder.decode_code_point("");
assert(result == Result.eof()); assert(dcp == DCP(Testparser.Decoder.CODE_POINT_EOF, 0u));
result = Testparser.Decoder.decode_code_point("\xC2\xA9"); dcp = Testparser.Decoder.decode_code_point("\xC2\xA9");
assert(result == Result.success(0xA9u, 2u)); assert(dcp == DCP(0xA9u, 2u));
result = Testparser.Decoder.decode_code_point("\xf0\x9f\xa7\xa1"); dcp = Testparser.Decoder.decode_code_point("\xf0\x9f\xa7\xa1");
assert(result == Result.success(0x1F9E1, 4u)); assert(dcp == DCP(0x1F9E1, 4u));
result = Testparser.Decoder.decode_code_point("\xf0\x9f\x27"); dcp = Testparser.Decoder.decode_code_point("\xf0\x9f\x27");
assert(result == Result.decode_error()); assert(dcp == DCP(Testparser.Decoder.CODE_POINT_INVALID, 0u));
result = Testparser.Decoder.decode_code_point("\xf0\x9f\xa7\xFF"); dcp = Testparser.Decoder.decode_code_point("\xf0\x9f\xa7\xFF");
assert(result == Result.decode_error()); assert(dcp == DCP(Testparser.Decoder.CODE_POINT_INVALID, 0u));
result = Testparser.Decoder.decode_code_point("\xfe"); dcp = Testparser.Decoder.decode_code_point("\xfe");
assert(result == Result.decode_error()); assert(dcp == DCP(Testparser.Decoder.CODE_POINT_INVALID, 0u));
} }
unittest unittest