diff --git a/assets/parser.d.erb b/assets/parser.d.erb index 907834e..301d4e6 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -11,6 +11,18 @@ import std.stdio; class <%= @classname %> { + /* Result codes. */ + public enum : size_t + { + P_SUCCESS, + P_DECODE_ERROR, + P_UNEXPECTED_INPUT, + P_UNEXPECTED_TOKEN, + P_TOKEN, + P_DROP, + P_EOF, + } + alias TokenID = uint; enum : TokenID @@ -71,20 +83,21 @@ class <%= @classname %> static class Decoder { - enum Result - { - SUCCESS, - EOF, - DECODE_ERROR, - } - - static Result decode_code_point(string input, + /** + * Decode a UTF-8 code point. + * + * Returns one of: + * - P_SUCCESS + * - P_DECODE_ERROR + * - P_EOF + */ + static size_t decode_code_point(string input, ref CodePoint out_code_point, ref ubyte out_code_point_length) { if (input.length == 0u) { - return Result.EOF; + return P_EOF; } char c = input[0]; CodePoint code_point; @@ -124,11 +137,11 @@ class <%= @classname %> } else { - return Result.DECODE_ERROR; + return P_DECODE_ERROR; } if (input.length <= following_bytes) { - return Result.DECODE_ERROR; + return P_DECODE_ERROR; } code_point_length = cast(ubyte)(following_bytes + 1u); for (size_t i = 0u; i < following_bytes; i++) @@ -136,14 +149,14 @@ class <%= @classname %> char b = input[i + 1u]; if ((b & 0xC0u) != 0x80u) { - return Result.DECODE_ERROR; + return P_DECODE_ERROR; } code_point = (code_point << 6u) | (b & 0x3Fu); } } out_code_point = code_point; out_code_point_length = code_point_length; - return Result.SUCCESS; + return P_SUCCESS; } } @@ -230,14 +243,6 @@ class <%= @classname %> <% end %> ]; - public enum : size_t - { - P_TOKEN, - P_UNEXPECTED_INPUT, - P_DECODE_ERROR, - P_DROP, - } - public static struct TokenInfo { size_t row; @@ -259,6 +264,14 @@ class <%= @classname %> m_mode = <%= @lexer.mode_id("default") %>; } + /** + * Lex the next token in the input stream. + * + * Returns one of: + * - P_TOKEN + * - P_DECODE_ERROR + * - P_UNEXPECTED_INPUT + */ size_t lex_token(TokenInfo * out_token_info) { for (;;) @@ -298,6 +311,15 @@ class <%= @classname %> return Token.invalid(); } + /** + * Attempt to lex the next token in the input stream. + * + * Returns one of: + * - P_TOKEN + * - P_DECODE_ERROR + * - P_UNEXPECTED_INPUT + * - P_DROP + */ private size_t attempt_lex_token(TokenInfo * out_token_info) { TokenInfo token_info; @@ -307,9 +329,10 @@ class <%= @classname %> *out_token_info = token_info; // TODO: remove MatchInfo match_info; size_t unexpected_input_length; - switch (find_longest_match(match_info, unexpected_input_length)) + size_t result = find_longest_match(match_info, unexpected_input_length); + switch (result) { - case FindLongestMatchResult.FOUND_MATCH: + case P_SUCCESS: uint token_to_accept = match_info.accepting_state.token; if (match_info.accepting_state.code_id.is_valid()) { @@ -344,30 +367,16 @@ class <%= @classname %> *out_token_info = token_info; return P_TOKEN; - case FindLongestMatchResult.DECODE_ERROR: - return P_DECODE_ERROR; - - case FindLongestMatchResult.EOF: + case P_EOF: token_info.token = TOKEN___EOF; *out_token_info = token_info; return P_TOKEN; - case FindLongestMatchResult.UNEXPECTED_INPUT: - return P_UNEXPECTED_INPUT; - default: - assert(false); + return result; } } - enum FindLongestMatchResult - { - FOUND_MATCH, - DECODE_ERROR, - EOF, - UNEXPECTED_INPUT, - } - struct MatchInfo { size_t length; @@ -376,7 +385,16 @@ class <%= @classname %> const(State) * accepting_state; } - private FindLongestMatchResult find_longest_match( + /** + * Find the longest lexer pattern match at the current position. + * + * Returns one of: + * - P_SUCCESS + * - P_UNEXPECTED_INPUT + * - P_DECODE_ERROR + * - P_EOF + */ + private size_t find_longest_match( ref MatchInfo out_match_info, ref size_t out_unexpected_input_length) { @@ -388,9 +406,10 @@ class <%= @classname %> string input = m_input[(m_input_position + attempt_match.length)..(m_input.length)]; CodePoint code_point; ubyte code_point_length; - switch (Decoder.decode_code_point(input, code_point, code_point_length)) + size_t result = Decoder.decode_code_point(input, code_point, code_point_length); + switch (result) { - case Decoder.Result.SUCCESS: + case P_SUCCESS: auto transition_result = transition(current_state, code_point); if (transition_result.found()) { @@ -414,41 +433,38 @@ class <%= @classname %> else if (longest_match.length > 0) { out_match_info = longest_match; - return FindLongestMatchResult.FOUND_MATCH; + return P_SUCCESS; } else { out_unexpected_input_length = attempt_match.length + code_point_length; - return FindLongestMatchResult.UNEXPECTED_INPUT; + return P_UNEXPECTED_INPUT; } break; - case Decoder.Result.EOF: + case P_EOF: /* We hit EOF. */ if (longest_match.length > 0) { /* We have a match, so use it. */ out_match_info = longest_match; - return FindLongestMatchResult.FOUND_MATCH; + return P_SUCCESS; } else if (attempt_match.length != 0) { /* There is a partial match - error! */ out_unexpected_input_length = attempt_match.length; - return FindLongestMatchResult.UNEXPECTED_INPUT; + return P_UNEXPECTED_INPUT; } else { /* Valid EOF return. */ - return FindLongestMatchResult.EOF; + return P_EOF; } break; - case Decoder.Result.DECODE_ERROR: - return FindLongestMatchResult.DECODE_ERROR; - default: - assert(false); + return result; } } } @@ -564,14 +580,6 @@ class <%= @classname %> m_lexer = new Lexer(input); } - public enum : size_t - { - P_SUCCESS, - P_DECODE_ERROR, - P_UNEXPECTED_INPUT, - P_UNEXPECTED_TOKEN, - } - size_t parse() { Lexer.TokenInfo token_info; @@ -584,14 +592,9 @@ class <%= @classname %> if (token == _TOKEN_COUNT) { size_t lexer_result = m_lexer.lex_token(&token_info); - switch (lexer_result) + if (lexer_result != P_TOKEN) { - case Lexer.P_UNEXPECTED_INPUT: - return P_UNEXPECTED_INPUT; - case Lexer.P_DECODE_ERROR: - return P_DECODE_ERROR; - default: - break; + return lexer_result; } token = token_info.token; } diff --git a/spec/test_d_lexer.d b/spec/test_d_lexer.d index 5238aee..818930f 100644 --- a/spec/test_d_lexer.d +++ b/spec/test_d_lexer.d @@ -8,37 +8,36 @@ int main() unittest { - alias Result = Testparser.Decoder.Result; - Result result; + size_t result; Testparser.CodePoint code_point; ubyte code_point_length; result = Testparser.Decoder.decode_code_point("5", code_point, code_point_length); - assert(result == Result.SUCCESS); + assert(result == Testparser.P_SUCCESS); assert(code_point == '5'); assert(code_point_length == 1u); result = Testparser.Decoder.decode_code_point("", code_point, code_point_length); - assert(result == Result.EOF); + assert(result == Testparser.P_EOF); result = Testparser.Decoder.decode_code_point("\xC2\xA9", code_point, code_point_length); - assert(result == Result.SUCCESS); + assert(result == Testparser.P_SUCCESS); assert(code_point == 0xA9u); assert(code_point_length == 2u); result = Testparser.Decoder.decode_code_point("\xf0\x9f\xa7\xa1", code_point, code_point_length); - assert(result == Result.SUCCESS); + assert(result == Testparser.P_SUCCESS); assert(code_point == 0x1F9E1u); assert(code_point_length == 4u); result = Testparser.Decoder.decode_code_point("\xf0\x9f\x27", code_point, code_point_length); - assert(result == Result.DECODE_ERROR); + assert(result == Testparser.P_DECODE_ERROR); result = Testparser.Decoder.decode_code_point("\xf0\x9f\xa7\xFF", code_point, code_point_length); - assert(result == Result.DECODE_ERROR); + assert(result == Testparser.P_DECODE_ERROR); result = Testparser.Decoder.decode_code_point("\xfe", code_point, code_point_length); - assert(result == Result.DECODE_ERROR); + assert(result == Testparser.P_DECODE_ERROR); } unittest @@ -47,24 +46,24 @@ unittest TokenInfo token_info; string input = "5 + 4 * \n677 + 567"; Testparser.Lexer lexer = new Testparser.Lexer(input); - assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN); assert(token_info == TokenInfo(0, 0, 1, Testparser.TOKEN_int)); - assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN); assert(token_info == TokenInfo(0, 2, 1, Testparser.TOKEN_plus)); - assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN); assert(token_info == TokenInfo(0, 4, 1, Testparser.TOKEN_int)); - assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN); assert(token_info == TokenInfo(0, 6, 1, Testparser.TOKEN_times)); - assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN); assert(token_info == TokenInfo(1, 0, 3, Testparser.TOKEN_int)); - assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN); assert(token_info == TokenInfo(1, 4, 1, Testparser.TOKEN_plus)); - assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN); assert(token_info == TokenInfo(1, 6, 3, Testparser.TOKEN_int)); - assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN); assert(token_info == TokenInfo(1, 9, 0, Testparser.TOKEN___EOF)); lexer = new Testparser.Lexer(""); - assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(lexer.lex_token(&token_info) == Testparser.P_TOKEN); assert(token_info == TokenInfo(0, 0, 0, Testparser.TOKEN___EOF)); } diff --git a/spec/test_d_parser_identical_rules_lookahead.d b/spec/test_d_parser_identical_rules_lookahead.d index 3ca758c..3a699ab 100644 --- a/spec/test_d_parser_identical_rules_lookahead.d +++ b/spec/test_d_parser_identical_rules_lookahead.d @@ -10,9 +10,9 @@ unittest { string input = "aba"; auto parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); input = "abb"; parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); } diff --git a/spec/test_d_parser_rule_from_multiple_states.d b/spec/test_d_parser_rule_from_multiple_states.d index 8ef50e2..09a44c0 100644 --- a/spec/test_d_parser_rule_from_multiple_states.d +++ b/spec/test_d_parser_rule_from_multiple_states.d @@ -10,13 +10,13 @@ unittest { string input = "a"; auto parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_UNEXPECTED_TOKEN); + assert(parser.parse() == Testparser.P_UNEXPECTED_TOKEN); input = "a b"; parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); input = "bb"; parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); } diff --git a/spec/test_lexer_match_text.d b/spec/test_lexer_match_text.d index fc3ffd6..0ea8f79 100644 --- a/spec/test_lexer_match_text.d +++ b/spec/test_lexer_match_text.d @@ -10,6 +10,6 @@ unittest { string input = `identifier_123`; auto parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); writeln("pass1"); } diff --git a/spec/test_lexer_modes.d b/spec/test_lexer_modes.d index a36d295..b14d3ee 100644 --- a/spec/test_lexer_modes.d +++ b/spec/test_lexer_modes.d @@ -10,11 +10,11 @@ unittest { string input = `abc "a string" def`; auto parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); writeln("pass1"); input = `abc "abc def" def`; parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); writeln("pass2"); } diff --git a/spec/test_lexer_result_value.d b/spec/test_lexer_result_value.d index 2d7510e..295707d 100644 --- a/spec/test_lexer_result_value.d +++ b/spec/test_lexer_result_value.d @@ -10,11 +10,11 @@ unittest { string input = `x`; auto parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); assert(parser.result == 1u); input = `fabulous`; parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); assert(parser.result == 8u); } diff --git a/spec/test_parser_rule_user_code.d b/spec/test_parser_rule_user_code.d index 9f1e436..ea7c7c8 100644 --- a/spec/test_parser_rule_user_code.d +++ b/spec/test_parser_rule_user_code.d @@ -10,5 +10,5 @@ unittest { string input = "ab"; auto parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); } diff --git a/spec/test_parsing_json.d b/spec/test_parsing_json.d index 521f2ba..47271f7 100644 --- a/spec/test_parsing_json.d +++ b/spec/test_parsing_json.d @@ -11,33 +11,33 @@ unittest { string input = ``; auto parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); input = `{}`; parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); assert(cast(JSONObject)parser.result); input = `[]`; parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); assert(cast(JSONArray)parser.result); input = `-45.6`; parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); assert(cast(JSONNumber)parser.result); assert((cast(JSONNumber)parser.result).value == -45.6); input = `2E-2`; parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); assert(cast(JSONNumber)parser.result); assert((cast(JSONNumber)parser.result).value == 0.02); input = `{"hi":true}`; parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); assert(cast(JSONObject)parser.result); JSONObject o = cast(JSONObject)parser.result; assert(o.value["hi"]); @@ -45,7 +45,7 @@ unittest input = `{"ff": false, "nn": null}`; parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); assert(cast(JSONObject)parser.result); o = cast(JSONObject)parser.result; assert(o.value["ff"]); diff --git a/spec/test_parsing_lists.d b/spec/test_parsing_lists.d index 7fcda71..b013fb7 100644 --- a/spec/test_parsing_lists.d +++ b/spec/test_parsing_lists.d @@ -10,16 +10,16 @@ unittest { string input = "a"; auto parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); assert(parser.result == 1u); input = ""; parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); assert(parser.result == 0u); input = "aaaaaaaaaaaaaaaa"; parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); assert(parser.result == 16u); } diff --git a/spec/test_pattern.d b/spec/test_pattern.d index fad7d28..8d2d05e 100644 --- a/spec/test_pattern.d +++ b/spec/test_pattern.d @@ -10,11 +10,11 @@ unittest { string input = "abcdef"; auto parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); writeln("pass1"); input = "defabcdef"; parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); writeln("pass2"); } diff --git a/spec/test_return_token_from_pattern.d b/spec/test_return_token_from_pattern.d index 5572a4a..18367bb 100644 --- a/spec/test_return_token_from_pattern.d +++ b/spec/test_return_token_from_pattern.d @@ -10,5 +10,5 @@ unittest { string input = "defghidef"; auto parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); } diff --git a/spec/test_user_code.d b/spec/test_user_code.d index 45f37ad..723efdd 100644 --- a/spec/test_user_code.d +++ b/spec/test_user_code.d @@ -10,11 +10,11 @@ unittest { string input = "abcdef"; auto parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); writeln("pass1"); input = "abcabcdef"; parser = new Testparser.Parser(input); - assert(parser.parse() == Testparser.Parser.P_SUCCESS); + assert(parser.parse() == Testparser.P_SUCCESS); writeln("pass2"); }