diff --git a/assets/parser.d.erb b/assets/parser.d.erb index c44fd22..c158be0 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -41,13 +41,13 @@ class <%= @classname %> uint code_point_length; } - static DecodedCodePoint decode_code_point(const(ubyte) * input, size_t input_length) + static DecodedCodePoint decode_code_point(string input) { - if (input_length == 0u) + if (input.length == 0u) { return DecodedCodePoint(CODE_POINT_EOF, 0u); } - ubyte c = *input; + char c = input[0]; uint code_point; uint code_point_length; if ((c & 0x80u) == 0u) @@ -87,20 +87,19 @@ class <%= @classname %> { return DecodedCodePoint(CODE_POINT_INVALID, 0u); } - if (input_length <= following_bytes) + if (input.length <= following_bytes) { return DecodedCodePoint(CODE_POINT_INVALID, 0u); } code_point_length = following_bytes + 1u; - while (following_bytes-- > 0u) + for (size_t i = 0u; i < following_bytes; i++) { - input++; - ubyte b = *input; - if ((b & 0xC0u) != 0u) + char b = input[i + 1u]; + if ((b & 0xC0u) != 0x80u) { return DecodedCodePoint(CODE_POINT_INVALID, 0u); } - code_point = (code_point << 6u) | b; + code_point = (code_point << 6u) | (b & 0x3Fu); } } return DecodedCodePoint(code_point, code_point_length); @@ -159,17 +158,15 @@ class <%= @classname %> uint token; } - private const(ubyte) * m_input; - private size_t m_input_length; + private string m_input; private size_t m_input_position; private size_t m_input_row; private size_t m_input_col; private size_t m_mode; - this(const(ubyte) * input, size_t input_length) + this(string input) { m_input = input; - m_input_length = input_length; m_mode = <%= @lexer.mode_id("default") %>; } @@ -227,7 +224,7 @@ class <%= @classname %> uint current_state = modes[m_mode].state_table_offset; for (;;) { - auto decoded = Decoder.decode_code_point(&m_input[m_input_position + attempt_match_info.length], m_input_length - m_input_position - attempt_match_info.length); + auto decoded = Decoder.decode_code_point(m_input[(m_input_position + attempt_match_info.length)..(m_input.length)]); if (decoded.code_point == Decoder.CODE_POINT_INVALID) { lt.token = _TOKEN_DECODE_ERROR; @@ -377,9 +374,9 @@ class <%= @classname %> private <%= @grammar.result_type %> parse_result; - this(const(ubyte) * input, size_t input_length) + this(string input) { - m_lexer = new Lexer(input, input_length); + m_lexer = new Lexer(input); } bool parse() diff --git a/spec/propane_spec.rb b/spec/propane_spec.rb index bf8e816..e274543 100644 --- a/spec/propane_spec.rb +++ b/spec/propane_spec.rb @@ -76,6 +76,7 @@ EOF build_parser compile("spec/test_d_lexer.d") results = run + expect(results.stderr).to eq "" expect(results.status).to eq 0 end diff --git a/spec/test_d_lexer.d b/spec/test_d_lexer.d index 5af56d8..92522bd 100644 --- a/spec/test_d_lexer.d +++ b/spec/test_d_lexer.d @@ -9,59 +9,27 @@ int main() unittest { alias DCP = Testparser.Decoder.DecodedCodePoint; - string inputstring = "5+\n 66"; - const(ubyte) * input = cast(const(ubyte) *)inputstring.ptr; - size_t input_length = inputstring.length; DCP dcp; - dcp = Testparser.Decoder.decode_code_point(input, input_length); + + dcp = Testparser.Decoder.decode_code_point("5"); assert(dcp == DCP('5', 1u)); - input += dcp.code_point_length; - input_length -= dcp.code_point_length; - dcp = Testparser.Decoder.decode_code_point(input, input_length); - assert(dcp == DCP('+', 1u)); - input += dcp.code_point_length; - input_length -= dcp.code_point_length; - dcp = Testparser.Decoder.decode_code_point(input, input_length); - assert(dcp == DCP('\n', 1u)); - input += dcp.code_point_length; - input_length -= dcp.code_point_length; - dcp = Testparser.Decoder.decode_code_point(input, input_length); - assert(dcp == DCP(' ', 1u)); - input += dcp.code_point_length; - input_length -= dcp.code_point_length; - dcp = Testparser.Decoder.decode_code_point(input, input_length); - assert(dcp == DCP('6', 1u)); - input += dcp.code_point_length; - input_length -= dcp.code_point_length; - dcp = Testparser.Decoder.decode_code_point(input, input_length); - assert(dcp == DCP('6', 1u)); - input += dcp.code_point_length; - input_length -= dcp.code_point_length; - dcp = Testparser.Decoder.decode_code_point(input, input_length); + + dcp = Testparser.Decoder.decode_code_point(""); assert(dcp == DCP(Testparser.Decoder.CODE_POINT_EOF, 0u)); - inputstring = "\xf0\x1f\x27\x21"; - input = cast(const(ubyte) *)inputstring.ptr; - input_length = inputstring.length; - dcp = Testparser.Decoder.decode_code_point(input, input_length); + dcp = Testparser.Decoder.decode_code_point("\xC2\xA9"); + assert(dcp == DCP(0xA9u, 2u)); + + dcp = Testparser.Decoder.decode_code_point("\xf0\x9f\xa7\xa1"); assert(dcp == DCP(0x1F9E1, 4u)); - inputstring = "\xf0\x1f\x27"; - input = cast(const(ubyte) *)inputstring.ptr; - input_length = inputstring.length; - dcp = Testparser.Decoder.decode_code_point(input, input_length); + dcp = Testparser.Decoder.decode_code_point("\xf0\x9f\x27"); assert(dcp == DCP(Testparser.Decoder.CODE_POINT_INVALID, 0u)); - inputstring = "\xf0\x1f\x27\xFF"; - input = cast(const(ubyte) *)inputstring.ptr; - input_length = inputstring.length; - dcp = Testparser.Decoder.decode_code_point(input, input_length); + dcp = Testparser.Decoder.decode_code_point("\xf0\x9f\xa7\xFF"); assert(dcp == DCP(Testparser.Decoder.CODE_POINT_INVALID, 0u)); - inputstring = "\xfe"; - input = cast(const(ubyte) *)inputstring.ptr; - input_length = inputstring.length; - dcp = Testparser.Decoder.decode_code_point(input, input_length); + dcp = Testparser.Decoder.decode_code_point("\xfe"); assert(dcp == DCP(Testparser.Decoder.CODE_POINT_INVALID, 0u)); } @@ -69,7 +37,7 @@ unittest { alias LT = Testparser.Lexer.LexedToken; string input = "5 + 4 * \n677 + 567"; - Testparser.Lexer lexer = new Testparser.Lexer(cast(const(ubyte) *)input.ptr, input.length); + Testparser.Lexer lexer = new Testparser.Lexer(input); assert(lexer.lex_token() == LT(0, 0, 1, Testparser.TOKEN_int)); assert(lexer.lex_token() == LT(0, 2, 1, Testparser.TOKEN_plus)); assert(lexer.lex_token() == LT(0, 4, 1, Testparser.TOKEN_int)); @@ -79,6 +47,6 @@ unittest assert(lexer.lex_token() == LT(1, 6, 3, Testparser.TOKEN_int)); assert(lexer.lex_token() == LT(1, 9, 0, Testparser.TOKEN_0EOF)); - lexer = new Testparser.Lexer(null, 0u); + lexer = new Testparser.Lexer(""); assert(lexer.lex_token() == LT(0, 0, 0, Testparser.TOKEN_0EOF)); } diff --git a/spec/test_d_parser_identical_rules_lookahead.d b/spec/test_d_parser_identical_rules_lookahead.d index 772711f..0b34a21 100644 --- a/spec/test_d_parser_identical_rules_lookahead.d +++ b/spec/test_d_parser_identical_rules_lookahead.d @@ -9,10 +9,10 @@ int main() unittest { string input = "aba"; - auto parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + auto parser = new Testparser.Parser(input); assert(parser.parse() == true); input = "abb"; - parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + parser = new Testparser.Parser(input); assert(parser.parse() == true); } diff --git a/spec/test_d_parser_rule_from_multiple_states.d b/spec/test_d_parser_rule_from_multiple_states.d index 4b28ca8..8fff083 100644 --- a/spec/test_d_parser_rule_from_multiple_states.d +++ b/spec/test_d_parser_rule_from_multiple_states.d @@ -9,14 +9,14 @@ int main() unittest { string input = "a"; - auto parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + auto parser = new Testparser.Parser(input); assert(parser.parse() == false); input = "a b"; - parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + parser = new Testparser.Parser(input); assert(parser.parse() == true); input = "bb"; - parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + parser = new Testparser.Parser(input); assert(parser.parse() == true); } diff --git a/spec/test_lexer_modes.d b/spec/test_lexer_modes.d index c1f7b27..00000a1 100644 --- a/spec/test_lexer_modes.d +++ b/spec/test_lexer_modes.d @@ -9,12 +9,12 @@ int main() unittest { string input = `abc "a string" def`; - auto parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + auto parser = new Testparser.Parser(input); assert(parser.parse() == true); writeln("pass1"); input = `abc "abc def" def`; - parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + parser = new Testparser.Parser(input); assert(parser.parse() == true); writeln("pass2"); } diff --git a/spec/test_parser_rule_user_code.d b/spec/test_parser_rule_user_code.d index 5829f74..9b0b503 100644 --- a/spec/test_parser_rule_user_code.d +++ b/spec/test_parser_rule_user_code.d @@ -9,6 +9,6 @@ int main() unittest { string input = "ab"; - auto parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + auto parser = new Testparser.Parser(input); assert(parser.parse() == true); } diff --git a/spec/test_parsing_lists.d b/spec/test_parsing_lists.d index 71df9e5..297891c 100644 --- a/spec/test_parsing_lists.d +++ b/spec/test_parsing_lists.d @@ -9,17 +9,17 @@ int main() unittest { string input = "a"; - auto parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + auto parser = new Testparser.Parser(input); assert(parser.parse() == true); assert(parser.result == 1u); input = ""; - parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + parser = new Testparser.Parser(input); assert(parser.parse() == true); assert(parser.result == 0u); input = "aaaaaaaaaaaaaaaa"; - parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + parser = new Testparser.Parser(input); assert(parser.parse() == true); assert(parser.result == 16u); } diff --git a/spec/test_pattern.d b/spec/test_pattern.d index aba9195..d8c160d 100644 --- a/spec/test_pattern.d +++ b/spec/test_pattern.d @@ -9,12 +9,12 @@ int main() unittest { string input = "abcdef"; - auto parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + auto parser = new Testparser.Parser(input); assert(parser.parse() == true); writeln("pass1"); input = "defabcdef"; - parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + parser = new Testparser.Parser(input); assert(parser.parse() == true); writeln("pass2"); } diff --git a/spec/test_return_token_from_pattern.d b/spec/test_return_token_from_pattern.d index 970e0d1..66a755f 100644 --- a/spec/test_return_token_from_pattern.d +++ b/spec/test_return_token_from_pattern.d @@ -9,6 +9,6 @@ int main() unittest { string input = "defghidef"; - auto parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + auto parser = new Testparser.Parser(input); assert(parser.parse() == true); } diff --git a/spec/test_user_code.d b/spec/test_user_code.d index ad3fd25..719065a 100644 --- a/spec/test_user_code.d +++ b/spec/test_user_code.d @@ -9,12 +9,12 @@ int main() unittest { string input = "abcdef"; - auto parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + auto parser = new Testparser.Parser(input); assert(parser.parse() == true); writeln("pass1"); input = "abcabcdef"; - parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length); + parser = new Testparser.Parser(input); assert(parser.parse() == true); writeln("pass2"); }