From aebc1dd86da1c003f62637b5d7c5169266d8c8e3 Mon Sep 17 00:00:00 2001 From: Tijmen van Nesselrooij Date: Fri, 28 Aug 2020 20:51:02 +0200 Subject: [PATCH] Improve syntax error reporting --- bin/error.wasm | 11 ++++++++ include/token/token.hpp | 10 ++++--- src/token/token.cpp | 39 ++++++++++++++++++--------- src/token/tokenizer.cpp | 60 ++++++++++++++++++++++------------------- src/wassembler.cpp | 5 +++- 5 files changed, 80 insertions(+), 45 deletions(-) create mode 100644 bin/error.wasm diff --git a/bin/error.wasm b/bin/error.wasm new file mode 100644 index 0000000..dd3a7a8 --- /dev/null +++ b/bin/error.wasm @@ -0,0 +1,11 @@ +seti %A '; +seti %A ' '; +seti %A ' +'; +seti %A $a12; +seti %A []; +seti %A [abc]; +seti %A [$0a4]; +seti %A ]; +seti %A [; +seti %A [$5; diff --git a/include/token/token.hpp b/include/token/token.hpp index 8d71b5a..de60498 100644 --- a/include/token/token.hpp +++ b/include/token/token.hpp @@ -32,17 +32,19 @@ namespace Token TokenValueType const valueType; bool isValid; std::variant data; + std::string errorMessage; Token(Token const & other); - static Token CreateUnknownToken(int const lineNumber, int const lineColumn); + static Token CreateEmptyToken(int const lineNumber, int const lineColumn); + static Token CreateErrorToken(std::string const & message, TokenType const type, int const lineNumber, int const lineColumn); static Token CreateStatementEndToken(int const lineNumber, int const lineColumn); - static Token CreateLabelToken(std::string const & string, bool isValid, int const lineNumber, int const lineColumn); - static Token CreateImmediateValueToken(int const value, bool isValid, int const lineNumber, int const lineColumn); + static Token CreateLabelToken(std::string const & string, int const lineNumber, int const lineColumn); + static Token CreateImmediateValueToken(int const value, int const lineNumber, int const lineColumn); static Token CreateRegisterToken(RegisterType const registerType, int const lineNumber, int const lineColumn); static Token CreateOperandToken(OperandType const operandType, int const lineNumber, int const lineColumn); static Token CreateMemoryToken(RegisterType const registerType, int const lineNumber, int const lineColumn); - static Token CreateMemoryToken(int const value, bool isValid, int const lineNumber, int const lineColumn); + static Token CreateMemoryToken(int const value, int const lineNumber, int const lineColumn); void DebugPrint() const; }; diff --git a/src/token/token.cpp b/src/token/token.cpp index e3b29fa..d56e14a 100644 --- a/src/token/token.cpp +++ b/src/token/token.cpp @@ -9,7 +9,8 @@ namespace Token type(_type), valueType(TokenValueType::None), isValid(validness), - data(0) + data(0), + errorMessage() { } @@ -19,7 +20,8 @@ namespace Token type(_type), valueType(TokenValueType::String), isValid(validness), - data(string) + data(string), + errorMessage() { } @@ -29,7 +31,8 @@ namespace Token type(_type), valueType(TokenValueType::Integer), isValid(validness), - data(value) + data(value), + errorMessage() { } @@ -39,7 +42,8 @@ namespace Token type(_type), valueType(TokenValueType::Register), isValid(validness), - data(registerType) + data(registerType), + errorMessage() { } @@ -49,7 +53,8 @@ namespace Token type(_type), valueType(TokenValueType::Operand), isValid(validness), - data(operandType) + data(operandType), + errorMessage() { } @@ -59,28 +64,36 @@ namespace Token type(other.type), valueType(other.valueType), isValid(other.isValid), - data(other.data) + data(other.data), + errorMessage(other.errorMessage) { } - Token Token::CreateUnknownToken(int const lineNumber, int const lineColumn) + Token Token::CreateEmptyToken(int const lineNumber, int const lineColumn) { return Token(TokenType::Unknown, false, lineNumber, lineColumn); } + Token Token::CreateErrorToken(std::string const & message, TokenType const type, int const lineNumber, int const lineColumn) + { + Token token(type, false, lineNumber, lineColumn); + token.errorMessage = message; + return token; + } + Token Token::CreateStatementEndToken(int const lineNumber, int const lineColumn) { return Token(TokenType::StatementEnd, true, lineNumber, lineColumn); } - Token Token::CreateLabelToken(std::string const & string, bool isValid, int const lineNumber, int const lineColumn) + Token Token::CreateLabelToken(std::string const & string, int const lineNumber, int const lineColumn) { - return Token(TokenType::Label, string, isValid, lineNumber, lineColumn); + return Token(TokenType::Label, string, true, lineNumber, lineColumn); } - Token Token::CreateImmediateValueToken(int const value, bool isValid, int const lineNumber, int const lineColumn) + Token Token::CreateImmediateValueToken(int const value, int const lineNumber, int const lineColumn) { - return Token(TokenType::ImmediateInteger, value, isValid, lineNumber, lineColumn); + return Token(TokenType::ImmediateInteger, value, true, lineNumber, lineColumn); } Token Token::CreateRegisterToken(RegisterType const registerType, int const lineNumber, int const lineColumn) @@ -98,9 +111,9 @@ namespace Token return Token(TokenType::Memory, registerType, registerType != RegisterType::Unknown, lineNumber, lineColumn); } - Token Token::CreateMemoryToken(int const value, bool isValid, int const lineNumber, int const lineColumn) + Token Token::CreateMemoryToken(int const value, int const lineNumber, int const lineColumn) { - return Token(TokenType::Memory, value, isValid, lineNumber, lineColumn); + return Token(TokenType::Memory, value, true, lineNumber, lineColumn); } void Token::DebugPrint() const diff --git a/src/token/tokenizer.cpp b/src/token/tokenizer.cpp index 49d8fc0..0d54a40 100644 --- a/src/token/tokenizer.cpp +++ b/src/token/tokenizer.cpp @@ -25,7 +25,8 @@ namespace Token { if (string.size() == 0) { - return Token::CreateUnknownToken(lineNumber, lineColumn); + // TODO Should this become an error token? + return Token::CreateEmptyToken(lineNumber, lineColumn); } for(std::size_t i = 0; i < substitutions.size(); ++i) @@ -48,14 +49,13 @@ namespace Token { return Token::CreateImmediateValueToken( result.value(), - true, lineNumber, lineColumn); } - return Token::CreateImmediateValueToken( - 0, - false, + return Token::CreateErrorToken( + "Immediate cannot be parsed as an integer", + TokenType::ImmediateInteger, lineNumber, lineColumn); } @@ -79,7 +79,6 @@ namespace Token // TODO check if label is an Operand? return Token::CreateLabelToken( string.substr(0, string.size() - 1), - true, lineNumber, lineColumn); } @@ -88,7 +87,11 @@ namespace Token { if(string.size() < 4) { - return Token::CreateMemoryToken(0, false, lineNumber, lineColumn + 2u); + return Token::CreateErrorToken( + "Memory address statement is empty", + TokenType::Memory, + lineNumber, + lineColumn); } char const memoryPrefix = string[1]; @@ -101,14 +104,13 @@ namespace Token { return Token::CreateMemoryToken( result.value(), - true, lineNumber, lineColumn); } - return Token::CreateMemoryToken( - 0, - false, + return Token::CreateErrorToken( + "Memory immediate address cannot be parsed as an integer", + TokenType::Memory, lineNumber, lineColumn); } @@ -121,13 +123,21 @@ namespace Token } else { - return Token::CreateMemoryToken(0, false, lineNumber, lineColumn + 1u); + return Token::CreateErrorToken( + "Memory immediate address contains an unexpected value", + TokenType::Memory, + lineNumber, + lineColumn + 1u); } } else if (prefix == '[' || postfix == ']') { int const errorLineColumn = (prefix == '[') ? lineColumn : (lineColumn + string.size() - 1u); - return Token::CreateMemoryToken(0, false, lineNumber, errorLineColumn); + return Token::CreateErrorToken( + "Non terminated memory address brackets", + TokenType::Memory, + lineNumber, + errorLineColumn); } OperandType const opType = GetOperandType(string); @@ -137,7 +147,7 @@ namespace Token } // Last resort: it must be a label - return Token::CreateLabelToken(string, true, lineNumber, lineColumn); + return Token::CreateLabelToken(string, lineNumber, lineColumn); } bool IsWhiteSpace(char const c) @@ -256,23 +266,20 @@ end_state_loop: { if (line[i] == '\'') { - // Character literal must be exactly length 3 for now (2x ' + 1 - // character in between) if (lineColumn + 2u != i) { - tokens.emplace_back(Token::CreateImmediateValueToken( - 0, - false, + tokens.push_back(Token::CreateErrorToken( + "Character literal must be exactly 1 character long between single quotes", + TokenType::ImmediateInteger, lineNumber, - lineColumn)); + lineColumn + 1u)); } else { - tokens.emplace_back(Token::CreateImmediateValueToken( + tokens.push_back(Token::CreateImmediateValueToken( line[i - 1], - true, lineNumber, - lineColumn)); + lineColumn + 1)); } lineColumn = i; @@ -280,10 +287,9 @@ end_state_loop: } } - // Non terminated character literal! - tokens.emplace_back(Token::CreateImmediateValueToken( - 0, - false, + tokens.push_back(Token::CreateErrorToken( + "Non terminated character literal", + TokenType::ImmediateInteger, lineNumber, lineColumn)); diff --git a/src/wassembler.cpp b/src/wassembler.cpp index 7787fcf..188fbcd 100644 --- a/src/wassembler.cpp +++ b/src/wassembler.cpp @@ -5,9 +5,11 @@ void PrintBadToken(Token::Token const & token, std::vector const & lines) { - std::printf("at line number %i, column %i:\n", + std::printf("at line number %i, column %i: ", token.lineNumber + 1, token.lineColumn + 1); + std::puts(token.errorMessage.c_str()); + std::printf("%s\n", lines[token.lineNumber].c_str()); for(int i = 0; i < token.lineColumn; ++i) { @@ -109,6 +111,7 @@ bool Wassembler::LoadFromFile(std::string const & filePath) catch(Interpret::InterpretationError & e) { PrintBadToken(e.errorToken, lines); + std::puts("Aborting due to semantic error(s)"); return false; }