From e1008b43a66feab0472dcc7960b0cb61a4b49929 Mon Sep 17 00:00:00 2001 From: Tijmen van Nesselrooij Date: Sun, 17 May 2020 20:30:57 +0200 Subject: [PATCH] Add string literals --- README.md | 3 ++ bin/test.wasm | 3 ++ include/interpret/errors.hpp | 28 +++++------ include/token/errors.hpp | 18 +++++++ include/token/token.hpp | 1 + include/token/tokentype.hpp | 1 + src/interpret/{error.cpp => errors.cpp} | 26 +++++----- src/main.cpp | 33 +++++++++---- src/token/errors.cpp | 15 ++++++ src/token/token.cpp | 9 ++++ src/token/tokenizer.cpp | 63 ++++++++++++++++++++----- 11 files changed, 152 insertions(+), 48 deletions(-) create mode 100644 include/token/errors.hpp rename src/interpret/{error.cpp => errors.cpp} (52%) create mode 100644 src/token/errors.cpp diff --git a/README.md b/README.md index 695d862..2e8c867 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,7 @@ The following characters are used as identifiers: - semicolon (`;`) for statement termination - hash (`#`) for comments - square brackets (`[` and `]`) for addressing memory +- double quotes (`"`) for string values ## Memory Model @@ -90,6 +91,8 @@ There is currently no strict checking, so be careful. - `DECLARE` declares the first label argument to equal the second, immediate value, argument and is used to declare a constant for the virtual machine. 
+- `STRING` puts the string value declared as the second argument in the + memory location of the first immediate argument ### Operands diff --git a/bin/test.wasm b/bin/test.wasm index eba9ae1..4bfa8e4 100644 --- a/bin/test.wasm +++ b/bin/test.wasm @@ -104,6 +104,9 @@ seti %B $10; int $5; int $3; +# Demonstrate string literals +"Hello world!"; + exit; noop_function: diff --git a/include/interpret/errors.hpp b/include/interpret/errors.hpp index cd7f267..066beea 100644 --- a/include/interpret/errors.hpp +++ b/include/interpret/errors.hpp @@ -5,71 +5,71 @@ namespace Interpret { - struct TokenError : public std::exception + struct InterpretationError : public std::exception { Token::Token errorToken; std::string errorMsg; - TokenError(Token::Token const & token, std::string const & msg); + InterpretationError(Token::Token const & token, std::string const & msg); }; - struct ExpectedArgument : public TokenError + struct ExpectedArgument : public InterpretationError { ExpectedArgument(Token::Token const & token); }; - struct ExpectedLabel : public TokenError + struct ExpectedLabel : public InterpretationError { ExpectedLabel(Token::Token const & token); }; - struct ExpectedValue : public TokenError + struct ExpectedValue : public InterpretationError { ExpectedValue(Token::Token const & token); }; - struct ExpectedImmediate : public TokenError + struct ExpectedImmediate : public InterpretationError { ExpectedImmediate(Token::Token const & token); }; - struct ExpectedImmediateOrMemory : public TokenError + struct ExpectedImmediateOrMemory : public InterpretationError { ExpectedImmediateOrMemory(Token::Token const & token); }; - struct ExpectedRegister : public TokenError + struct ExpectedRegister : public InterpretationError { ExpectedRegister(Token::Token const & token); }; - struct ExpectedRegisterOrMemory : public TokenError + struct ExpectedRegisterOrMemory : public InterpretationError { ExpectedRegisterOrMemory(Token::Token const & token); }; - struct 
ExpectedOperand : public TokenError + struct ExpectedOperand : public InterpretationError { ExpectedOperand(Token::Token const & token); }; - struct TooManyArguments : public TokenError + struct TooManyArguments : public InterpretationError { TooManyArguments(Token::Token const & token); }; - struct TooFewArguments : public TokenError + struct TooFewArguments : public InterpretationError { TooFewArguments(Token::Token const & token); }; - struct MissingEndOfStatment : public TokenError + struct MissingEndOfStatment : public InterpretationError { MissingEndOfStatment(Token::Token const & token); }; namespace Internal { - struct BadTokenForValue : public TokenError + struct BadTokenForValue : public InterpretationError { BadTokenForValue(Token::Token const & token); }; diff --git a/include/token/errors.hpp b/include/token/errors.hpp new file mode 100644 index 0000000..4943f84 --- /dev/null +++ b/include/token/errors.hpp @@ -0,0 +1,18 @@ +#pragma once +#include +#include + +namespace Token +{ + struct TokenizationError : public std::exception + { + Token errorToken; + std::string errorMsg; + TokenizationError(Token const & token, std::string const & msg); + }; + + struct MissingEndOfString : public TokenizationError + { + MissingEndOfString(Token const & token); + }; +} \ No newline at end of file diff --git a/include/token/token.hpp b/include/token/token.hpp index 4f9b8b7..5b651fe 100644 --- a/include/token/token.hpp +++ b/include/token/token.hpp @@ -42,6 +42,7 @@ namespace Token static Token CreateOperandToken(OperandType const operandType, int const lineNumber, int const lineColumn); static Token CreateMemoryToken(RegisterType const registerType, int const lineNumber, int const lineColumn); static Token CreateMemoryToken(int const value, bool isValid, int const lineNumber, int const lineColumn); + static Token CreateStringLiteralToken(std::string const & value, int const lineNumber, int const lineColumn); void DebugPrint() const; }; diff --git 
a/include/token/tokentype.hpp b/include/token/tokentype.hpp index 5ee79fb..5d03f2b 100644 --- a/include/token/tokentype.hpp +++ b/include/token/tokentype.hpp @@ -10,6 +10,7 @@ namespace Token Register, StatementEnd, Label, + String, Memory }; } \ No newline at end of file diff --git a/src/interpret/error.cpp b/src/interpret/errors.cpp similarity index 52% rename from src/interpret/error.cpp rename to src/interpret/errors.cpp index b2e526e..9d0f36e 100644 --- a/src/interpret/error.cpp +++ b/src/interpret/errors.cpp @@ -2,71 +2,71 @@ namespace Interpret { - TokenError::TokenError(Token::Token const & token, std::string const & msg) + InterpretationError::InterpretationError(Token::Token const & token, std::string const & msg) : errorToken(token), errorMsg(msg) { } ExpectedArgument::ExpectedArgument(Token::Token const & token) - : TokenError(token, "Expected an argument") + : InterpretationError(token, "Expected an argument") { } ExpectedLabel::ExpectedLabel(Token::Token const & token) - : TokenError(token, "Expected a label") + : InterpretationError(token, "Expected a label") { } ExpectedValue::ExpectedValue(Token::Token const & token) - : TokenError(token, "Expected an immediate value, a register or a memory location") + : InterpretationError(token, "Expected an immediate value, a register or a memory location") { } ExpectedImmediate::ExpectedImmediate(Token::Token const & token) - : TokenError(token, "Expected an immediate value") + : InterpretationError(token, "Expected an immediate value") { } ExpectedImmediateOrMemory::ExpectedImmediateOrMemory(Token::Token const & token) - : TokenError(token, "Expected an immediate value or a memory location") + : InterpretationError(token, "Expected an immediate value or a memory location") { } ExpectedRegister::ExpectedRegister(Token::Token const & token) - : TokenError(token, "Expected a register") + : InterpretationError(token, "Expected a register") { } ExpectedRegisterOrMemory::ExpectedRegisterOrMemory(Token::Token const 
& token) - : TokenError(token, "Expected a register or a memory location") + : InterpretationError(token, "Expected a register or a memory location") { } ExpectedOperand::ExpectedOperand(Token::Token const & token) - : TokenError(token, "Expected an operand") + : InterpretationError(token, "Expected an operand") { } TooManyArguments::TooManyArguments(Token::Token const & token) - : TokenError(token, "Too many arguments for operand") + : InterpretationError(token, "Too many arguments for operand") { } TooFewArguments::TooFewArguments(Token::Token const & token) - : TokenError(token, "Too few arguments for operand") + : InterpretationError(token, "Too few arguments for operand") { } MissingEndOfStatment::MissingEndOfStatment(Token::Token const & token) - : TokenError(token, "Missing end of line terminator (;)") + : InterpretationError(token, "Missing end of line terminator (;)") { } namespace Internal { BadTokenForValue::BadTokenForValue(Token::Token const & token) - : TokenError(token, "Internal error when converting token to value") + : InterpretationError(token, "Internal error when converting token to value") { } } diff --git a/src/main.cpp b/src/main.cpp index 738b335..f75071c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include void PrintBadToken(Token::Token const & token, std::vector const & lines) @@ -18,7 +19,13 @@ void PrintBadToken(Token::Token const & token, std::vector const & std::puts("^"); } -void PrintTokenError(Interpret::TokenError const & err, std::vector const & lines) +void PrintTokenError(Interpret::InterpretationError const & err, std::vector const & lines) +{ + std::printf("%s ", err.errorMsg.c_str()); + PrintBadToken(err.errorToken, lines); +} + +void PrintTokenError(Token::TokenizationError const & err, std::vector const & lines) { std::printf("%s ", err.errorMsg.c_str()); PrintBadToken(err.errorToken, lines); @@ -71,27 +78,37 @@ Interpret::Code GetCodeFromFile(std::string const & 
filePath) std::vector lines; std::string line; unsigned lineNumber = 0; + bool tokenizationError = false; while(std::getline(input, line)) { - tokenizer.Tokenize(line, lineNumber, tokens); + try + { + tokenizer.Tokenize(line, lineNumber, tokens); + } + catch(Token::TokenizationError & err) + { + tokenizationError = true; + PrintTokenError(err, lines); + } + ++lineNumber; lines.push_back(line); } input.close(); // Validate the syntax - bool syntaxOk = true; + bool syntaxError = false; for(auto const & token : tokens) { if (!token.isValid) { std::printf("Syntax error "); PrintBadToken(token, lines); - syntaxOk = false; + syntaxError = true; } } - if (!syntaxOk) + if (tokenizationError || syntaxError) { std::puts("Aborting due to syntax error(s)"); exit(1); @@ -103,7 +120,7 @@ Interpret::Code GetCodeFromFile(std::string const & filePath) { interpreter.Interpret(tokens, code); } - catch(Interpret::TokenError & e) + catch(Interpret::InterpretationError & e) { PrintTokenError(e, lines); exit(1); @@ -116,13 +133,13 @@ int main(int argc, char ** argv) { if (argc != 2) { - std::puts("Usage: wassembly [filename.wasm]"); + std::printf("Usage: %s [filename.wasm]\n", argv[0]); return 1; } auto const code = GetCodeFromFile(argv[1]); auto vm = Execute::VirtualMachine::CreateFromCode(code); vm.Run(); - + return 0; } \ No newline at end of file diff --git a/src/token/errors.cpp b/src/token/errors.cpp new file mode 100644 index 0000000..54a924b --- /dev/null +++ b/src/token/errors.cpp @@ -0,0 +1,15 @@ +#include + +namespace Token +{ + TokenizationError::TokenizationError(Token const & token, std::string const & msg) + : errorToken(token), + errorMsg(msg) + { + } + + MissingEndOfString::MissingEndOfString(Token const & token) + : TokenizationError(token, "Missing string terminator (\")") + { + } +} \ No newline at end of file diff --git a/src/token/token.cpp b/src/token/token.cpp index b80f1ba..199b93d 100644 --- a/src/token/token.cpp +++ b/src/token/token.cpp @@ -103,6 +103,11 @@ 
namespace Token return Token(TokenType::Memory, value, isValid, lineNumber, lineColumn); } + Token Token::CreateStringLiteralToken(std::string const & value, int const lineNumber, int const lineColumn) + { + return Token(TokenType::String, value.substr(1, value.size() - 2), true, lineNumber, lineColumn); + } + void Token::DebugPrint() const { std::putc(' ', stdout); @@ -188,6 +193,10 @@ namespace Token } break; + case TokenType::String: + std::printf("STRING=\"%s\"", std::get(data).c_str()); + break; + case TokenType::Unknown: default: std::printf("UNKNOWN_TOKEN"); diff --git a/src/token/tokenizer.cpp b/src/token/tokenizer.cpp index 62a3c54..af39eac 100644 --- a/src/token/tokenizer.cpp +++ b/src/token/tokenizer.cpp @@ -1,5 +1,6 @@ #include #include +#include #include namespace Token @@ -9,7 +10,7 @@ namespace Token return c == '\n' || c == ' ' || c == '\t' || c == '\r'; } - std::tuple TryParse(std::string const & string) + std::tuple TryParseInt(std::string const & string) { try { @@ -30,20 +31,22 @@ namespace Token } char const prefix = string[0]; - if (prefix == '$') + switch(prefix) { - auto const result = TryParse(string.substr(1, string.size())); - return Token::CreateImmediateValueToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn); - } + case '$': + { + auto const result = TryParseInt(string.substr(1, string.size())); + return Token::CreateImmediateValueToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn); + } - if (prefix == '%') - { + case '%': return Token::CreateRegisterToken(GetRegisterType(string.substr(1, string.size())), lineNumber, lineColumn); - } - if (prefix == ';') - { + case ';': return Token::CreateStatementEndToken(lineNumber, lineColumn); + + default: + break; } char const postfix = string[string.size() - 1]; @@ -64,7 +67,7 @@ namespace Token std::string const valueString = string.substr(2, string.size() - 3u); if (memoryPrefix == '$') { - auto const result = TryParse(valueString); + auto const result = 
TryParseInt(valueString); return Token::CreateMemoryToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn); } else if (memoryPrefix == '%') @@ -97,8 +100,10 @@ namespace Token enum class TokenizerState { LookForNextToken, + LookForStringEnd, LookForTokenEnd, }; + TokenizerState state = TokenizerState::LookForNextToken; unsigned columnTokenStart = 0; for(unsigned column = 0u; column < line.size(); ++column) @@ -115,7 +120,21 @@ namespace Token } columnTokenStart = column; - state = TokenizerState::LookForTokenEnd; + + switch(line[column]) + { + case '"': + state = TokenizerState::LookForStringEnd; + break; + + case ';': + tokens.push_back(ExtractToken(line.substr(column, 1), lineNumber, column)); + break; + + default: + state = TokenizerState::LookForTokenEnd; + break; + } } break; @@ -130,11 +149,29 @@ namespace Token state = TokenizerState::LookForNextToken; } break; + + case TokenizerState::LookForStringEnd: + if (line[column] == '"' && line[column - 1] != '\\') + { + tokens.push_back( + /* +1 keeps the closing quote: CreateStringLiteralToken drops the first and last character */ Token::CreateStringLiteralToken(line.substr(columnTokenStart, column - columnTokenStart + 1), lineNumber, columnTokenStart)); + state = TokenizerState::LookForNextToken; + } } } - if (state == TokenizerState::LookForTokenEnd) + + switch(state) { + case TokenizerState::LookForTokenEnd: tokens.push_back(ExtractToken(line.substr(columnTokenStart, line.size()), lineNumber, columnTokenStart)); + break; + + case TokenizerState::LookForStringEnd: + throw MissingEndOfString(ExtractToken(line.substr(columnTokenStart, line.size()), lineNumber, columnTokenStart)); + + case TokenizerState::LookForNextToken: + default: + break; } } } \ No newline at end of file