From e1008b43a66feab0472dcc7960b0cb61a4b49929 Mon Sep 17 00:00:00 2001
From: Tijmen van Nesselrooij <tijmen_222@hotmail.com>
Date: Sun, 17 May 2020 20:30:57 +0200
Subject: [PATCH] Add string literals

---
 README.md                               |  3 ++
 bin/test.wasm                           |  3 ++
 include/interpret/errors.hpp            | 28 +++++------
 include/token/errors.hpp                | 18 +++++++
 include/token/token.hpp                 |  1 +
 include/token/tokentype.hpp             |  1 +
 src/interpret/{error.cpp => errors.cpp} | 26 +++++-----
 src/main.cpp                            | 33 +++++++++----
 src/token/errors.cpp                    | 15 ++++++
 src/token/token.cpp                     |  9 ++++
 src/token/tokenizer.cpp                 | 63 ++++++++++++++++++++-----
 11 files changed, 152 insertions(+), 48 deletions(-)
 create mode 100644 include/token/errors.hpp
 rename src/interpret/{error.cpp => errors.cpp} (52%)
 create mode 100644 src/token/errors.cpp

diff --git a/README.md b/README.md
index 695d862..2e8c867 100644
--- a/README.md
+++ b/README.md
@@ -69,6 +69,7 @@ The following characters are used as identifiers:
 - semicolon (`;`) for statement termination
 - hash (`#`) for comments
 - square brackets (`[` and `]`) for addressing memory
+- double quotes (`"`) for string values
 
 ## Memory Model
 
@@ -90,6 +91,8 @@ There is currently no strict checking, so be careful.
 
 - `DECLARE` declares the first label argument to equal the second, immediate
 	value, argument and is used to declare a constant for the virtual machine.
+- `STRING` puts the string value declared as the second argument in the
+	memory location of the first immediate argument.
 
 ### Operands
 
diff --git a/bin/test.wasm b/bin/test.wasm
index eba9ae1..4bfa8e4 100644
--- a/bin/test.wasm
+++ b/bin/test.wasm
@@ -104,6 +104,9 @@ seti %B $10;
 int $5;
 int $3;
 
+# Demonstrate string literals
+"Hello world!";
+
 exit;
 
 noop_function:
diff --git a/include/interpret/errors.hpp b/include/interpret/errors.hpp
index cd7f267..066beea 100644
--- a/include/interpret/errors.hpp
+++ b/include/interpret/errors.hpp
@@ -5,71 +5,71 @@
 
 namespace Interpret
 {
-	struct TokenError : public std::exception
+	struct InterpretationError : public std::exception
 	{
 		Token::Token errorToken;
 		std::string errorMsg;
-		TokenError(Token::Token const & token, std::string const & msg);
+		InterpretationError(Token::Token const & token, std::string const & msg);
 	};
 
-	struct ExpectedArgument : public TokenError
+	struct ExpectedArgument : public InterpretationError
 	{
 		ExpectedArgument(Token::Token const & token);
 	};
 
-	struct ExpectedLabel : public TokenError
+	struct ExpectedLabel : public InterpretationError
 	{
 		ExpectedLabel(Token::Token const & token);
 	};
 
-	struct ExpectedValue : public TokenError
+	struct ExpectedValue : public InterpretationError
 	{
 		ExpectedValue(Token::Token const & token);
 	};
 
-	struct ExpectedImmediate : public TokenError
+	struct ExpectedImmediate : public InterpretationError
 	{
 		ExpectedImmediate(Token::Token const & token);
 	};
 
-	struct ExpectedImmediateOrMemory : public TokenError
+	struct ExpectedImmediateOrMemory : public InterpretationError
 	{
 		ExpectedImmediateOrMemory(Token::Token const & token);
 	};
 
-	struct ExpectedRegister : public TokenError
+	struct ExpectedRegister : public InterpretationError
 	{
 		ExpectedRegister(Token::Token const & token);
 	};
 
-	struct ExpectedRegisterOrMemory : public TokenError
+	struct ExpectedRegisterOrMemory : public InterpretationError
 	{
 		ExpectedRegisterOrMemory(Token::Token const & token);
 	};
 
-	struct ExpectedOperand : public TokenError
+	struct ExpectedOperand : public InterpretationError
 	{
 		ExpectedOperand(Token::Token const & token);
 	};
 
-	struct TooManyArguments : public TokenError
+	struct TooManyArguments : public InterpretationError
 	{
 		TooManyArguments(Token::Token const & token);
 	};
 
-	struct TooFewArguments : public TokenError
+	struct TooFewArguments : public InterpretationError
 	{
 		TooFewArguments(Token::Token const & token);
 	};
 
-	struct MissingEndOfStatment : public TokenError
+	struct MissingEndOfStatment : public InterpretationError
 	{
 		MissingEndOfStatment(Token::Token const & token);
 	};
 
 	namespace Internal
 	{
-		struct BadTokenForValue : public TokenError
+		struct BadTokenForValue : public InterpretationError
 		{
 			BadTokenForValue(Token::Token const & token);
 		};
diff --git a/include/token/errors.hpp b/include/token/errors.hpp
new file mode 100644
index 0000000..4943f84
--- /dev/null
+++ b/include/token/errors.hpp
@@ -0,0 +1,18 @@
+#pragma once
+#include <stdexcept>
+#include <token/token.hpp>
+
+namespace Token
+{
+	struct TokenizationError : public std::exception // base type for errors raised while tokenizing; NOTE(review): what() is not overridden here, the text lives in errorMsg
+	{
+		Token errorToken; // token the error is reported against
+		std::string errorMsg; // human-readable description of the error
+		TokenizationError(Token const & token, std::string const & msg); // stores copies of token and msg in the two members above
+	};
+
+	struct MissingEndOfString : public TokenizationError // a string literal was opened but never closed
+	{
+		MissingEndOfString(Token const & token); // token: the token to report the error against
+	};
+}
\ No newline at end of file
diff --git a/include/token/token.hpp b/include/token/token.hpp
index 4f9b8b7..5b651fe 100644
--- a/include/token/token.hpp
+++ b/include/token/token.hpp
@@ -42,6 +42,7 @@ namespace Token
 		static Token CreateOperandToken(OperandType const operandType, int const lineNumber, int const lineColumn);
 		static Token CreateMemoryToken(RegisterType const registerType, int const lineNumber, int const lineColumn);
 		static Token CreateMemoryToken(int const value, bool isValid, int const lineNumber, int const lineColumn);
+		static Token CreateStringLiteralToken(std::string const & value, int const lineNumber, int const lineColumn);
 
 		void DebugPrint() const;
 	};
diff --git a/include/token/tokentype.hpp b/include/token/tokentype.hpp
index 5ee79fb..5d03f2b 100644
--- a/include/token/tokentype.hpp
+++ b/include/token/tokentype.hpp
@@ -10,6 +10,7 @@ namespace Token
 		Register,
 		StatementEnd,
 		Label,
+		String,
 		Memory
 	};
 }
\ No newline at end of file
diff --git a/src/interpret/error.cpp b/src/interpret/errors.cpp
similarity index 52%
rename from src/interpret/error.cpp
rename to src/interpret/errors.cpp
index b2e526e..9d0f36e 100644
--- a/src/interpret/error.cpp
+++ b/src/interpret/errors.cpp
@@ -2,71 +2,71 @@
 
 namespace Interpret
 {
-	TokenError::TokenError(Token::Token const & token, std::string const & msg)
+	InterpretationError::InterpretationError(Token::Token const & token, std::string const & msg)
 		: errorToken(token),
 		errorMsg(msg)
 	{
 	}
 
 	ExpectedArgument::ExpectedArgument(Token::Token const & token)
-		: TokenError(token, "Expected an argument")
+		: InterpretationError(token, "Expected an argument")
 	{
 	}
 
 	ExpectedLabel::ExpectedLabel(Token::Token const & token)
-		: TokenError(token, "Expected a label")
+		: InterpretationError(token, "Expected a label")
 	{
 	}
 
 	ExpectedValue::ExpectedValue(Token::Token const & token)
-		: TokenError(token, "Expected an immediate value, a register or a memory location")
+		: InterpretationError(token, "Expected an immediate value, a register or a memory location")
 	{
 	}
 
 	ExpectedImmediate::ExpectedImmediate(Token::Token const & token)
-		: TokenError(token, "Expected an immediate value")
+		: InterpretationError(token, "Expected an immediate value")
 	{
 	}
 
 	ExpectedImmediateOrMemory::ExpectedImmediateOrMemory(Token::Token const & token)
-		: TokenError(token, "Expected an immediate value or a memory location")
+		: InterpretationError(token, "Expected an immediate value or a memory location")
 	{
 	}
 
 	ExpectedRegister::ExpectedRegister(Token::Token const & token)
-		: TokenError(token, "Expected a register")
+		: InterpretationError(token, "Expected a register")
 	{
 	}
 
 	ExpectedRegisterOrMemory::ExpectedRegisterOrMemory(Token::Token const & token)
-		: TokenError(token, "Expected a register or a memory location")
+		: InterpretationError(token, "Expected a register or a memory location")
 	{
 	}
 
 	ExpectedOperand::ExpectedOperand(Token::Token const & token)
-		: TokenError(token, "Expected an operand")
+		: InterpretationError(token, "Expected an operand")
 	{
 	}
 
 	TooManyArguments::TooManyArguments(Token::Token const & token)
-		: TokenError(token, "Too many arguments for operand")
+		: InterpretationError(token, "Too many arguments for operand")
 	{
 	}
 
 	TooFewArguments::TooFewArguments(Token::Token const & token)
-		: TokenError(token, "Too few arguments for operand")
+		: InterpretationError(token, "Too few arguments for operand")
 	{
 	}
 
 	MissingEndOfStatment::MissingEndOfStatment(Token::Token const & token)
-		: TokenError(token, "Missing end of line terminator (;)")
+		: InterpretationError(token, "Missing end of line terminator (;)")
 	{
 	}
 
 	namespace Internal
 	{
 		BadTokenForValue::BadTokenForValue(Token::Token const & token)
-			: TokenError(token, "Internal error when converting token to value")
+			: InterpretationError(token, "Internal error when converting token to value")
 		{
 		}
 	}
diff --git a/src/main.cpp b/src/main.cpp
index 738b335..f75071c 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -3,6 +3,7 @@
 #include <fstream>
 #include <interpret/errors.hpp>
 #include <interpret/interpreter.hpp>
+#include <token/errors.hpp>
 #include <token/tokenizer.hpp>
 
 void PrintBadToken(Token::Token const & token, std::vector<std::string> const & lines)
@@ -18,7 +19,13 @@ void PrintBadToken(Token::Token const & token, std::vector<std::string> const &
 	std::puts("^");
 }
 
-void PrintTokenError(Interpret::TokenError const & err, std::vector<std::string> const & lines)
+void PrintTokenError(Interpret::InterpretationError const & err, std::vector<std::string> const & lines)
+{
+	std::printf("%s ", err.errorMsg.c_str());
+	PrintBadToken(err.errorToken, lines);
+}
+
+void PrintTokenError(Token::TokenizationError const & err, std::vector<std::string> const & lines)
 {
 	std::printf("%s ", err.errorMsg.c_str());
 	PrintBadToken(err.errorToken, lines);
@@ -71,27 +78,37 @@ Interpret::Code GetCodeFromFile(std::string const & filePath)
 	std::vector<std::string> lines;
 	std::string line;
 	unsigned lineNumber = 0;
+	bool tokenizationError = false;
 	while(std::getline(input, line))
 	{
-		tokenizer.Tokenize(line, lineNumber, tokens);
+		try
+		{
+			tokenizer.Tokenize(line, lineNumber, tokens); // may throw Token::TokenizationError
+		}
+		catch(Token::TokenizationError & err)
+		{
+			tokenizationError = true; // remember the failure but keep tokenizing the remaining lines
+			PrintTokenError(err, lines); // NOTE(review): the current line is appended to 'lines' only after this try/catch - confirm PrintBadToken does not look up this token's line in it
+		}
+
 		++lineNumber;
 		lines.push_back(line);
 	}
 	input.close();
 
 	// Validate the syntax
-	bool syntaxOk = true;
+	bool syntaxError = false;
 	for(auto const & token : tokens)
 	{
 		if (!token.isValid)
 		{
 			std::printf("Syntax error ");
 			PrintBadToken(token, lines);
-			syntaxOk = false;
+			syntaxError = true;
 		}
 	}
 
-	if (!syntaxOk)
+	if (tokenizationError || syntaxError)
 	{
 		std::puts("Aborting due to syntax error(s)");
 		exit(1);
@@ -103,7 +120,7 @@ Interpret::Code GetCodeFromFile(std::string const & filePath)
 	{
 		interpreter.Interpret(tokens, code);
 	}
-	catch(Interpret::TokenError & e)
+	catch(Interpret::InterpretationError & e)
 	{
 		PrintTokenError(e, lines);
 		exit(1);
@@ -116,13 +133,13 @@ int main(int argc, char ** argv)
 {
 	if (argc != 2)
 	{
-		std::puts("Usage: wassembly [filename.wasm]");
+		std::printf("Usage: %s [filename.wasm]\n", argv[0]);
 		return 1;
 	}
 
 	auto const code = GetCodeFromFile(argv[1]);
 	auto vm = Execute::VirtualMachine::CreateFromCode(code);
 	vm.Run();
-	
+
 	return 0;
 }
\ No newline at end of file
diff --git a/src/token/errors.cpp b/src/token/errors.cpp
new file mode 100644
index 0000000..54a924b
--- /dev/null
+++ b/src/token/errors.cpp
@@ -0,0 +1,15 @@
+#include <token/errors.hpp>
+
+namespace Token
+{
+	TokenizationError::TokenizationError(Token const & token, std::string const & msg) // copies the offending token and the message into the exception
+		: errorToken(token),
+		errorMsg(msg)
+	{
+	}
+
+	MissingEndOfString::MissingEndOfString(Token const & token) // fixed message; only the offending token varies
+		: TokenizationError(token, "Missing string terminator (\")")
+	{
+	}
+}
\ No newline at end of file
diff --git a/src/token/token.cpp b/src/token/token.cpp
index b80f1ba..199b93d 100644
--- a/src/token/token.cpp
+++ b/src/token/token.cpp
@@ -103,6 +103,11 @@ namespace Token
 		return Token(TokenType::Memory, value, isValid, lineNumber, lineColumn);
 	}
 
+	Token Token::CreateStringLiteralToken(std::string const & value, int const lineNumber, int const lineColumn) // builds a String token that is always marked valid
+	{
+		return Token(TokenType::String, value.substr(1, value.size() - 2), true, lineNumber, lineColumn); // keeps value[1 .. size-2]: drops the first and last character (the surrounding quotes when value is the full quoted literal)
+	}
+
 	void Token::DebugPrint() const
 	{
 		std::putc(' ', stdout);
@@ -188,6 +193,10 @@ namespace Token
 			}
 			break;
 
+			case TokenType::String:
+			std::printf("STRING=\"%s\"", std::get<std::string>(data).c_str());
+			break;
+
 			case TokenType::Unknown:
 			default:
 			std::printf("UNKNOWN_TOKEN");
diff --git a/src/token/tokenizer.cpp b/src/token/tokenizer.cpp
index 62a3c54..af39eac 100644
--- a/src/token/tokenizer.cpp
+++ b/src/token/tokenizer.cpp
@@ -1,5 +1,6 @@
 #include <map>
 #include <stdexcept>
+#include <token/errors.hpp>
 #include <token/tokenizer.hpp>
 
 namespace Token
@@ -9,7 +10,7 @@ namespace Token
 		return c == '\n' || c == ' ' || c == '\t' || c == '\r';
 	}
 
-	std::tuple<int, bool> TryParse(std::string const & string)
+	std::tuple<int, bool> TryParseInt(std::string const & string)
 	{
 		try
 		{
@@ -30,20 +31,22 @@ namespace Token
 		}
 
 		char const prefix = string[0];
-		if (prefix == '$')
+		switch(prefix)
 		{
-			auto const result = TryParse(string.substr(1, string.size()));
-			return Token::CreateImmediateValueToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn);
-		}
+			case '$':
+			{
+				auto const result = TryParseInt(string.substr(1, string.size()));
+				return Token::CreateImmediateValueToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn);
+			}
 
-		if (prefix == '%')
-		{
+			case '%':
 			return Token::CreateRegisterToken(GetRegisterType(string.substr(1, string.size())), lineNumber, lineColumn);
-		}
 
-		if (prefix == ';')
-		{
+			case ';':
 			return Token::CreateStatementEndToken(lineNumber, lineColumn);
+
+			default:
+			break;
 		}
 
 		char const postfix = string[string.size() - 1];
@@ -64,7 +67,7 @@ namespace Token
 			std::string const valueString = string.substr(2, string.size() - 3u);
 			if (memoryPrefix == '$')
 			{
-				auto const result = TryParse(valueString);
+				auto const result = TryParseInt(valueString);
 				return Token::CreateMemoryToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn);
 			}
 			else if (memoryPrefix == '%')
@@ -97,8 +100,10 @@ namespace Token
 		enum class TokenizerState
 		{
 			LookForNextToken,
+			LookForStringEnd,
 			LookForTokenEnd,
 		};
+
 		TokenizerState state = TokenizerState::LookForNextToken;
 		unsigned columnTokenStart = 0;
 		for(unsigned column = 0u; column < line.size(); ++column)
@@ -115,7 +120,21 @@ namespace Token
 					}
 
 					columnTokenStart = column;
-					state = TokenizerState::LookForTokenEnd;
+
+					switch(line[column]) // choose the next state from the first character of the new token
+					{
+						case '"': // opening quote: scan ahead for the end of the string literal
+						state = TokenizerState::LookForStringEnd;
+						break;
+
+						case ';': // one-character token: emitted right away, state is left unchanged
+						tokens.push_back(ExtractToken(line.substr(column, 1), lineNumber, column));
+						break;
+
+						default: // any other character starts an ordinary token
+						state = TokenizerState::LookForTokenEnd;
+						break;
+					}
 				}
 				break;
 
@@ -130,11 +149,29 @@ namespace Token
 					state = TokenizerState::LookForNextToken;
 				}
 				break;
+
+				case TokenizerState::LookForStringEnd: // inside a string literal: look for the closing quote
+				if (line[column] == '"' && line[column - 1] != '\\') // a quote directly preceded by a backslash does not end the literal
+				{
+					tokens.push_back( // the slice must span both quotes (+1 for the closing one): CreateStringLiteralToken drops the first and last character
+						Token::CreateStringLiteralToken(line.substr(columnTokenStart, column - columnTokenStart + 1), lineNumber, columnTokenStart));
+					state = TokenizerState::LookForNextToken;
+				}
 			}
 		}
-		if (state == TokenizerState::LookForTokenEnd)
+
+		switch(state)
 		{
+			case TokenizerState::LookForTokenEnd:
 			tokens.push_back(ExtractToken(line.substr(columnTokenStart, line.size()), lineNumber, columnTokenStart));
+			break;
+
+			case TokenizerState::LookForStringEnd:
+			throw MissingEndOfString(ExtractToken(line.substr(columnTokenStart, line.size()), lineNumber, columnTokenStart));
+
+			case TokenizerState::LookForNextToken:
+			default:
+			break;
 		}
 	}
 }
\ No newline at end of file