Add DEFINE preprocessor directive

2020-05-30 12:21:50 +02:00
parent 3bb2cc17e9
commit 0286f25e8d
5 changed files with 174 additions and 29 deletions
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ language, or keep on reading!
 ## From Text To Runtime Behaviour
 In order to turn the source text into executable code we use 3 passes:
- Pass 1: tokenization (syntax check)
+- Pass 1: tokenization (syntax check) and preprocessing (substitution)
 - Pass 2: interpretation (semantics check)
 - Pass 3: execution (runtime check)
@@ -69,7 +69,6 @@ The following characters are used as identifiers:
 - semicolon (`;`) for statement termination
 - hash (`#`) for comments
 - square brackets (`[` and `]`) for addressing memory
 - double quotes (`"`) for string values
 ## Memory Model
@@ -87,12 +86,16 @@ second byte of location `$900`).
 All symbols are reserved keywords and can therefore NOT be used as labels.
 There is currently no strict checking, so be careful.
-## Directives
+## Preprocessor
- `DECLARE` declares the first label argument to equal the second, immediate
+All preprocessor directives are prefixed by a `#`. Ill-formed preprocessor
-	value, argument and is used to declare a constant for the virtual machine.
+directives do not halt compilation; they are merely reported and then ignored.
- `STRING` puts the string value declared as the second argument in the memory
+
-	memory location of the first immediate argument
+- `DEFINE` replaces any occurrence of the first argument by the second argument.
 	The second argument may be empty, effectively deleting occurrences of the
 	first argument. Quotes are currently not supported and arguments are
 	separated by whitespace. If multiple defines exist for the same
 	substitution, the first one declared is used.
 ### Operands
--- a/bin/test.wasm
+++ b/bin/test.wasm
@@ -1,4 +1,4 @@
-#DECLARE MEMORY_SIZE $4096;
+#DEFINE
 addi $10 $-5 %A;
 subi %A $2 %B;
@@ -22,16 +22,17 @@ seti %A %B;
 lti %A $10;
 jmp count_loop;
 #DEFINE PRINT_CHAR $0
 # Hello world
 seti %A $72;  # H
-int $0;
+int PRINT_CHAR;
 seti %A $101; # e
-int $0;
+int PRINT_CHAR;
 seti %A $108; # l
-int $0;
+int PRINT_CHAR;
-int $0;
+int PRINT_CHAR;
 seti %A $111; # o
-int $0;
+int PRINT_CHAR;
 seti %A $32; # space
 int $0;
--- a/include/token/tokenizer.hpp
+++ b/include/token/tokenizer.hpp
@@ -7,6 +7,12 @@ namespace Token
 {
 	// Splits source lines into tokens and applies `#DEFINE` substitutions
 	// that were collected from earlier lines.
 	class Tokenizer
 	{
 	private:
 		// (name, replacement) pairs in declaration order. ParseComment appends
 		// to this list; ExtractToken scans it front to back and stops at the
 		// first name that matches, so the earliest definition wins.
 		std::vector<std::pair<std::string, std::string>> substitutions;
 		// Builds a Token from a single token string. The string is taken by
 		// value because a matching substitution overwrites it before parsing.
 		Token ExtractToken(std::string string, int const lineNumber, int const lineColumn) const;
 		// Inspects the comment whose `#` sits at `lineColumn` in `string` and
 		// records a substitution when it is a DEFINE directive.
 		void ParseComment(std::string const & string, int const lineNumber, int const lineColumn);
 	public:
 		// Tokenizes one source line, appending the resulting tokens to
 		// `tokens`. A `#` found while scanning is handed to ParseComment.
 		void Tokenize(std::string const & line, int const lineNumber, std::vector<Token> & tokens);
 	};
--- a/include/wassembler.hpp
+++ b/include/wassembler.hpp
@@ -1,7 +1,6 @@
 #pragma once
 #include <configuration.hpp>
 #include <execute/virtualmachine.hpp>
 #include <preprocessor/preprocessor.hpp>
 #include <interpret/interpreter.hpp>
 #include <token/tokenizer.hpp>
--- a/src/token/tokenizer.cpp
+++ b/src/token/tokenizer.cpp
@@ -5,11 +5,6 @@
 namespace Token
 {
 	// Tells whether `c` is a space, tab, line feed or carriage return.
 	bool IsWhiteSpace(char const c)
 	{
 		switch(c)
 		{
 			case ' ':
 			case '\t':
 			case '\n':
 			case '\r':
 			return true;
 			default:
 			return false;
 		}
 	}
 	std::tuple<int, bool> TryParseInt(std::string const & string)
 	{
 		try
@@ -23,24 +18,42 @@ namespace Token
 		}
 	}
-	Token ExtractToken(std::string const & string, int const lineNumber, int const lineColumn)
+	Token Tokenizer::ExtractToken(std::string string,
 	int const lineNumber,
 	int const lineColumn) const
 	{
 		if (string.size() == 0)
 		{
 			return Token::CreateUnknownToken(lineNumber, lineColumn);
 		}
 		for(std::size_t i = 0; i < substitutions.size(); ++i)
 		{
 			if (string == substitutions[i].first)
 			{
 				string = substitutions[i].second;
 				break;
 			}
 		}
 		char const prefix = string[0];
 		switch(prefix)
 		{
 			case '$':
 			{
 				auto const result = TryParseInt(string.substr(1, string.size()));
-				return Token::CreateImmediateValueToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn);
+				return Token::CreateImmediateValueToken(
 					std::get<0>(result),
 					std::get<1>(result),
 					lineNumber,
 					lineColumn);
 			}
 			case '%':
-			return Token::CreateRegisterToken(GetRegisterType(string.substr(1, string.size())), lineNumber, lineColumn);
+			return Token::CreateRegisterToken(GetRegisterType(
 				string.substr(1, string.size())),
 				lineNumber,
 				lineColumn);
 			case ';':
 			return Token::CreateStatementEndToken(lineNumber, lineColumn);
@@ -53,7 +66,11 @@ namespace Token
 		if (postfix == ':')
 		{
 			// TODO check if label is an Operand?
-			return Token::CreateLabelToken(string.substr(0, string.size() - 1), true, lineNumber, lineColumn);
+			return Token::CreateLabelToken(
 				string.substr(0, string.size() - 1),
 				true,
 				lineNumber,
 				lineColumn);
 		}
 		if (prefix == '[' && postfix == ']')
@@ -68,11 +85,18 @@ namespace Token
 			if (memoryPrefix == '$')
 			{
 				auto const result = TryParseInt(valueString);
-				return Token::CreateMemoryToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn);
+				return Token::CreateMemoryToken(
 					std::get<0>(result),
 					std::get<1>(result),
 					lineNumber,
 					lineColumn);
 			}
 			else if (memoryPrefix == '%')
 			{
-				return Token::CreateMemoryToken(GetRegisterType(valueString), lineNumber, lineColumn);
+				return Token::CreateMemoryToken(
 					GetRegisterType(valueString),
 					lineNumber,
 					lineColumn);
 			}
 			else
 			{
@@ -95,7 +119,115 @@ namespace Token
 		return Token::CreateLabelToken(string, true, lineNumber, lineColumn);
 	}
-	void Tokenizer::Tokenize(std::string const & line, int const lineNumber, std::vector<Token> & tokens)
+	bool IsWhiteSpace(char const c)
 	{
 		// Line feed, space, tab and carriage return count as whitespace;
 		// every other character (including '\0') does not.
 		return c == '\n' || c == ' ' || c == '\t' || c == '\r';
 	}
 	// Examines a `#` comment and, if it is a `#DEFINE` preprocessor directive,
 	// records the substitution it declares.
 	//
 	// string     - the complete source line containing the comment
 	// lineNumber - zero-based line index, only used for the warning message
 	// lineColumn - column of the `#` character inside `string`
 	//
 	// A directive must follow the `#` immediately (no whitespace in between)
 	// and must be spelled exactly `DEFINE`; anything else is an ordinary
 	// comment and is ignored silently. Up to two whitespace separated
 	// arguments are read, further text on the line is ignored. With one
 	// argument the replacement is the empty string. A DEFINE without any
 	// argument is reported with a warning and otherwise ignored.
 	void Tokenizer::ParseComment(
 		std::string const & string,
 		int const lineNumber,
 		int const lineColumn)
 	{
 		std::size_t const length = string.size();
 		std::size_t const directiveStart = static_cast<std::size_t>(lineColumn) + 1;
 		// Nothing after the `#`, or whitespace right after it: plain comment
 		if (directiveStart >= length || IsWhiteSpace(string[directiveStart]))
 		{
 			return;
 		}
 		// The directive name runs up to the next whitespace or the end of the
 		// line. Checking it here (and not only when whitespace is found) keeps
 		// one-word comments such as `#todo` from being reported as a DEFINE.
 		std::size_t cursor = directiveStart;
 		while(cursor < length && !IsWhiteSpace(string[cursor]))
 		{
 			++cursor;
 		}
 		if (string.compare(directiveStart, cursor - directiveStart, "DEFINE") != 0)
 		{
 			return;
 		}
 		// arguments[0] is the text to replace, arguments[1] its replacement
 		std::string arguments[2];
 		unsigned argumentCount = 0;
 		while(argumentCount < 2)
 		{
 			while(cursor < length && IsWhiteSpace(string[cursor]))
 			{
 				++cursor;
 			}
 			if (cursor >= length)
 			{
 				break;
 			}
 			std::size_t const argumentStart = cursor;
 			while(cursor < length && !IsWhiteSpace(string[cursor]))
 			{
 				++cursor;
 			}
 			arguments[argumentCount] = string.substr(argumentStart, cursor - argumentStart);
 			++argumentCount;
 		}
 		if (argumentCount == 0)
 		{
 			// lineNumber is a signed int, so it is printed with %d
 			std::printf("WARNING: DEFINE with no arguments on line %d\n", lineNumber + 1);
 			return;
 		}
 		substitutions.push_back(std::make_pair(arguments[0], arguments[1]));
 	}
 	void Tokenizer::Tokenize(
 		std::string const & line,
 		int const lineNumber,
 		std::vector<Token> & tokens)
 	{
 		enum class TokenizerState
 		{
@@ -114,7 +246,7 @@ namespace Token
 				{
 					if (line[column] == '#')
 					{
-						// Ignore comments
+						ParseComment(line, lineNumber, column);
 						return;
 					}
@@ -123,7 +255,8 @@ namespace Token
 					switch(line[column])
 					{
 						case ';':
-						tokens.push_back(ExtractToken(line.substr(column, 1), lineNumber, column));
+						tokens.push_back(
 							ExtractToken(line.substr(column, 1), lineNumber, column));
 						break;
 						default:
@@ -154,7 +287,10 @@ namespace Token
 		switch(state)
 		{
 			case TokenizerState::LookForTokenEnd:
-			tokens.push_back(ExtractToken(line.substr(columnTokenStart, line.size()), lineNumber, columnTokenStart));
+			tokens.push_back(ExtractToken(
 				line.substr(columnTokenStart, line.size()),
 				lineNumber,
 				columnTokenStart));
 			break;
 			case TokenizerState::LookForNextToken: