Add DEFINE preprocessor directive

2020-05-30 12:21:50 +02:00
parent 3bb2cc17e9
commit 0286f25e8d
5 changed files with 174 additions and 29 deletions
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ language, or keep on reading!
 ## From Text To Runtime Behaviour

 In order to turn the source text into executable code we use 3 passes:
- Pass 1: tokenization (syntax check)
+- Pass 1: tokenization (syntax check) and preprocessing (substitution)
 - Pass 2: interpretation (semantics check)
 - Pass 3: execution (runtime check)

@@ -69,7 +69,6 @@ The following characters are used as identifiers:
 - semicolon (`;`) for statement termination
 - hash (`#`) for comments
 - square brackets (`[` and `]`) for addressing memory
- double quotes (`"`) for string values

 ## Memory Model

@@ -87,12 +86,16 @@ second byte of location `$900`).
 All symbols are reserved keywords and can therefore NOT be used as labels.
 There is currently no strict checking, so be careful.

-## Directives
+## Preprocessor

- `DECLARE` declares the first label argument to equal the second, immediate
-	value, argument and is used to declare a constant for the virtual machine.
- `STRING` puts the string value declared as the second argument in the memory
-	memory location of the first immediate argument
+All preprocessor directives are prefixed by a `#`. Ill-formed preprocessor
+directives do not halt compilation; they are merely reported and then ignored.
+
+- `DEFINE` replaces any occurrence of the first argument with the second argument.
+	The second argument may be empty, effectively deleting occurrences of the
+	first argument. Quotes are currently not supported, and arguments are
+	separated by whitespace. If multiple defines exist for the same
+	substitution, the first one declared is used.

 ### Operands

--- a/bin/test.wasm
+++ b/bin/test.wasm
@@ -1,4 +1,4 @@
-#DECLARE MEMORY_SIZE $4096;
+#DEFINE

 addi $10 $-5 %A;
 subi %A $2 %B;
@@ -22,16 +22,17 @@ seti %A %B;
 lti %A $10;
 jmp count_loop;

+#DEFINE PRINT_CHAR $0
 # Hello world
 seti %A $72;  # H
-int $0;
+int PRINT_CHAR;
 seti %A $101; # e
-int $0;
+int PRINT_CHAR;
 seti %A $108; # l
-int $0;
-int $0;
+int PRINT_CHAR;
+int PRINT_CHAR;
 seti %A $111; # o
-int $0;
+int PRINT_CHAR;

 seti %A $32; # space
 int $0;
--- a/include/token/tokenizer.hpp
+++ b/include/token/tokenizer.hpp
@@ -7,6 +7,12 @@ namespace Token
 {
 	class Tokenizer
 	{
+	private: // state and helpers backing the DEFINE preprocessor directive
+		std::vector<std::pair<std::string, std::string>> substitutions; // (name, replacement) pairs, in the order they were declared
+		// Helpers called while tokenizing a line:
+		Token ExtractToken(std::string string, int const lineNumber, int const lineColumn) const; // `string` is taken by value because a matching substitution overwrites it
+		void ParseComment(std::string const & string, int const lineNumber, int const lineColumn); // non-const: a DEFINE found in the comment is appended to `substitutions`
+
 	public:
 		void Tokenize(std::string const & line, int const lineNumber, std::vector<Token> & tokens);
 	};
--- a/include/wassembler.hpp
+++ b/include/wassembler.hpp
@@ -1,7 +1,6 @@
 #pragma once
 #include <configuration.hpp>
 #include <execute/virtualmachine.hpp>
-#include <preprocessor/preprocessor.hpp>
 #include <interpret/interpreter.hpp>
 #include <token/tokenizer.hpp>

--- a/src/token/tokenizer.cpp
+++ b/src/token/tokenizer.cpp
@@ -5,11 +5,6 @@

 namespace Token
 {
-	bool IsWhiteSpace(char const c)
-	{
-		return c == '\n' || c == ' ' || c == '\t' || c == '\r';
-	}
-
 	std::tuple<int, bool> TryParseInt(std::string const & string)
 	{
 		try
@@ -23,24 +18,42 @@ namespace Token
 		}
 	}

-	Token ExtractToken(std::string const & string, int const lineNumber, int const lineColumn)
+	Token Tokenizer::ExtractToken(std::string string,
+	int const lineNumber,
+	int const lineColumn) const
 	{
 		if (string.size() == 0)
 		{
 			return Token::CreateUnknownToken(lineNumber, lineColumn);
 		}

+		for(std::size_t i = 0; i < substitutions.size(); ++i)
+		{
+			if (string == substitutions[i].first)
+			{
+				string = substitutions[i].second;
+				break;
+			}
+		}
+
 		char const prefix = string[0];
 		switch(prefix)
 		{
 			case '$':
 			{
 				auto const result = TryParseInt(string.substr(1, string.size()));
-				return Token::CreateImmediateValueToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn);
+				return Token::CreateImmediateValueToken(
+					std::get<0>(result),
+					std::get<1>(result),
+					lineNumber,
+					lineColumn);
 			}

 			case '%':
-			return Token::CreateRegisterToken(GetRegisterType(string.substr(1, string.size())), lineNumber, lineColumn);
+			return Token::CreateRegisterToken(GetRegisterType(
+				string.substr(1, string.size())),
+				lineNumber,
+				lineColumn);

 			case ';':
 			return Token::CreateStatementEndToken(lineNumber, lineColumn);
@@ -53,7 +66,11 @@ namespace Token
 		if (postfix == ':')
 		{
 			// TODO check if label is an Operand?
-			return Token::CreateLabelToken(string.substr(0, string.size() - 1), true, lineNumber, lineColumn);
+			return Token::CreateLabelToken(
+				string.substr(0, string.size() - 1),
+				true,
+				lineNumber,
+				lineColumn);
 		}

 		if (prefix == '[' && postfix == ']')
@@ -68,11 +85,18 @@ namespace Token
 			if (memoryPrefix == '$')
 			{
 				auto const result = TryParseInt(valueString);
-				return Token::CreateMemoryToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn);
+				return Token::CreateMemoryToken(
+					std::get<0>(result),
+					std::get<1>(result),
+					lineNumber,
+					lineColumn);
 			}
 			else if (memoryPrefix == '%')
 			{
-				return Token::CreateMemoryToken(GetRegisterType(valueString), lineNumber, lineColumn);
+				return Token::CreateMemoryToken(
+					GetRegisterType(valueString),
+					lineNumber,
+					lineColumn);
 			}
 			else
 			{
@@ -95,7 +119,115 @@ namespace Token
 		return Token::CreateLabelToken(string, true, lineNumber, lineColumn);
 	}

-	void Tokenizer::Tokenize(std::string const & line, int const lineNumber, std::vector<Token> & tokens)
+	// Reports whether `c` is a space, tab, line feed or carriage return
+	bool IsWhiteSpace(char const c)
+	{
+		switch(c)
+		{
+			case ' ':
+			case '\t':
+			case '\n':
+			case '\r':
+			return true;
+
+			default:
+			return false;
+		}
+	}
+
+	/// Inspects the comment that starts at `lineColumn` (the position of the `#`)
+	/// and, when it is a `#DEFINE <name> [<replacement>]` directive, records the
+	/// substitution. Comments that are not a DEFINE directive are ignored
+	/// silently; a DEFINE without any argument is reported and ignored.
+	///
+	/// @param string      The complete source line that contains the comment.
+	/// @param lineNumber  Line number of `string`; printed as `lineNumber + 1`
+	///                    in the warning.
+	/// @param lineColumn  Index of the `#` character inside `string`.
+	void Tokenizer::ParseComment(
+		std::string const & string,
+		int const lineNumber,
+		int const lineColumn)
+	{
+		std::size_t const length = string.size();
+		std::size_t const contentStart = static_cast<std::size_t>(lineColumn) + 1;
+
+		// A directive name must follow the `#` immediately; a bare `#` or a `#`
+		// followed by whitespace is an ordinary comment.
+		if (contentStart >= length || IsWhiteSpace(string[contentStart]))
+		{
+			return;
+		}
+
+		// Split the comment into whitespace separated words. Only the directive
+		// name and its first two arguments matter, anything after is ignored.
+		std::size_t const maxWords = 3;
+		std::string words[maxWords];
+		std::size_t wordCount = 0;
+		std::size_t cursor = contentStart;
+		while (wordCount < maxWords)
+		{
+			while (cursor < length && IsWhiteSpace(string[cursor]))
+			{
+				++cursor;
+			}
+			if (cursor >= length)
+			{
+				break;
+			}
+
+			std::size_t const wordStart = cursor;
+			while (cursor < length && !IsWhiteSpace(string[cursor]))
+			{
+				++cursor;
+			}
+			words[wordCount] = string.substr(wordStart, cursor - wordStart);
+			++wordCount;
+		}
+
+		// Checked even when the directive name runs up to the end of the line,
+		// so a one word comment such as `#note` is not reported as a DEFINE.
+		if (words[0] != "DEFINE")
+		{
+			return;
+		}
+
+		if (wordCount < 2)
+		{
+			std::printf("WARNING: DEFINE with no arguments on line %d\n", lineNumber + 1);
+			return;
+		}
+
+		// words[2] stays empty when no replacement was given
+		substitutions.push_back(std::make_pair(words[1], words[2]));
+	}
+
+	void Tokenizer::Tokenize(
+		std::string const & line,
+		int const lineNumber,
+		std::vector<Token> & tokens)
 	{
 		enum class TokenizerState
 		{
@@ -114,7 +246,7 @@ namespace Token
 				{
 					if (line[column] == '#')
 					{
-						// Ignore comments
+						ParseComment(line, lineNumber, column);
 						return;
 					}

@@ -123,7 +255,8 @@ namespace Token
 					switch(line[column])
 					{
 						case ';':
-						tokens.push_back(ExtractToken(line.substr(column, 1), lineNumber, column));
+						tokens.push_back(
+							ExtractToken(line.substr(column, 1), lineNumber, column));
 						break;

 						default:
@@ -154,7 +287,10 @@ namespace Token
 		switch(state)
 		{
 			case TokenizerState::LookForTokenEnd:
-			tokens.push_back(ExtractToken(line.substr(columnTokenStart, line.size()), lineNumber, columnTokenStart));
+			tokens.push_back(ExtractToken(
+				line.substr(columnTokenStart, line.size()),
+				lineNumber,
+				columnTokenStart));
 			break;

 			case TokenizerState::LookForNextToken: