Introduce simple character literal

2020-08-28 20:00:49 +02:00
parent d396628310
commit 593506a907
4 changed files with 102 additions and 14 deletions
--- a/README.md
+++ b/README.md
@@ -23,7 +23,9 @@ afterwards can be a bit cryptic as to where it originated.

 - `[operation][number type]`, e.g. `divi` for divide (div) integer
 - `%[register]` for addressing registers
- `$[value]` for using literals/immediate values
+- `$[value]` for using immediate (literal) integer values
+- `'[character]'` for using immediate character values, e.g. `'a'` (currently
+only non-escaped characters are supported)
 - `;` for end of statement (mandatory)
 - `[label]:` for labels
 - `#[text]` for comments: any text is ignored till a newline (`\n`) is found
@@ -63,7 +65,8 @@ The following whitespace characters are used to separate symbols:
 - newline (`\n`)

 The following characters are used as identifiers:
- dollar (`$`) for immediate (literal) values
+- dollar (`$`) for immediate (literal) integer values
+- single quote (`'`) for immediate character values
 - percentage (`%`) for register identifiers
 - colon (`:`) for jump labels
 - semicolon (`;`) for statement termination
@@ -86,7 +89,7 @@ second byte of location `$900`).
 All symbols are reserved keywords and can therefore NOT be used as labels.
 There is currently no strict checking, so be careful.

-## Preprocessor
+### Preprocessor

 All preprocessor directives are prefixed by a `#`. Ill formed preprocessor
 directives do not halt compilation, they are merely reported and then ignored.
--- a/bin/test.wasm
+++ b/bin/test.wasm
@@ -24,9 +24,9 @@ jmp count_loop;

 #DEFINE PRINT_CHAR $0
 # Hello world
-seti %A $72;  # H
+seti %A 'H';  # H
 int PRINT_CHAR;
-seti %A $101; # e
+seti %A 'e'; # e
 int PRINT_CHAR;
 seti %A $108; # l
 int PRINT_CHAR;
--- a/include/token/tokenizer.hpp
+++ b/include/token/tokenizer.hpp
@@ -10,8 +10,21 @@ namespace Token
 	private:
 		std::vector<std::pair<std::string, std::string>> substitutions;

-		Token ExtractToken(std::string string, int const lineNumber, int const lineColumn) const;
-		void ParseComment(std::string const & string, int const lineNumber, int const lineColumn);
+		Token ExtractToken(
+			std::string string,
+			int const lineNumber,
+			int const lineColumn) const;
+
+		void ParseComment(
+			std::string const & string,
+			int const lineNumber,
+			int const lineColumn);
+
+		void ParseCharacterLiteral(
+			std::string const & line,
+			int const lineNumber,
+			unsigned & lineColumn, // in/out: the definition writes the position where the literal ends back into this
+			std::vector<Token> & tokens) const; // the resulting token is appended to tokens

 	public:
 		void Tokenize(std::string const & line, int const lineNumber, std::vector<Token> & tokens);
--- a/src/token/tokenizer.cpp
+++ b/src/token/tokenizer.cpp
@@ -1,20 +1,21 @@
 #include <map>
+#include <optional>
 #include <stdexcept>
 #include <token/errors.hpp>
 #include <token/tokenizer.hpp>

 namespace Token
 {
-	std::tuple<int, bool> TryParseInt(std::string const & string)
+	std::optional<int> TryParseInt(std::string const & string) // Parses string with std::stoi; yields an empty optional when std::stoi throws std::invalid_argument
 	{
 		try
 		{
 			int value = std::stoi(string);
-			return std::make_tuple(value, true);
+			return std::make_optional<int>(value);
 		}
 		catch(std::invalid_argument &)
 		{
-			return std::make_tuple(0, false);
+			return std::nullopt; // NOTE(review): std::stoi may also throw std::out_of_range, which this handler does not catch - confirm whether that case should map to nullopt too
 		}
 	}

@@ -42,9 +43,19 @@ namespace Token
 			case '$':
 			{
 				auto const result = TryParseInt(string.substr(1, string.size()));
+
+				if (result.has_value())
+				{
+					return Token::CreateImmediateValueToken(
+						result.value(),
+						true,
+						lineNumber,
+						lineColumn);
+				}
+
 				return Token::CreateImmediateValueToken(
-					std::get<0>(result),
-					std::get<1>(result),
+					0,
+					false,
 					lineNumber,
 					lineColumn);
 			}
@@ -85,9 +96,19 @@ namespace Token
 			if (memoryPrefix == '$')
 			{
 				auto const result = TryParseInt(valueString);
+
+				if (result.has_value())
+				{
+					return Token::CreateMemoryToken(
+						result.value(),
+						true,
+						lineNumber,
+						lineColumn);
+				}
+
 				return Token::CreateMemoryToken(
-					std::get<0>(result),
-					std::get<1>(result),
+					0,
+					false,
 					lineNumber,
 					lineColumn);
 			}
@@ -224,6 +245,51 @@ end_state_loop:
 		}
 	}

+	// Parses the character literal starting at lineColumn (the caller passes the index of the opening quote), appends exactly one immediate value token to tokens, and moves lineColumn to the closing quote, or to line.size() when the literal is not terminated
+	void Tokenizer::ParseCharacterLiteral(
+		std::string const & line,
+		int const lineNumber,
+		unsigned & lineColumn,
+		std::vector<Token> & tokens) const
+	{
+		for(unsigned int i = lineColumn + 1; i < line.size(); ++i) // look for the closing quote, starting right after lineColumn
+		{
+			if (line[i] == '\'')
+			{
+				// Only the form 'x' is accepted for now: the closing quote must sit
+				// exactly two positions after lineColumn (one character in between)
+				if (lineColumn + 2u != i) // empty ('') or more than one character between the quotes
+				{
+					tokens.emplace_back(Token::CreateImmediateValueToken(
+						0,
+						false, // presumably flags the token as invalid, mirroring the failed integer parse calls in this file - TODO confirm
+						lineNumber,
+						lineColumn));
+				}
+				else
+				{
+					tokens.emplace_back(Token::CreateImmediateValueToken(
+						line[i - 1], // the single character between the two quotes
+						true,
+						lineNumber,
+						lineColumn));
+				}
+
+				lineColumn = i; // leave lineColumn on the closing quote
+				return;
+			}
+		}
+
+		// No closing quote was found on this line: emit the same 0/false token as for a malformed literal
+		tokens.emplace_back(Token::CreateImmediateValueToken(
+			0,
+			false,
+			lineNumber,
+			lineColumn));
+
+		lineColumn = line.size(); // move past the last character of the line
+	}
+
 	void Tokenizer::Tokenize(
 		std::string const & line,
 		int const lineNumber,
@@ -250,6 +316,12 @@ end_state_loop:
 						return;
 					}

+					if (line[column] == '\'')
+					{
+						ParseCharacterLiteral(line, lineNumber, column, tokens);
+						break;
+					}
+
 					columnTokenStart = column;

 					switch(line[column])