Introduce simple character literal

2020-08-28 20:00:49 +02:00
parent d396628310
commit 593506a907
4 changed files with 102 additions and 14 deletions
--- a/README.md
+++ b/README.md
@@ -23,7 +23,9 @@ afterwards can be a bit cryptic as to where it originated.
 - `[operation][number type]`, e.g. `divi` for divide (div) integer
 - `%[register]` for addressing registers
- `$[value]` for using literals/immediate values
+- `$[value]` for using immediate (literal) integer values
 - `'a'` for using immediate character values (currently only supports
 non-escaped characters)
 - `;` for end of statement (mandatory)
 - `[label]:` for labels
 - `#[text]` for comments: any text is ignored till a newline (`\n`) is found
@@ -63,7 +65,8 @@ The following whitespace characters are used to separate symbols:
 - newline (`\n`)
 The following characters are used as identifiers:
- dollar (`$`) for immediate (literal) values
+- dollar (`$`) for immediate (literal) integer values
 - single quote (`'`) for immediate character values
 - percentage (`%`) for register identifiers
 - colon (`:`) for jump labels
 - semicolon (`;`) for statement termination
@@ -86,7 +89,7 @@ second byte of location `$900`).
 All symbols are reserved keywords and can therefore NOT be used as labels.
 There is currently no strict checking, so be careful.
-## Preprocessor
+### Preprocessor
 All preprocessor directives are prefixed by a `#`. Ill-formed preprocessor
 directives do not halt compilation; they are merely reported and then ignored.
--- a/bin/test.wasm
+++ b/bin/test.wasm
@@ -24,9 +24,9 @@ jmp count_loop;
 #DEFINE PRINT_CHAR $0
 # Hello world
-seti %A $72;  # H
+seti %A 'H';  # H
 int PRINT_CHAR;
-seti %A $101; # e
+seti %A 'e'; # e
 int PRINT_CHAR;
 seti %A $108; # l
 int PRINT_CHAR;
--- a/include/token/tokenizer.hpp
+++ b/include/token/tokenizer.hpp
@@ -10,8 +10,21 @@ namespace Token
 	private:
 		std::vector<std::pair<std::string, std::string>> substitutions;
-		Token ExtractToken(std::string string, int const lineNumber, int const lineColumn) const;
+		Token ExtractToken(
-		void ParseComment(std::string const & string, int const lineNumber, int const lineColumn);
+			std::string string,
 			int const lineNumber,
 			int const lineColumn) const;
 		void ParseComment(
 			std::string const & string,
 			int const lineNumber,
 			int const lineColumn);
 		void ParseCharacterLiteral(
 			std::string const & line,
 			int const lineNumber,
 			unsigned & lineColumn,
 			std::vector<Token> & tokens) const;
 	public:
 		void Tokenize(std::string const & line, int const lineNumber, std::vector<Token> & tokens);
--- a/src/token/tokenizer.cpp
+++ b/src/token/tokenizer.cpp
@@ -1,20 +1,21 @@
 #include <map>
 #include <optional>
 #include <stdexcept>
 #include <token/errors.hpp>
 #include <token/tokenizer.hpp>
 namespace Token
 {
-	std::tuple<int, bool> TryParseInt(std::string const & string)
+	std::optional<int> TryParseInt(std::string const & string)
 	{
 		try
 		{
 			int value = std::stoi(string);
-			return std::make_tuple(value, true);
+			return std::make_optional<int>(value);
 		}
 		catch(std::invalid_argument &)
 		{
-			return std::make_tuple(0, false);
+			return std::nullopt;
 		}
 	}
@@ -42,9 +43,19 @@ namespace Token
 			case '$':
 			{
 				auto const result = TryParseInt(string.substr(1, string.size()));
 				if (result.has_value())
 				{
 					return Token::CreateImmediateValueToken(
-					std::get<0>(result),
+						result.value(),
-					std::get<1>(result),
+						true,
 						lineNumber,
 						lineColumn);
 				}
 				return Token::CreateImmediateValueToken(
 					0,
 					false,
 					lineNumber,
 					lineColumn);
 			}
@@ -85,9 +96,19 @@ namespace Token
 			if (memoryPrefix == '$')
 			{
 				auto const result = TryParseInt(valueString);
 				if (result.has_value())
 				{
 					return Token::CreateMemoryToken(
-					std::get<0>(result),
+						result.value(),
-					std::get<1>(result),
+						true,
 						lineNumber,
 						lineColumn);
 				}
 				return Token::CreateMemoryToken(
 					0,
 					false,
 					lineNumber,
 					lineColumn);
 			}
@@ -224,6 +245,51 @@ end_state_loop:
 		}
 	}
 	// Tokenizes a character literal and appends exactly one immediate value
 	// token to `tokens`. The opening quote is expected at line[lineColumn];
 	// scanning for the closing quote starts one position after it.
 	//
 	// Only the form 'x' (exactly one character between the two quotes) is
 	// accepted and produces a token created with (character, true). Any other
 	// length, as well as a missing closing quote, produces a token created
 	// with (0, false). Every token carries the column of the opening quote.
 	//
 	// Modifies the lineColumn parameter: afterwards it is the index of the
 	// closing quote, or line.size() when the literal is not terminated.
 	void Tokenizer::ParseCharacterLiteral(
 		std::string const & line,
 		int const lineNumber,
 		unsigned & lineColumn,
 		std::vector<Token> & tokens) const
 	{
 		auto const closingQuote = line.find('\'', lineColumn + 1);
 		if (closingQuote == std::string::npos)
 		{
 			// Non terminated character literal: the rest of the line is consumed
 			tokens.emplace_back(Token::CreateImmediateValueToken(
 				0,
 				false,
 				lineNumber,
 				lineColumn));
 			lineColumn = line.size();
 			return;
 		}
 		// Character literal must be exactly length 3 for now (2x ' + 1
 		// character in between)
 		bool const hasSingleCharacter = (closingQuote == lineColumn + 2u);
 		if (hasSingleCharacter)
 		{
 			tokens.emplace_back(Token::CreateImmediateValueToken(
 				line[closingQuote - 1],
 				true,
 				lineNumber,
 				lineColumn));
 		}
 		else
 		{
 			tokens.emplace_back(Token::CreateImmediateValueToken(
 				0,
 				false,
 				lineNumber,
 				lineColumn));
 		}
 		lineColumn = static_cast<unsigned>(closingQuote);
 	}
 	void Tokenizer::Tokenize(
 		std::string const & line,
 		int const lineNumber,
@@ -250,6 +316,12 @@ end_state_loop:
 						return;
 					}
 					if (line[column] == '\'')
 					{
 						ParseCharacterLiteral(line, lineNumber, column, tokens);
 						break;
 					}
 					columnTokenStart = column;
 					switch(line[column])