diff --git a/README.md b/README.md index 2e8c867..cebc2e2 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ language, or keep on reading! ## From Text To Runtime Behaviour In order to turn the source text into executable code we use 3 passes: -- Pass 1: tokenization (syntax check) +- Pass 1: tokenization (syntax check) and preprocessing (substitution) - Pass 2: interpretation (semantics check) - Pass 3: execution (runtime check) @@ -69,7 +69,6 @@ The following characters are used as identifiers: - semicolon (`;`) for statement termination - hash (`#`) for comments - square brackets (`[` and `]`) for addressing memory -- double quotes (`"`) for string values ## Memory Model @@ -87,12 +86,16 @@ second byte of location `$900`). All symbols are reserved keywords and can therefore NOT be used as labels. There is currently no strict checking, so be careful. -## Directives +## Preprocessor -- `DECLARE` declares the first label argument to equal the second, immediate - value, argument and is used to declare a constant for the virtual machine. -- `STRING` puts the string value declared as the second argument in the memory - memory location of the first immediate argument +All preprocessor directives are prefixed by a `#`. Ill-formed preprocessor +directives do not halt compilation; they are merely reported and then ignored. + +- `DEFINE` replaces any occurrence of the first argument by the second argument. + The second argument may be empty, effectively deleting occurrences of argument + one. Quotes are currently not supported and arguments are separated by + whitespace. If multiple defines exist for the same substitution, the first + declared is used. 
### Operands diff --git a/bin/test.wasm b/bin/test.wasm index 89f992a..403d266 100644 --- a/bin/test.wasm +++ b/bin/test.wasm @@ -1,4 +1,4 @@ -#DECLARE MEMORY_SIZE $4096; +#DEFINE addi $10 $-5 %A; subi %A $2 %B; @@ -22,16 +22,17 @@ seti %A %B; lti %A $10; jmp count_loop; +#DEFINE PRINT_CHAR $0 # Hello world seti %A $72; # H -int $0; +int PRINT_CHAR; seti %A $101; # e -int $0; +int PRINT_CHAR; seti %A $108; # l -int $0; -int $0; +int PRINT_CHAR; +int PRINT_CHAR; seti %A $111; # o -int $0; +int PRINT_CHAR; seti %A $32; # space int $0; diff --git a/include/token/tokenizer.hpp b/include/token/tokenizer.hpp index 8f809d3..4b639b6 100644 --- a/include/token/tokenizer.hpp +++ b/include/token/tokenizer.hpp @@ -7,6 +7,12 @@ namespace Token { class Tokenizer { + private: + std::vector> substitutions; + + Token ExtractToken(std::string string, int const lineNumber, int const lineColumn) const; + void ParseComment(std::string const & string, int const lineNumber, int const lineColumn); + public: void Tokenize(std::string const & line, int const lineNumber, std::vector & tokens); }; diff --git a/include/wassembler.hpp b/include/wassembler.hpp index f40c18b..d0ae1d8 100644 --- a/include/wassembler.hpp +++ b/include/wassembler.hpp @@ -1,7 +1,6 @@ #pragma once #include #include -#include #include #include diff --git a/src/token/tokenizer.cpp b/src/token/tokenizer.cpp index 70b8681..fcb053b 100644 --- a/src/token/tokenizer.cpp +++ b/src/token/tokenizer.cpp @@ -5,11 +5,6 @@ namespace Token { - bool IsWhiteSpace(char const c) - { - return c == '\n' || c == ' ' || c == '\t' || c == '\r'; - } - std::tuple TryParseInt(std::string const & string) { try @@ -23,24 +18,42 @@ namespace Token } } - Token ExtractToken(std::string const & string, int const lineNumber, int const lineColumn) + Token Tokenizer::ExtractToken(std::string string, + int const lineNumber, + int const lineColumn) const { if (string.size() == 0) { return Token::CreateUnknownToken(lineNumber, lineColumn); } + 
for(std::size_t i = 0; i < substitutions.size(); ++i) + { + if (string == substitutions[i].first) + { + string = substitutions[i].second; + break; + } + } + char const prefix = string[0]; switch(prefix) { case '$': { auto const result = TryParseInt(string.substr(1, string.size())); - return Token::CreateImmediateValueToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn); + return Token::CreateImmediateValueToken( + std::get<0>(result), + std::get<1>(result), + lineNumber, + lineColumn); } case '%': - return Token::CreateRegisterToken(GetRegisterType(string.substr(1, string.size())), lineNumber, lineColumn); + return Token::CreateRegisterToken(GetRegisterType( + string.substr(1, string.size())), + lineNumber, + lineColumn); case ';': return Token::CreateStatementEndToken(lineNumber, lineColumn); @@ -53,7 +66,11 @@ namespace Token if (postfix == ':') { // TODO check if label is an Operand? - return Token::CreateLabelToken(string.substr(0, string.size() - 1), true, lineNumber, lineColumn); + return Token::CreateLabelToken( + string.substr(0, string.size() - 1), + true, + lineNumber, + lineColumn); } if (prefix == '[' && postfix == ']') @@ -68,11 +85,18 @@ namespace Token if (memoryPrefix == '$') { auto const result = TryParseInt(valueString); - return Token::CreateMemoryToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn); + return Token::CreateMemoryToken( + std::get<0>(result), + std::get<1>(result), + lineNumber, + lineColumn); } else if (memoryPrefix == '%') { - return Token::CreateMemoryToken(GetRegisterType(valueString), lineNumber, lineColumn); + return Token::CreateMemoryToken( + GetRegisterType(valueString), + lineNumber, + lineColumn); } else { @@ -95,7 +119,115 @@ namespace Token return Token::CreateLabelToken(string, true, lineNumber, lineColumn); } - void Tokenizer::Tokenize(std::string const & line, int const lineNumber, std::vector & tokens) + bool IsWhiteSpace(char const c) + { + return c == '\n' || c == ' ' || c == 
'\t' || c == '\r'; + } + + void Tokenizer::ParseComment( + std::string const & string, + int const lineNumber, + int const lineColumn) + { + unsigned const commentContentStart = lineColumn + 1; + if (string.size() < commentContentStart || + IsWhiteSpace(string[commentContentStart])) + { + return; + } + + enum class CommentParseState + { + LookForDirectiveEnd, + LookForArgumentStart, + LookForArgumentEnd + }; + std::string firstArgument, secondArgument; + unsigned argumentCount = 0, argumentStart = 0; + CommentParseState state = CommentParseState::LookForDirectiveEnd; + for(unsigned i = commentContentStart + 1; i < string.size(); ++i) + { + switch(state) + { + case CommentParseState::LookForDirectiveEnd: + if(IsWhiteSpace(string[i])) + { + if (string.compare(commentContentStart, i - commentContentStart, "DEFINE")) + { + // Nonzero = not equal + return; + } + + state = CommentParseState::LookForArgumentStart; + } + break; + + case CommentParseState::LookForArgumentStart: + if(!IsWhiteSpace(string[i])) + { + argumentStart = i; + state = CommentParseState::LookForArgumentEnd; + } + break; + + case CommentParseState::LookForArgumentEnd: + if (IsWhiteSpace(string[i])) + { + state = CommentParseState::LookForArgumentStart; + switch(argumentCount) + { + case 0: + firstArgument = string.substr(argumentStart, i - argumentStart); + break; + + case 1: + secondArgument = string.substr(argumentStart, i - argumentStart); + break; + + default: + goto end_state_loop; + } + ++argumentCount; + } + break; + } + } +end_state_loop: + switch(state) + { + case CommentParseState::LookForDirectiveEnd: + case CommentParseState::LookForArgumentStart: + break; + + case CommentParseState::LookForArgumentEnd: + switch(argumentCount) + { + case 0: + firstArgument = string.substr(argumentStart); + break; + + case 1: + secondArgument = string.substr(argumentStart); + break; + } + ++argumentCount; + break; + } + + if (argumentCount > 0) + { + substitutions.push_back(std::make_pair(firstArgument, 
secondArgument)); + } + else + { + std::printf("WARNING: DEFINE with no arguments on line %u\n", lineNumber + 1); + } + } + + void Tokenizer::Tokenize( + std::string const & line, + int const lineNumber, + std::vector & tokens) { enum class TokenizerState { @@ -114,7 +246,7 @@ namespace Token { if (line[column] == '#') { - // Ignore comments + ParseComment(line, lineNumber, column); return; } @@ -123,7 +255,8 @@ namespace Token switch(line[column]) { case ';': - tokens.push_back(ExtractToken(line.substr(column, 1), lineNumber, column)); + tokens.push_back( + ExtractToken(line.substr(column, 1), lineNumber, column)); break; default: @@ -154,7 +287,10 @@ namespace Token switch(state) { case TokenizerState::LookForTokenEnd: - tokens.push_back(ExtractToken(line.substr(columnTokenStart, line.size()), lineNumber, columnTokenStart)); + tokens.push_back(ExtractToken( + line.substr(columnTokenStart, line.size()), + lineNumber, + columnTokenStart)); break; case TokenizerState::LookForNextToken: