diff --git a/README.md b/README.md index f320df2..9ab5c2b 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,7 @@ afterwards can be a bit cryptic as to where it originated. - `[operation][number type]`, e.g. `divi` for divide (div) integer - `%[register]` for addressing registers - `$[value]` for using immediate (literal) integer values -- `'a'` for using immediate character values (currently only supports non -escaped characters) +- `'a'` for using immediate character values - `;` for end of statement (mandatory) - `[label]:` for labels - `#[text]` for comments: any text is ignored till a newline (`\n`) is found @@ -92,13 +91,33 @@ There is currently no strict checking, so be careful. ### Preprocessor All preprocessor directives are prefixed by a `#`. Ill formed preprocessor -directives do not halt compilation, they are merely reported and then ignored. +directives do not halt compilation, they are merely ignored. All preprocessing +is done in a single pass. Recursion or definition of a directive by another +directive is therefore not supported. -- `DEFINE` replaces any occurrence of the first argument by the second argument. - The second argument may be empty, effectively deleting occurences of argument - one. Quotes are currently not supported and arguments are separated by - whitespace. If multiple defines exist for the same substitution the first - declared is used. +- `DEFINE <x> [y]` replaces any occurrence of the first argument (`x`) by the + second optional argument (`y`). The second argument can be empty, effectively + deleting all occurrences of `x`. Quotes are currently not supported and + arguments are separated by whitespace. If multiple defines exist the later + declarations will overwrite the previous. + +### Registers + +All registers are 32 bits wide. The following 4 registers currently exist: + +- A +- B +- C +- D + +### Immediates + +An immediate integer value for 42 is, for example, `$42`. Negative values are +allowed, for example `$-42`. 
Notation must be decimal; hexadecimal and octal notations +are **not supported**. + +The immediate character value for the letter g is `'g'`. Character values must +be a single character; escaped or multi-byte characters are **not supported**. ### Operands diff --git a/bin/test.wasm b/bin/test.wasm index 2ff3020..6dae79b 100644 --- a/bin/test.wasm +++ b/bin/test.wasm @@ -1,4 +1,4 @@ -#DEFINE +#DEFINE LETTER_O 'o' addi $10 $-5 %A; subi %A $2 %B; @@ -31,7 +31,7 @@ int PRINT_CHAR; seti %A $108; # l int PRINT_CHAR; int PRINT_CHAR; -seti %A $111; # o +seti %A LETTER_O; int PRINT_CHAR; seti %A $32; # space @@ -105,9 +105,6 @@ seti %B $10; int $5; int $3; -# Demonstrate string literals -"Hello world!"; - exit; noop_function: diff --git a/include/preprocessor/preprocessor.hpp b/include/preprocessor/preprocessor.hpp new file mode 100644 index 0000000..880b8d5 --- /dev/null +++ b/include/preprocessor/preprocessor.hpp @@ -0,0 +1,21 @@ +#pragma once +#include +#include + +class Preprocessor +{ +private: + std::vector substitutionIdentifiers; + std::vector substitutionValues; + + void extractComment(std::string & line, + std::size_t const lineNumber, + std::size_t const lineColumn); + + void processLine(std::string & line, std::size_t const lineNumber); + +public: + void process(std::vector & lines); + + void printSubstitutions() const; +}; \ No newline at end of file diff --git a/include/token/tokenizer.hpp b/include/token/tokenizer.hpp index 9c648b1..1091d68 100644 --- a/include/token/tokenizer.hpp +++ b/include/token/tokenizer.hpp @@ -15,11 +15,6 @@ namespace Token int const lineNumber, int const lineColumn) const; - void ParseComment( - std::string const & string, - int const lineNumber, - int const lineColumn); - void ParseCharacterLiteral( std::string const & line, int const lineNumber, diff --git a/include/utils.hpp b/include/utils.hpp new file mode 100644 index 0000000..ea585aa --- /dev/null +++ b/include/utils.hpp @@ -0,0 +1,6 @@ +#pragma once + +namespace Utils +{ + bool 
isWhitespaceCharacter(char const c); +} \ No newline at end of file diff --git a/include/wassembler.hpp b/include/wassembler.hpp index d0ae1d8..dd9e5fe 100644 --- a/include/wassembler.hpp +++ b/include/wassembler.hpp @@ -9,12 +9,15 @@ class Wassembler private: Configuration config; Execute::VirtualMachine vm; + bool printSubstitutions; bool LoadLinesFromFile(std::string const & filePath, std::vector & lines) const; bool LoadTokens(std::vector const & lines, std::vector & tokens) const; public: void SetMemorySize(unsigned const size); + void EnableSubstitutionsLogging(); + bool LoadFromFile(std::string const & filePath); void Run(); diff --git a/makefile b/makefile index 40d93ba..220736a 100644 --- a/makefile +++ b/makefile @@ -13,7 +13,7 @@ BINARY = bin/wassembler all: ${BINARY} check: ${BINARY} - ./$< ./bin/test.wasm + ./$< ./bin/test.wasm -p clean: -rm -rf build ./${BINARY} diff --git a/src/main.cpp b/src/main.cpp index f227be9..03c9119 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -7,10 +7,12 @@ int main(int argc, char ** argv) { std::string inputFile; unsigned memorySize = 1024; + bool printSubstitutions = false; auto cli = ( clipp::value("input wasm file").set(inputFile), - clipp::option("-m", "--memory-size") & clipp::value("memory size", memorySize) + clipp::option("-m", "--memory-size") & clipp::value("memory size", memorySize), + clipp::option("-p", "--print-substitutions").set(printSubstitutions) ); if (!clipp::parse(argc, argv, cli)) @@ -21,6 +23,11 @@ int main(int argc, char ** argv) Wassembler wassembler; wassembler.SetMemorySize(memorySize); + if (printSubstitutions) + { + wassembler.EnableSubstitutionsLogging(); + } + if (!wassembler.LoadFromFile(inputFile)) { exit(1); diff --git a/src/preprocessor/preprocessor.cpp b/src/preprocessor/preprocessor.cpp new file mode 100644 index 0000000..c14d9a6 --- /dev/null +++ b/src/preprocessor/preprocessor.cpp @@ -0,0 +1,159 @@ +#include +#include + +bool trySubstitute( + std::string & line, + std::size_t const 
lineColumn, + std::vector const & substitutionIdentifiers, + std::vector const & substitutionValues) +{ + for(std::size_t i = 0; i < substitutionIdentifiers.size(); ++i) + { + if (line.compare(lineColumn, substitutionIdentifiers[i].size(), substitutionIdentifiers[i]) != 0) + { + continue; + } + + std::string const lineCopy = line; + line = lineCopy.substr(0, lineColumn) + substitutionValues[i]; + if (lineCopy.size() > lineColumn + substitutionIdentifiers[i].size()) + { + line += lineCopy.substr(lineColumn + substitutionIdentifiers[i].size(), lineCopy.size()); + } + + return true; + } + + return false; +} + +void Preprocessor::extractComment( + std::string & line, + std::size_t const lineNumber, + std::size_t const lineColumn) +{ + if (line.size() <= lineColumn + 1 || + line.compare(lineColumn + 1, std::string::npos, "DEFINE") <= 0) + { + // No match or empty DEFINE statement + line = line.substr(0, lineColumn); + } + + enum CommentParseState + { + LookForArgumentStart, + LookForArgumentEnd + }; + + std::string firstArgument, secondArgument; + std::size_t argumentCount = 0, argumentStart = 0; + CommentParseState state = LookForArgumentStart; + for(std::size_t i = lineColumn + 7; i < line.size(); ++i) + { + switch(state) + { + case LookForArgumentStart: + if(!Utils::isWhitespaceCharacter(line[i])) + { + argumentStart = i; + state = CommentParseState::LookForArgumentEnd; + } + break; + + case LookForArgumentEnd: + if (Utils::isWhitespaceCharacter(line[i])) + { + switch(argumentCount) + { + case 0: + firstArgument = line.substr(argumentStart, i - argumentStart); + break; + + case 1: + secondArgument = line.substr(argumentStart, i - argumentStart); + break; + + default: + break; + } + + ++argumentCount; + state = CommentParseState::LookForArgumentStart; + } + break; + } + } + + switch(state) + { + case CommentParseState::LookForArgumentStart: + break; + + case CommentParseState::LookForArgumentEnd: + switch(argumentCount) + { + case 0: + firstArgument = 
line.substr(argumentStart); + break; + + case 1: + secondArgument = line.substr(argumentStart); + break; + + default: + break; + } + ++argumentCount; + break; + } + + if (argumentCount > 0) + { + substitutionIdentifiers.push_back(firstArgument); + substitutionValues.push_back(secondArgument); + } + + line = line.substr(0, lineColumn); +} + +void Preprocessor::processLine(std::string & line, std::size_t const lineNumber) +{ + for(std::size_t i = 0; i < line.size(); ++i) + { + if (!Utils::isWhitespaceCharacter(line[i])) + { + if (trySubstitute(line, i, substitutionIdentifiers, substitutionValues)) + { + continue; + } + + if (line[i] == '#') + { + extractComment(line, lineNumber, i); + return; + } + } + } +} + +void Preprocessor::process(std::vector & lines) +{ + substitutionIdentifiers.clear(); + substitutionValues.clear(); + + for(std::size_t i = 0; i < lines.size(); ++i) + { + processLine(lines[i], i); + } +} + +void Preprocessor::printSubstitutions() const +{ + for(std::size_t i = 0; i < substitutionIdentifiers.size(); ++i) + { + std::printf( + "%s -> %s\n", + substitutionIdentifiers[i].c_str(), + substitutionValues[i].c_str()); + } +} \ No newline at end of file diff --git a/src/token/tokenizer.cpp b/src/token/tokenizer.cpp index 0d54a40..a6244ee 100644 --- a/src/token/tokenizer.cpp +++ b/src/token/tokenizer.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace Token { @@ -150,111 +151,6 @@ namespace Token return Token::CreateLabelToken(string, lineNumber, lineColumn); } - bool IsWhiteSpace(char const c) - { - return c == '\n' || c == ' ' || c == '\t' || c == '\r'; - } - - void Tokenizer::ParseComment( - std::string const & string, - int const lineNumber, - int const lineColumn) - { - unsigned const commentContentStart = lineColumn + 1; - if (string.size() < commentContentStart || - IsWhiteSpace(string[commentContentStart])) - { - return; - } - - enum class CommentParseState - { - LookForDirectiveEnd, - LookForArgumentStart, - LookForArgumentEnd - }; 
- std::string firstArgument, secondArgument; - unsigned argumentCount = 0, argumentStart = 0; - CommentParseState state = CommentParseState::LookForDirectiveEnd; - for(unsigned i = commentContentStart + 1; i < string.size(); ++i) - { - switch(state) - { - case CommentParseState::LookForDirectiveEnd: - if(IsWhiteSpace(string[i])) - { - if (string.compare(commentContentStart, i - commentContentStart, "DEFINE")) - { - // Nonzero = not equal - return; - } - - state = CommentParseState::LookForArgumentStart; - } - break; - - case CommentParseState::LookForArgumentStart: - if(!IsWhiteSpace(string[i])) - { - argumentStart = i; - state = CommentParseState::LookForArgumentEnd; - } - break; - - case CommentParseState::LookForArgumentEnd: - if (IsWhiteSpace(string[i])) - { - state = CommentParseState::LookForArgumentStart; - switch(argumentCount) - { - case 0: - firstArgument = string.substr(argumentStart, i - argumentStart); - break; - - case 1: - secondArgument = string.substr(argumentStart, i - argumentStart); - break; - - default: - goto end_state_loop; - } - ++argumentCount; - } - break; - } - } -end_state_loop: - switch(state) - { - case CommentParseState::LookForDirectiveEnd: - case CommentParseState::LookForArgumentStart: - break; - - case CommentParseState::LookForArgumentEnd: - switch(argumentCount) - { - case 0: - firstArgument = string.substr(argumentStart); - break; - - case 1: - secondArgument = string.substr(argumentStart); - break; - } - ++argumentCount; - break; - } - - if (argumentCount > 0) - { - substitutions.push_back(std::make_pair(firstArgument, secondArgument)); - } - else - { - std::printf("WARNING: DEFINE with no arguments on line %u\n", lineNumber + 1); - } - } - // Modifies the lineColumn parameter to point at the character literal end void Tokenizer::ParseCharacterLiteral( std::string const & line, @@ -314,16 +210,12 @@ end_state_loop: switch(state) { case TokenizerState::LookForNextToken: - if (!IsWhiteSpace(line[column])) + if 
(!Utils::isWhitespaceCharacter(line[column])) { - if (line[column] == '#') - { - ParseComment(line, lineNumber, column); - return; - } - if (line[column] == '\'') { + // TODO integrate this better with the existing extract token + // infrastructure ParseCharacterLiteral(line, lineNumber, column, tokens); break; } @@ -345,7 +237,7 @@ end_state_loop: break; case TokenizerState::LookForTokenEnd: - if (IsWhiteSpace(line[column]) || line[column] == ';') + if (Utils::isWhitespaceCharacter(line[column]) || line[column] == ';') { tokens.push_back(ExtractToken(line.substr(columnTokenStart, column - columnTokenStart), lineNumber, columnTokenStart)); if (line[column] == ';') diff --git a/src/utils.cpp b/src/utils.cpp new file mode 100644 index 0000000..b929180 --- /dev/null +++ b/src/utils.cpp @@ -0,0 +1,9 @@ +#include + +namespace Utils +{ + bool isWhitespaceCharacter(char const c) + { + return c == '\n' || c == ' ' || c == '\t' || c == '\r'; + } +} \ No newline at end of file diff --git a/src/wassembler.cpp b/src/wassembler.cpp index 188fbcd..8d8ea8a 100644 --- a/src/wassembler.cpp +++ b/src/wassembler.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -48,7 +49,6 @@ bool Wassembler::LoadLinesFromFile(std::string const & filePath, std::vector const & lines, std::vector & tokens) const { Token::Tokenizer tokenizer; @@ -86,6 +86,11 @@ void Wassembler::SetMemorySize(unsigned const size) config.memorySize = size; } +void Wassembler::EnableSubstitutionsLogging() +{ + printSubstitutions = true; +} + bool Wassembler::LoadFromFile(std::string const & filePath) { std::vector lines; @@ -95,6 +100,13 @@ bool Wassembler::LoadFromFile(std::string const & filePath) return false; } + Preprocessor preprocessor; + preprocessor.process(lines); + if (printSubstitutions) + { + preprocessor.printSubstitutions(); + } + std::vector tokens; if (!LoadTokens(lines, tokens)) {