From b84557b3e1bca000d5f84fab81a942a1a0c34930 Mon Sep 17 00:00:00 2001 From: Tijmen van Nesselrooij Date: Sun, 17 Nov 2019 21:02:35 +0100 Subject: [PATCH] Basic arithmetic and jump labels --- .gitignore | 1 + .vscode/.gitignore | 1 + .vscode/c_cpp_properties.json | 15 +++ bin/.gitignore | 1 + bin/example.wasm | 16 +++ design.md | 40 ++++++ include/execute/flags.hpp | 9 ++ include/execute/registers.hpp | 19 +++ include/execute/state.hpp | 16 +++ include/execute/virtualmachine.hpp | 33 +++++ include/interpret/code.hpp | 15 +++ include/interpret/errors.hpp | 62 +++++++++ include/interpret/interpreter.hpp | 14 ++ include/interpret/operanddefinitions.hpp | 11 ++ include/interpret/statement.hpp | 57 ++++++++ include/interpret/value.hpp | 26 ++++ include/token/operandtype.hpp | 19 +++ include/token/registertype.hpp | 16 +++ include/token/token.hpp | 27 ++++ include/token/tokenizer.hpp | 13 ++ include/token/tokentype.hpp | 14 ++ makefile | 29 ++++ src/execute/state.cpp | 11 ++ src/execute/virtualmachine.cpp | 52 +++++++ src/interpret/error.cpp | 58 ++++++++ src/interpret/interpreter.cpp | 77 +++++++++++ src/interpret/operanddefinitions.cpp | 165 +++++++++++++++++++++++ src/interpret/statement.cpp | 42 ++++++ src/interpret/value.cpp | 33 +++++ src/main.cpp | 133 ++++++++++++++++++ src/token/operandtype.cpp | 27 ++++ src/token/registertype.cpp | 24 ++++ src/token/token.cpp | 165 +++++++++++++++++++++++ src/token/tokenizer.cpp | 109 +++++++++++++++ 34 files changed, 1350 insertions(+) create mode 100644 .gitignore create mode 100644 .vscode/.gitignore create mode 100644 .vscode/c_cpp_properties.json create mode 100644 bin/.gitignore create mode 100644 bin/example.wasm create mode 100644 design.md create mode 100644 include/execute/flags.hpp create mode 100644 include/execute/registers.hpp create mode 100644 include/execute/state.hpp create mode 100644 include/execute/virtualmachine.hpp create mode 100644 include/interpret/code.hpp create mode 100644 
include/interpret/errors.hpp create mode 100644 include/interpret/interpreter.hpp create mode 100644 include/interpret/operanddefinitions.hpp create mode 100644 include/interpret/statement.hpp create mode 100644 include/interpret/value.hpp create mode 100644 include/token/operandtype.hpp create mode 100644 include/token/registertype.hpp create mode 100644 include/token/token.hpp create mode 100644 include/token/tokenizer.hpp create mode 100644 include/token/tokentype.hpp create mode 100644 makefile create mode 100644 src/execute/state.cpp create mode 100644 src/execute/virtualmachine.cpp create mode 100644 src/interpret/error.cpp create mode 100644 src/interpret/interpreter.cpp create mode 100644 src/interpret/operanddefinitions.cpp create mode 100644 src/interpret/statement.cpp create mode 100644 src/interpret/value.cpp create mode 100644 src/main.cpp create mode 100644 src/token/operandtype.cpp create mode 100644 src/token/registertype.cpp create mode 100644 src/token/token.cpp create mode 100644 src/token/tokenizer.cpp diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d163863 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +build/ \ No newline at end of file diff --git a/.vscode/.gitignore b/.vscode/.gitignore new file mode 100644 index 0000000..c3d3a20 --- /dev/null +++ b/.vscode/.gitignore @@ -0,0 +1 @@ +settings.json \ No newline at end of file diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..c36cc6f --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,15 @@ +{ + "configurations": [ + { + "name": "Linux", + "intelliSenseMode": "gcc-x64", + "includePath": ["${workspaceFolder}/include"], + "defines": [], + "forcedInclude": [], + "compilerPath": "/usr/bin/gcc", + "cStandard": "c11", + "cppStandard": "c++17" + } + ], + "version": 4 +} diff --git a/bin/.gitignore b/bin/.gitignore new file mode 100644 index 0000000..0afae4a --- /dev/null +++ b/bin/.gitignore @@ -0,0 +1 @@ 
+wassembly \ No newline at end of file diff --git a/bin/example.wasm b/bin/example.wasm new file mode 100644 index 0000000..3692fd9 --- /dev/null +++ b/bin/example.wasm @@ -0,0 +1,16 @@ +addi $10 $-5 %A; +subi %A $2 %B; +muli $2 %B %C; +divi $2 %C %D; + +loop: +addi $1 $0 %A; +addi $1 %A %B; +addi $1 %B %C; +addi $1 %C %D; + +subi %D $1 %D; +subi %C $1 %C; +subi %B $1 %B; +subi %A $1 %A; +jmp loop; \ No newline at end of file diff --git a/design.md b/design.md new file mode 100644 index 0000000..354ab04 --- /dev/null +++ b/design.md @@ -0,0 +1,40 @@ +# Design + +## Parsing + +- First we tokenize the input (syntax check) +- Then we interpret the input (semantics check) +- Then we execute the input + +## Notation + +- `[operation][number type]`, e.g. `divi` for divide (div) integer +- `%[register]` for addressing registers +- `$[value]` for using literals/immediate values +- `;` for end of statement (mandatory) +- `[label]:` for labels +- Elements must be separated by whitespace character + - Good: `add $2 $5 %A;` + - Bad: `add $2$5%A;` + +## Examples + +Divide register A by 5 and store the result in register A: +`divi %A $5 %A;` + +## Reserved symbols + +The following whitespace characters are used to separate symbols: +- space (' ') +- tab ('\t') +- return carriage ('\r') +- newline ('\n') + +The following characters are used as identifiers: +- dollar ('$') +- percentage ('%') +- colon (':') +- semicolon (';') +- hash ('#') + +All operands are reserved keywords and can therefore NOT be used as labels. 
diff --git a/include/execute/flags.hpp b/include/execute/flags.hpp new file mode 100644 index 0000000..3cc1da3 --- /dev/null +++ b/include/execute/flags.hpp @@ -0,0 +1,9 @@ +#pragma once + +namespace Execute +{ + struct Flags + { + + }; +} diff --git a/include/execute/registers.hpp b/include/execute/registers.hpp new file mode 100644 index 0000000..f7a9942 --- /dev/null +++ b/include/execute/registers.hpp @@ -0,0 +1,19 @@ +#pragma once + +namespace Execute +{ + struct Registers + { + union + { + struct + { + int A; + int B; + int C; + int D; + }; + int registers[4]; + }; + }; +} diff --git a/include/execute/state.hpp b/include/execute/state.hpp new file mode 100644 index 0000000..32604a7 --- /dev/null +++ b/include/execute/state.hpp @@ -0,0 +1,16 @@ +#pragma once +#include +#include + +namespace Execute +{ + struct State + { + unsigned currentStatement; + unsigned nextStatement; + std::unordered_map const & labelStatementIndice; + + + State(std::unordered_map const & labelStatementIndice); + }; +} \ No newline at end of file diff --git a/include/execute/virtualmachine.hpp b/include/execute/virtualmachine.hpp new file mode 100644 index 0000000..a55f428 --- /dev/null +++ b/include/execute/virtualmachine.hpp @@ -0,0 +1,33 @@ +#include +#include +#include +#include + +namespace Execute +{ + class VirtualMachine + { + private: + Flags flags; + Registers registers; + State state; + bool terminated; + + Interpret::Code const & code; + + void Step(); + + public: + void Run(); + void SingleStep(); + + Flags const & GetFlags() const; + Registers const & GetRegisters() const; + State const & GetState() const; + Interpret::Statement const * const GetCurrentStatement() const; + + bool IsTerminated() const; + + VirtualMachine(Interpret::Code const & code); + }; +} \ No newline at end of file diff --git a/include/interpret/code.hpp b/include/interpret/code.hpp new file mode 100644 index 0000000..d319dd6 --- /dev/null +++ b/include/interpret/code.hpp @@ -0,0 +1,15 @@ +#pragma once 
+#include +#include +#include +#include +#include + +namespace Interpret +{ + struct Code + { + std::vector> statements; + std::unordered_map labelStatementIndice; + }; +} \ No newline at end of file diff --git a/include/interpret/errors.hpp b/include/interpret/errors.hpp new file mode 100644 index 0000000..91ed7ed --- /dev/null +++ b/include/interpret/errors.hpp @@ -0,0 +1,62 @@ +#pragma once +#include +#include +#include + +namespace Interpret +{ + struct TokenError : public std::exception + { + Token::Token errorToken; + std::string errorMsg; + TokenError(Token::Token const & token, std::string const & msg); + }; + + struct ExpectedArgument : public TokenError + { + ExpectedArgument(Token::Token const & token); + }; + + struct ExpectedLabel : public TokenError + { + ExpectedLabel(Token::Token const & token); + }; + + struct ExpectedImmediate : public TokenError + { + ExpectedImmediate(Token::Token const & token); + }; + + struct ExpectedRegister : public TokenError + { + ExpectedRegister(Token::Token const & token); + }; + + struct ExpectedOperand : public TokenError + { + ExpectedOperand(Token::Token const & token); + }; + + struct TooManyArguments : public TokenError + { + TooManyArguments(Token::Token const & token); + }; + + struct TooFewArguments : public TokenError + { + TooFewArguments(Token::Token const & token); + }; + + struct MissingEndOfStatment : public TokenError + { + MissingEndOfStatment(Token::Token const & token); + }; + + namespace Internal + { + struct BadTokenForValue : public TokenError + { + BadTokenForValue(Token::Token const & token); + }; + } +} diff --git a/include/interpret/interpreter.hpp b/include/interpret/interpreter.hpp new file mode 100644 index 0000000..d36ec80 --- /dev/null +++ b/include/interpret/interpreter.hpp @@ -0,0 +1,14 @@ +#pragma once +#include +#include +#include +#include + +namespace Interpret +{ + class Interpreter + { + public: + void Interpret(std::vector const & tokens, Code & code); + }; +} \ No newline at end 
of file diff --git a/include/interpret/operanddefinitions.hpp b/include/interpret/operanddefinitions.hpp new file mode 100644 index 0000000..eee1f33 --- /dev/null +++ b/include/interpret/operanddefinitions.hpp @@ -0,0 +1,11 @@ +#pragma once +#include +#include +#include + +namespace Interpret +{ + std::unique_ptr ExtractStatement(unsigned const operatorIndex, std::vector const & tokens); + + int GetRequiredNumberOfArguments(Token::OperandType const type); +} \ No newline at end of file diff --git a/include/interpret/statement.hpp b/include/interpret/statement.hpp new file mode 100644 index 0000000..6aaa8d8 --- /dev/null +++ b/include/interpret/statement.hpp @@ -0,0 +1,57 @@ +#pragma once +#include +#include +#include +#include + +namespace Interpret +{ + struct Statement + { + virtual void Execute(Execute::Flags & flags, Execute::State & state, Execute::Registers & registers) = 0; + }; + + struct NoArgumentStatement : Statement + { + void (* function)(Execute::Flags & flags, Execute::Registers & registers); + void Execute(Execute::Flags & flags, Execute::State & state, Execute::Registers & registers) override; + }; + + struct OneArgumentStatement : Statement + { + void (* function)(Execute::Flags & flags, int argument1); + Value firstArgument; + + void Execute(Execute::Flags & flags, Execute::State & state, Execute::Registers & registers) override; + }; + + struct TwoArgumentStatement : Statement + { + void (* function)(Execute::Flags & flags, int argument1, int argument2); + Value firstArgument; + Value secondArgument; + + void Execute(Execute::Flags & flags, Execute::State & state, Execute::Registers & registers) override; + }; + + struct ThreeArgumentStatement : Statement + { + void (* function)(Execute::Flags & flags, int argument1, int argument2, int & argument3); + Value firstArgument; + Value secondArgument; + Value thirdArgument; + + void Execute(Execute::Flags & flags, Execute::State & state, Execute::Registers & registers) override; + }; + + struct 
JumpStatement : Statement + { + private: + std::string const label; + + public: + void Execute(Execute::Flags & flags, Execute::State & state, Execute::Registers & registers) override; + + JumpStatement(std::string const & label); + }; +} \ No newline at end of file diff --git a/include/interpret/value.hpp b/include/interpret/value.hpp new file mode 100644 index 0000000..0b7d589 --- /dev/null +++ b/include/interpret/value.hpp @@ -0,0 +1,26 @@ +#pragma once +#include +#include + +namespace Interpret +{ + enum class ValueType + { + Register, + ImmediateInteger + }; + + struct Value + { + ValueType type; + union + { + int registerIndex; + int integer; + }; + + int & GetValue(Execute::Registers & registers); + + void CreateFromToken(Token::Token const & token); + }; +} \ No newline at end of file diff --git a/include/token/operandtype.hpp b/include/token/operandtype.hpp new file mode 100644 index 0000000..03bf098 --- /dev/null +++ b/include/token/operandtype.hpp @@ -0,0 +1,19 @@ +#pragma once +#include + +namespace Token +{ + enum class OperandType + { + Unknown = -1, + AddInteger = 0, + SubtractInteger, + DivideInteger, + MultiplyInteger, + ShiftIntegerLeft, + ShiftIntegerRight, + Jump + }; + + OperandType GetOperandType(std::string const & op); +} \ No newline at end of file diff --git a/include/token/registertype.hpp b/include/token/registertype.hpp new file mode 100644 index 0000000..354ea2b --- /dev/null +++ b/include/token/registertype.hpp @@ -0,0 +1,16 @@ +#pragma once +#include + +namespace Token +{ + enum class RegisterType + { + Unknown = -1, + A = 0, + B, + C, + D + }; + + RegisterType GetRegisterType(std::string const & reg); +} \ No newline at end of file diff --git a/include/token/token.hpp b/include/token/token.hpp new file mode 100644 index 0000000..85c460e --- /dev/null +++ b/include/token/token.hpp @@ -0,0 +1,27 @@ +#pragma once +#include +#include +#include +#include + +namespace Token +{ + struct Token + { + int const lineNumber; + int const 
lineColumn; + TokenType type; + bool isValid; + std::variant data; + + Token(int const lineNumber, int const lineColumn); + Token(int const lineNumber, int const lineColumn, OperandType operatorType, bool validness); + Token(int const lineNumber, int const lineColumn, RegisterType registerType, bool validness); + Token(int const lineNumber, int const lineColumn, int value, bool validness); + Token(int const lineNumber, int const lineColumn, std::string const & value, bool validness); + + Token(Token const & other); + + void DebugPrint() const; + }; +} diff --git a/include/token/tokenizer.hpp b/include/token/tokenizer.hpp new file mode 100644 index 0000000..8f809d3 --- /dev/null +++ b/include/token/tokenizer.hpp @@ -0,0 +1,13 @@ +#pragma once +#include +#include +#include + +namespace Token +{ + class Tokenizer + { + public: + void Tokenize(std::string const & line, int const lineNumber, std::vector & tokens); + }; +} \ No newline at end of file diff --git a/include/token/tokentype.hpp b/include/token/tokentype.hpp new file mode 100644 index 0000000..e0289c8 --- /dev/null +++ b/include/token/tokentype.hpp @@ -0,0 +1,14 @@ +#pragma once + +namespace Token +{ + enum class TokenType + { + Unknown = -1, + Operand = 0, + ImmediateInteger, + Register, + StatementEnd, + Label + }; +} \ No newline at end of file diff --git a/makefile b/makefile new file mode 100644 index 0000000..df0f0ec --- /dev/null +++ b/makefile @@ -0,0 +1,29 @@ +CC = g++ +CFLAGS = -g -std=c++17 -Wall -Iinclude #-Werror +LFLAGS = #-lsfml-graphics -lsfml-window -lsfml-system + +CPPS = $(shell find src/ -name '*.cpp') +OBJS = $(patsubst src/%.cpp, build/%.o, ${CPPS}) +DEPS = $(patsubst src/%.cpp, build/%.d, ${CPPS}) + +BINARY = bin/wassembly + +.PHONY: all check clean + +all: ${BINARY} + +check: ${BINARY} + ./$< ./bin/example.wasm + +clean: + -rm -rf build ./${BINARY} + +${BINARY}: ${OBJS} + mkdir -p ${@D} + ${CC} ${CFLAGS} $^ ${LFLAGS} -o $@ + +build/%.o: src/%.cpp + mkdir -p ${@D} + ${CC} ${CFLAGS} -MMD 
-c $< -o $@ + +-include ${DEPS} diff --git a/src/execute/state.cpp b/src/execute/state.cpp new file mode 100644 index 0000000..8846a0e --- /dev/null +++ b/src/execute/state.cpp @@ -0,0 +1,11 @@ +#include + +namespace Execute +{ + State::State(std::unordered_map const & _labelStatementIndice) + : currentStatement(0u), + nextStatement(1u), + labelStatementIndice(_labelStatementIndice) + { + } +} \ No newline at end of file diff --git a/src/execute/virtualmachine.cpp b/src/execute/virtualmachine.cpp new file mode 100644 index 0000000..3bf8d04 --- /dev/null +++ b/src/execute/virtualmachine.cpp @@ -0,0 +1,52 @@ +#include + +namespace Execute +{ + void VirtualMachine::Step() + { + state.nextStatement = state.currentStatement + 1u; + + code.statements[state.currentStatement]->Execute(flags, state, registers); + + state.currentStatement = state.nextStatement; + if (state.currentStatement >= code.statements.size()) + { + terminated = true; + } + } + + void VirtualMachine::Run() + { + while(!IsTerminated()) + { + Step(); + } + } + + void VirtualMachine::SingleStep() + { + if(!IsTerminated()) + { + Step(); + } + } + + Flags const & VirtualMachine::GetFlags() const { return flags; } + Registers const & VirtualMachine::GetRegisters() const { return registers; } + State const & VirtualMachine::GetState() const { return state; } + Interpret::Statement const * const VirtualMachine::GetCurrentStatement() const + { + return code.statements[state.currentStatement].get(); + } + + bool VirtualMachine::IsTerminated() const { return terminated; } + + VirtualMachine::VirtualMachine(Interpret::Code const & _code) + : flags(), + registers(), + state(_code.labelStatementIndice), + terminated(false), + code(_code) + { + } +} \ No newline at end of file diff --git a/src/interpret/error.cpp b/src/interpret/error.cpp new file mode 100644 index 0000000..8da2050 --- /dev/null +++ b/src/interpret/error.cpp @@ -0,0 +1,58 @@ +#include + +namespace Interpret +{ + TokenError::TokenError(Token::Token 
const & token, std::string const & msg) + : errorToken(token), + errorMsg(msg) + { + } + + ExpectedArgument::ExpectedArgument(Token::Token const & token) + : TokenError(token, "Expected an argument") + { + } + + ExpectedLabel::ExpectedLabel(Token::Token const & token) + : TokenError(token, "Expected a label") + { + } + + ExpectedImmediate::ExpectedImmediate(Token::Token const & token) + : TokenError(token, "Expected an immediate value") + { + } + + ExpectedRegister::ExpectedRegister(Token::Token const & token) + : TokenError(token, "Expected a register") + { + } + + ExpectedOperand::ExpectedOperand(Token::Token const & token) + : TokenError(token, "Expected an operand") + { + } + + TooManyArguments::TooManyArguments(Token::Token const & token) + : TokenError(token, "Too many arguments for operand") + { + } + + TooFewArguments::TooFewArguments(Token::Token const & token) + : TokenError(token, "Too few arguments for operand") + { + } + + MissingEndOfStatment::MissingEndOfStatment(Token::Token const & token) + : TokenError(token, "Missing end of line terminator (;)") + { + } + + namespace Internal + { + BadTokenForValue::BadTokenForValue(Token::Token const & token) + : TokenError(token, "Internal error when converting token to value") + { + } + } +} \ No newline at end of file diff --git a/src/interpret/interpreter.cpp b/src/interpret/interpreter.cpp new file mode 100644 index 0000000..5e5e56b --- /dev/null +++ b/src/interpret/interpreter.cpp @@ -0,0 +1,77 @@ +#include +#include +#include + +namespace Interpret +{ + void Interpreter::Interpret(std::vector const & tokens, Code & code) + { + enum class InterpreterState + { + FindOperand, + FindArguments, + FindStatementEnd + }; + + InterpreterState state = InterpreterState::FindOperand; + unsigned operatorTokenIndex = 0u; + int expectedNumberOfArguments = 0; + for(unsigned i = 0u; i < tokens.size(); ++i) + { + auto const & token = tokens[i]; + switch(state) + { + case InterpreterState::FindOperand: + if (token.type == 
Token::TokenType::Operand) + { + operatorTokenIndex = i; + expectedNumberOfArguments = GetRequiredNumberOfArguments(std::get(token.data)); + if (expectedNumberOfArguments < 1) + { + state = InterpreterState::FindStatementEnd; + } + else + { + state = InterpreterState::FindArguments; + } + } + else if (token.type == Token::TokenType::Label) + { + code.labelStatementIndice[std::get(token.data)] = code.statements.size(); + } + else if (token.type != Token::TokenType::StatementEnd) + { + throw ExpectedOperand(token); + } + break; + + case InterpreterState::FindArguments: + if (token.type == Token::TokenType::ImmediateInteger || token.type == Token::TokenType::Register || token.type == Token::TokenType::Label) + { + expectedNumberOfArguments -= 1; + if (expectedNumberOfArguments < 1) + { + state = InterpreterState::FindStatementEnd; + } + } + else + { + throw ExpectedArgument(token); + } + break; + + case InterpreterState::FindStatementEnd: + if (token.type != Token::TokenType::StatementEnd) + { + throw MissingEndOfStatment(token); + } + else + { + code.statements.emplace_back(ExtractStatement(operatorTokenIndex, tokens)); + state = InterpreterState::FindOperand; + } + break; + } + } + } +} \ No newline at end of file diff --git a/src/interpret/operanddefinitions.cpp b/src/interpret/operanddefinitions.cpp new file mode 100644 index 0000000..7b0d848 --- /dev/null +++ b/src/interpret/operanddefinitions.cpp @@ -0,0 +1,165 @@ +#include +#include + +namespace Interpret +{ + Value GetImmediateArgument(unsigned const index, std::vector const & tokens) + { + auto const & token = tokens[index]; + if (token.type == Token::TokenType::ImmediateInteger) + { + Value v; + v.CreateFromToken(token); + + return v; + } + + throw ExpectedImmediate(token); + } + + Value GetRegisterArgument(unsigned const index, std::vector const & tokens) + { + auto const & token = tokens[index]; + if (token.type == Token::TokenType::Register) + { + Value v; + v.CreateFromToken(token); + + return v; + } + + 
throw ExpectedRegister(token); + } + + Value GetImmediateOrRegisterArgument(unsigned const index, std::vector const & tokens) + { + auto const & token = tokens[index]; + if (token.type == Token::TokenType::ImmediateInteger || token.type == Token::TokenType::Register) + { + Value v; + v.CreateFromToken(token); + + return v; + } + + throw ExpectedRegister(token); + } + + void AddArithmeticArguments(ThreeArgumentStatement & statement, unsigned const operandIndex, std::vector const & tokens) + { + statement.firstArgument = GetImmediateOrRegisterArgument(operandIndex + 1u, tokens); + statement.secondArgument = GetImmediateOrRegisterArgument(operandIndex + 2u, tokens); + statement.thirdArgument = GetRegisterArgument(operandIndex + 3u, tokens); + } + + std::unique_ptr ExtractStatement(unsigned const operandIndex, std::vector const & tokens) + { + auto const & token = tokens[operandIndex]; + switch(std::get(token.data)) + { + case Token::OperandType::AddInteger: + { + auto statement = std::make_unique(); + statement->function = [](Execute::Flags & flags, int argument1, int argument2, int & argument3) + { + argument3 = argument1 + argument2; + }; + AddArithmeticArguments(*statement, operandIndex, tokens); + return statement; + } + + case Token::OperandType::SubtractInteger: + { + auto statement = std::make_unique(); + statement->function = [](Execute::Flags & flags, int argument1, int argument2, int & argument3) + { + argument3 = argument1 - argument2; + }; + AddArithmeticArguments(*statement, operandIndex, tokens); + return statement; + } + + case Token::OperandType::DivideInteger: + { + auto statement = std::make_unique(); + statement->function = [](Execute::Flags & flags, int argument1, int argument2, int & argument3) + { + argument3 = argument1 / argument2; + }; + AddArithmeticArguments(*statement, operandIndex, tokens); + return statement; + } + + case Token::OperandType::MultiplyInteger: + { + auto statement = std::make_unique(); + statement->function = 
[](Execute::Flags & flags, int argument1, int argument2, int & argument3) + { + argument3 = argument1 * argument2; + }; + AddArithmeticArguments(*statement, operandIndex, tokens); + return statement; + } + + case Token::OperandType::ShiftIntegerLeft: + { + auto statement = std::make_unique(); + statement->function = [](Execute::Flags & flags, int argument1, int argument2, int & argument3) + { + argument3 = argument1 << argument2; + }; + AddArithmeticArguments(*statement, operandIndex, tokens); + return statement; + } + + case Token::OperandType::ShiftIntegerRight: + { + auto statement = std::make_unique(); + statement->function = [](Execute::Flags & flags, int argument1, int argument2, int & argument3) + { + argument3 = argument1 >> argument2; + }; + AddArithmeticArguments(*statement, operandIndex, tokens); + return statement; + } + + case Token::OperandType::Jump: + { + auto labelToken = tokens[operandIndex + 1u]; + if (labelToken.type != Token::TokenType::Label) + { + throw ExpectedLabel(labelToken); + } + return std::make_unique(std::get(labelToken.data)); + } + + default: + { + auto statement = std::make_unique(); + // TODO throw error? 
+ statement->function = [](Execute::Flags & flags, Execute::Registers & registers) { std::puts("ExtractStatement: Extracted unhandled operator type"); }; + return statement; + } + } + } + + int GetRequiredNumberOfArguments(Token::OperandType const type) + { + switch (type) + { + case Token::OperandType::AddInteger: + case Token::OperandType::SubtractInteger: + case Token::OperandType::DivideInteger: + case Token::OperandType::MultiplyInteger: + case Token::OperandType::ShiftIntegerLeft: + case Token::OperandType::ShiftIntegerRight: + return 3; + + case Token::OperandType::Jump: + return 1; + + default: + return 0; + } + } +} \ No newline at end of file diff --git a/src/interpret/statement.cpp b/src/interpret/statement.cpp new file mode 100644 index 0000000..59e1af3 --- /dev/null +++ b/src/interpret/statement.cpp @@ -0,0 +1,42 @@ +#include + +namespace Interpret +{ + void NoArgumentStatement::Execute(Execute::Flags & flags, Execute::State & state, Execute::Registers & registers) + { + function(flags, registers); + } + + void OneArgumentStatement::Execute(Execute::Flags & flags, Execute::State & state, Execute::Registers & registers) + { + function(flags, firstArgument.GetValue(registers)); + } + + void TwoArgumentStatement::Execute(Execute::Flags & flags, Execute::State & state, Execute::Registers & registers) + { + function(flags, firstArgument.GetValue(registers), secondArgument.GetValue(registers)); + } + + void ThreeArgumentStatement::Execute(Execute::Flags & flags, Execute::State & state, Execute::Registers & registers) + { + function(flags, firstArgument.GetValue(registers), secondArgument.GetValue(registers), thirdArgument.GetValue(registers)); + } + + void JumpStatement::Execute(Execute::Flags & flags, Execute::State & state, Execute::Registers & registers) + { + auto const & elem = state.labelStatementIndice.find(label); + if (elem != state.labelStatementIndice.end()) + { + state.nextStatement = elem->second; + } + else + { + throw 
std::runtime_error("Attempted jump to nonexisting label"); + } + } + + JumpStatement::JumpStatement(std::string const & _label) + : label(_label) + { + } +} \ No newline at end of file diff --git a/src/interpret/value.cpp b/src/interpret/value.cpp new file mode 100644 index 0000000..4d4103b --- /dev/null +++ b/src/interpret/value.cpp @@ -0,0 +1,33 @@ +#include +#include + +namespace Interpret +{ + int & Value::GetValue(Execute::Registers & registers) + { + if (type == ValueType::ImmediateInteger) + { + return integer; + } + + return registers.registers[registerIndex]; + } + + void Value::CreateFromToken(Token::Token const & token) + { + if (token.type == Token::TokenType::ImmediateInteger) + { + type = ValueType::ImmediateInteger; + integer = std::get(token.data); + } + else if (token.type == Token::TokenType::Register) + { + type = ValueType::Register; + registerIndex = static_cast(std::get(token.data)); + } + else + { + throw Internal::BadTokenForValue(token); + } + } +} \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..5472085 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,133 @@ +#include +#include +#include +#include +#include +#include + +void PrintBadToken(Token::Token const & token, std::vector const & lines) +{ + std::printf("at line number %i, column %i:\n", + token.lineNumber + 1, + token.lineColumn + 1); + std::printf("%s\n", lines[token.lineNumber].c_str()); + for(int i = 0; i < token.lineColumn; ++i) + { + std::putc(' ', stdout); + } + std::puts("^"); +} + +void PrintTokenError(Interpret::TokenError const & err, std::vector const & lines) +{ + std::printf("%s ", err.errorMsg.c_str()); + PrintBadToken(err.errorToken, lines); +} + +int main(int argc, char ** argv) +{ + if (argc != 2) + { + std::puts("Usage: wassembly [filename.wasm]"); + return 1; + } + + std::ifstream input(argv[1]); + if (!input.is_open()) + { + std::printf("Error: Cannot open file %s for reading", argv[1]); + return 1; + } + + 
std::vector tokens; + Token::Tokenizer tokenizer; + std::vector lines; // DEBUG + std::string line; + unsigned lineNumber = 0; + while(std::getline(input, line)) + { + tokenizer.Tokenize(line, lineNumber, tokens); + ++lineNumber; + lines.push_back(line); // DEBUG + } + input.close(); + + // DEBUG + std::puts("*** Tokenization result ***"); + unsigned statementNumber = 0u; + std::printf("%02u - ", statementNumber); + for(unsigned i = 0u; i < tokens.size(); ++i) + { + auto const & token = tokens[i]; + token.DebugPrint(); + if (token.type == Token::TokenType::StatementEnd) + { + ++statementNumber; + if (i + 1 < tokens.size()) + { + std::printf("\n%02u - ", statementNumber); + } + else + { + std::puts(""); + } + } + } + // END DEBUG + + // Validate the syntax + bool syntaxOk = true; + for(auto const & token : tokens) + { + if (!token.isValid) + { + std::printf("Syntax error "); + PrintBadToken(token, lines); + syntaxOk = false; + } + } + + if (!syntaxOk) + { + std::puts("Aborting due to syntax error(s)"); + return 1; + } + + Interpret::Interpreter interpreter; + Interpret::Code code; + try + { + interpreter.Interpret(tokens, code); + } + catch(Interpret::TokenError & e) + { + PrintTokenError(e, lines); + return 1; + } + + // DEBUG + std::puts("\n*** Labels ***"); + for(auto const & labelIndice : code.labelStatementIndice) + { + std::printf("Label %s points to statement %u\n", labelIndice.first.c_str(), labelIndice.second); + } + + std::puts("\n*** Execution ***"); + Execute::VirtualMachine vm(code); + for(unsigned i = 0u; i < 900000; ++i) + { + vm.SingleStep(); + auto const & registers = vm.GetRegisters(); + std::printf("A=%i B=%i C=%i D=%i\n", registers.A, registers.B, registers.C, registers.D); + auto const & flags = vm.GetFlags(); + std::printf("-\n"); // TODO + auto const & state = vm.GetState(); + std::printf("current_statement=%i\n", state.currentStatement); + + std::puts("Press any key to step..."); + std::getchar(); + } + // END DEBUG + + return 0; +} \ No 
newline at end of file
diff --git a/src/token/operandtype.cpp b/src/token/operandtype.cpp
new file mode 100644
index 0000000..3a71aa9
--- /dev/null
+++ b/src/token/operandtype.cpp
@@ -0,0 +1,38 @@
+// NOTE(review): project header path inferred from the patch's file list - confirm.
+#include <token/operandtype.hpp>
+
+#include <functional>
+#include <map>
+#include <string>
+
+namespace Token
+{
+    /// Maps an instruction mnemonic to its OperandType.
+    ///
+    /// @param op Mnemonic as written in the source; the lookup is case-sensitive.
+    /// @return The matching OperandType, or OperandType::Unknown when the
+    ///         mnemonic is not in the table.
+    OperandType GetOperandType(std::string const & op)
+    {
+        // Built once, on the first call.
+        static std::map<std::string, OperandType, std::less<>> const operations =
+        {
+            { "addi", OperandType::AddInteger },
+            { "subi", OperandType::SubtractInteger },
+            { "divi", OperandType::DivideInteger },
+            { "muli", OperandType::MultiplyInteger },
+            { "shri", OperandType::ShiftIntegerRight },
+            { "shli", OperandType::ShiftIntegerLeft },
+            { "jmp", OperandType::Jump }
+        };
+
+        // Keep the iterator by value instead of binding a reference to a temporary.
+        auto const result = operations.find(op);
+        if (result != operations.end())
+        {
+            return result->second;
+        }
+
+        return OperandType::Unknown;
+    }
+}
\ No newline at end of file
diff --git a/src/token/registertype.cpp b/src/token/registertype.cpp
new file mode 100644
index 0000000..656bd10
--- /dev/null
+++ b/src/token/registertype.cpp
@@ -0,0 +1,33 @@
+// NOTE(review): project header path inferred from the patch's file list - confirm.
+#include <token/registertype.hpp>
+
+#include <functional>
+#include <map>
+#include <string>
+
+namespace Token
+{
+    /// Maps a register name to its RegisterType.
+    ///
+    /// @param reg Register name without the leading '%'; the lookup is case-sensitive.
+    /// @return The matching RegisterType, or RegisterType::Unknown when the
+    ///         name is not in the table.
+    RegisterType GetRegisterType(std::string const & reg)
+    {
+        static std::map<std::string, RegisterType, std::less<>> const registers =
+        {
+            { "A", RegisterType::A },
+            { "B", RegisterType::B },
+            { "C", RegisterType::C },
+            { "D", RegisterType::D }
+        };
+
+        auto const result = registers.find(reg);
+        if (result != registers.end())
+        {
+            return result->second;
+        }
+
+        return RegisterType::Unknown;
+    }
+}
\ No newline at end of file
diff --git a/src/token/token.cpp b/src/token/token.cpp
new file mode 100644
index 0000000..78b3d04
--- /dev/null
+++ b/src/token/token.cpp
@@ -0,0 +1,144 @@
+// NOTE(review): project header path inferred from the patch's file list - confirm.
+#include <token/token.hpp>
+
+#include <cstdio>
+#include <string>
+#include <variant>
+
+namespace Token
+{
+    namespace
+    {
+        /// Text printed by DebugPrint for a valid operand token.
+        char const * OperandName(OperandType const type)
+        {
+            switch(type)
+            {
+                case OperandType::AddInteger:        return "addi";
+                case OperandType::MultiplyInteger:   return "muli";
+                case OperandType::SubtractInteger:   return "subi";
+                case OperandType::DivideInteger:     return "divi";
+                case OperandType::ShiftIntegerLeft:  return "shli";
+                case OperandType::ShiftIntegerRight: return "shri";
+                // Same spelling as the "jmp" entry in GetOperandType's table.
+                case OperandType::Jump:              return "jmp";
+                default:                             return "unknown_op";
+            }
+        }
+
+        /// Text printed by DebugPrint for a valid register token.
+        char const * RegisterName(RegisterType const type)
+        {
+            switch(type)
+            {
+                case RegisterType::A: return "%A";
+                case RegisterType::B: return "%B";
+                case RegisterType::C: return "%C";
+                case RegisterType::D: return "%D";
+                default:              return "%unknown_reg";
+            }
+        }
+    }
+
+    /// Creates an invalid token of unknown type at the given position.
+    Token::Token(int const _lineNumber, int const _lineColumn)
+        : lineNumber(_lineNumber),
+        lineColumn(_lineColumn),
+        type(TokenType::Unknown),
+        isValid(false)
+    {
+    }
+
+    /// Creates an operand token holding the given operand type.
+    Token::Token(int const _lineNumber, int const _lineColumn, OperandType _operatorType, bool validness)
+        : lineNumber(_lineNumber),
+        lineColumn(_lineColumn),
+        type(TokenType::Operand),
+        isValid(validness),
+        data(_operatorType)
+    {
+    }
+
+    /// Creates a register token holding the given register type.
+    Token::Token(int const _lineNumber, int const _lineColumn, RegisterType _registerType, bool validness)
+        : lineNumber(_lineNumber),
+        lineColumn(_lineColumn),
+        type(TokenType::Register),
+        isValid(validness),
+        data(_registerType)
+    {
+    }
+
+    /// Creates an immediate integer token holding the given value.
+    Token::Token(int const _lineNumber, int const _lineColumn, int value, bool validness)
+        : lineNumber(_lineNumber),
+        lineColumn(_lineColumn),
+        type(TokenType::ImmediateInteger),
+        isValid(validness),
+        data(value)
+    {
+    }
+
+    /// Creates a label token holding the given label name.
+    Token::Token(int const _lineNumber, int const _lineColumn, std::string const & value, bool validness)
+        : lineNumber(_lineNumber),
+        lineColumn(_lineColumn),
+        type(TokenType::Label),
+        isValid(validness),
+        data(value)
+    {
+    }
+
+    /// Copies every member of other.
+    Token::Token(Token const & other)
+        : lineNumber(other.lineNumber),
+        lineColumn(other.lineColumn),
+        type(other.type),
+        isValid(other.isValid),
+        data(other.data)
+    {
+    }
+
+    /// Writes a debug representation of this token to stdout, preceded by one space.
+    ///
+    /// Invalid immediate, operand and register tokens print a BAD_* marker
+    /// instead of their payload.
+    void Token::DebugPrint() const
+    {
+        std::putc(' ', stdout);
+        switch(type)
+        {
+            case TokenType::ImmediateInteger:
+                if (isValid)
+                {
+                    std::printf("$int=%i", std::get<int>(data));
+                }
+                else
+                {
+                    std::printf("BAD_IMM_INT");
+                }
+                break;
+
+            case TokenType::Operand:
+                std::printf("%s", isValid ? OperandName(std::get<OperandType>(data)) : "BAD_OP");
+                break;
+
+            case TokenType::Register:
+                std::printf("%s", isValid ? RegisterName(std::get<RegisterType>(data)) : "BAD_REG");
+                break;
+
+            case TokenType::StatementEnd:
+                std::printf("EOS");
+                break;
+
+            case TokenType::Label:
+                std::printf("label=%s", std::get<std::string>(data).c_str());
+                break;
+
+            case TokenType::Unknown:
+            default:
+                std::printf("UNKNOWN_TOKEN");
+                break;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/token/tokenizer.cpp b/src/token/tokenizer.cpp
new file mode 100644
index 0000000..fae22c6
--- /dev/null
+++ b/src/token/tokenizer.cpp
@@ -0,0 +1,158 @@
+// NOTE(review): project header path inferred from the patch's file list - confirm.
+#include <token/tokenizer.hpp>
+
+#include <cstddef>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace Token
+{
+    /// Returns true for the characters that separate tokens:
+    /// newline, space, tab and carriage return.
+    bool IsWhiteSpace(char const c)
+    {
+        return c == '\n' || c == ' ' || c == '\t' || c == '\r';
+    }
+
+    /// Classifies one piece of source text as a token.
+    ///
+    /// Forms are checked in this order:
+    ///  - empty text      -> invalid token of unknown type
+    ///  - leading '$'     -> immediate integer; invalid unless the rest is
+    ///                       entirely a number that fits an int
+    ///  - leading '%'     -> register; invalid when the name is unknown
+    ///  - leading ';'     -> statement end
+    ///  - trailing ':'    -> label, stored without the ':'
+    ///  - known mnemonic  -> operand
+    ///  - anything else   -> label
+    ///
+    /// @param string     Text of the token.
+    /// @param lineNumber Line number stored on the token.
+    /// @param lineColumn Column stored on the token.
+    /// @return The classified token.
+    Token ExtractToken(std::string const & string, int const lineNumber, int const lineColumn)
+    {
+        if (string.empty())
+        {
+            return Token(lineNumber, lineColumn);
+        }
+
+        char const prefix = string.front();
+        if (prefix == '$')
+        {
+            std::string const digits = string.substr(1);
+            int value = 0;
+            std::size_t parsed = 0;
+            try
+            {
+                value = std::stoi(digits, &parsed);
+            }
+            catch(std::logic_error const &)
+            {
+                // std::invalid_argument (not a number) and std::out_of_range
+                // (does not fit an int) both derive from std::logic_error.
+                return Token(lineNumber, lineColumn, 0, false);
+            }
+
+            // std::stoi stops at the first character it cannot use, so "$12abc"
+            // would otherwise be accepted as 12.
+            if (parsed != digits.size())
+            {
+                return Token(lineNumber, lineColumn, 0, false);
+            }
+
+            return Token(lineNumber, lineColumn, value, true);
+        }
+
+        if (prefix == '%')
+        {
+            RegisterType const rtype = GetRegisterType(string.substr(1));
+            return Token(lineNumber, lineColumn, rtype, rtype != RegisterType::Unknown);
+        }
+
+        if (prefix == ';')
+        {
+            Token token(lineNumber, lineColumn);
+            token.type = TokenType::StatementEnd;
+            token.isValid = true;
+            return token;
+        }
+
+        if (string.back() == ':')
+        {
+            return Token(lineNumber, lineColumn, string.substr(0, string.size() - 1), true);
+        }
+
+        OperandType const opType = GetOperandType(string);
+        if (opType != OperandType::Unknown)
+        {
+            return Token(lineNumber, lineColumn, opType, true);
+        }
+
+        // Last resort: it must be a label
+        return Token(lineNumber, lineColumn, string, true);
+    }
+
+    /// Splits one source line into tokens and appends them to tokens.
+    ///
+    /// Tokens are separated by whitespace. A ';' always becomes a token of its
+    /// own, and a '#' where a token would start discards the rest of the line.
+    ///
+    /// @param line       Source line to split.
+    /// @param lineNumber Line number stored on every emitted token.
+    /// @param tokens     Receives the new tokens; existing elements are kept.
+    void Tokenizer::Tokenize(std::string const & line, int const lineNumber, std::vector<Token> & tokens)
+    {
+        auto const emit = [&](std::size_t const begin, std::size_t const length)
+        {
+            tokens.push_back(ExtractToken(line.substr(begin, length), lineNumber, static_cast<int>(begin)));
+        };
+
+        bool insideToken = false;
+        std::size_t tokenStart = 0;
+        for(std::size_t i = 0; i < line.size(); ++i)
+        {
+            char const c = line[i];
+            if (!insideToken)
+            {
+                if (IsWhiteSpace(c))
+                {
+                    continue;
+                }
+
+                if (c == '#')
+                {
+                    // Ignore comments
+                    return;
+                }
+
+                if (c == ';')
+                {
+                    // Emit the terminator on its own so that text glued to it
+                    // (";foo") is not swallowed into the statement end.
+                    emit(i, 1);
+                    continue;
+                }
+
+                tokenStart = i;
+                insideToken = true;
+            }
+            else if (IsWhiteSpace(c) || c == ';')
+            {
+                emit(tokenStart, i - tokenStart);
+                if (c == ';')
+                {
+                    // Report the terminator at its own column, not the previous token's.
+                    emit(i, 1);
+                }
+                insideToken = false;
+            }
+        }
+
+        if (insideToken)
+        {
+            emit(tokenStart, line.size() - tokenStart);
+        }
+    }
+}
\ No newline at end of file