Add string literals

This commit is contained in:
2020-05-17 20:30:57 +02:00
parent fc2870ca74
commit e1008b43a6
11 changed files with 152 additions and 48 deletions

View File

@@ -69,6 +69,7 @@ The following characters are used as identifiers:
- semicolon (`;`) for statement termination - semicolon (`;`) for statement termination
- hash (`#`) for comments - hash (`#`) for comments
- square brackets (`[` and `]`) for addressing memory - square brackets (`[` and `]`) for addressing memory
- double quotes (`"`) for string values
## Memory Model ## Memory Model
@@ -90,6 +91,8 @@ There is currently no strict checking, so be careful.
- `DECLARE` declares the first label argument to equal the second, immediate - `DECLARE` declares the first label argument to equal the second, immediate
value, argument and is used to declare a constant for the virtual machine. value, argument and is used to declare a constant for the virtual machine.
- `STRING` puts the string value declared as the second argument in the
memory location given by the first immediate argument
### Operands ### Operands

View File

@@ -104,6 +104,9 @@ seti %B $10;
int $5; int $5;
int $3; int $3;
# Demonstrate string literals
"Hello world!";
exit; exit;
noop_function: noop_function:

View File

@@ -5,71 +5,71 @@
namespace Interpret namespace Interpret
{ {
struct TokenError : public std::exception struct InterpretationError : public std::exception
{ {
Token::Token errorToken; Token::Token errorToken;
std::string errorMsg; std::string errorMsg;
TokenError(Token::Token const & token, std::string const & msg); InterpretationError(Token::Token const & token, std::string const & msg);
}; };
struct ExpectedArgument : public TokenError struct ExpectedArgument : public InterpretationError
{ {
ExpectedArgument(Token::Token const & token); ExpectedArgument(Token::Token const & token);
}; };
struct ExpectedLabel : public TokenError struct ExpectedLabel : public InterpretationError
{ {
ExpectedLabel(Token::Token const & token); ExpectedLabel(Token::Token const & token);
}; };
struct ExpectedValue : public TokenError struct ExpectedValue : public InterpretationError
{ {
ExpectedValue(Token::Token const & token); ExpectedValue(Token::Token const & token);
}; };
struct ExpectedImmediate : public TokenError struct ExpectedImmediate : public InterpretationError
{ {
ExpectedImmediate(Token::Token const & token); ExpectedImmediate(Token::Token const & token);
}; };
struct ExpectedImmediateOrMemory : public TokenError struct ExpectedImmediateOrMemory : public InterpretationError
{ {
ExpectedImmediateOrMemory(Token::Token const & token); ExpectedImmediateOrMemory(Token::Token const & token);
}; };
struct ExpectedRegister : public TokenError struct ExpectedRegister : public InterpretationError
{ {
ExpectedRegister(Token::Token const & token); ExpectedRegister(Token::Token const & token);
}; };
struct ExpectedRegisterOrMemory : public TokenError struct ExpectedRegisterOrMemory : public InterpretationError
{ {
ExpectedRegisterOrMemory(Token::Token const & token); ExpectedRegisterOrMemory(Token::Token const & token);
}; };
struct ExpectedOperand : public TokenError struct ExpectedOperand : public InterpretationError
{ {
ExpectedOperand(Token::Token const & token); ExpectedOperand(Token::Token const & token);
}; };
struct TooManyArguments : public TokenError struct TooManyArguments : public InterpretationError
{ {
TooManyArguments(Token::Token const & token); TooManyArguments(Token::Token const & token);
}; };
struct TooFewArguments : public TokenError struct TooFewArguments : public InterpretationError
{ {
TooFewArguments(Token::Token const & token); TooFewArguments(Token::Token const & token);
}; };
struct MissingEndOfStatment : public TokenError struct MissingEndOfStatment : public InterpretationError
{ {
MissingEndOfStatment(Token::Token const & token); MissingEndOfStatment(Token::Token const & token);
}; };
namespace Internal namespace Internal
{ {
struct BadTokenForValue : public TokenError struct BadTokenForValue : public InterpretationError
{ {
BadTokenForValue(Token::Token const & token); BadTokenForValue(Token::Token const & token);
}; };

18
include/token/errors.hpp Normal file
View File

@@ -0,0 +1,18 @@
#pragma once
#include <stdexcept>
#include <token/token.hpp>
namespace Token
{
    // Base class for errors raised while turning source text into tokens.
    // It carries the offending token next to a human-readable message so the
    // error can be reported together with the token it refers to.
    struct TokenizationError : public std::exception
    {
        // Token at which tokenization failed.
        Token errorToken;
        // Human-readable description of the failure.
        std::string errorMsg;

        TokenizationError(Token const & token, std::string const & msg);

        // Expose the stored message through the std::exception interface so
        // that handlers catching only std::exception still report something
        // meaningful. The returned pointer is owned by errorMsg and stays
        // valid for as long as this exception object is alive and unmodified.
        char const * what() const noexcept override
        {
            return errorMsg.c_str();
        }
    };

    // Reports a string literal that was opened but has no terminating
    // double quote.
    struct MissingEndOfString : public TokenizationError
    {
        MissingEndOfString(Token const & token);
    };
}

View File

@@ -42,6 +42,7 @@ namespace Token
static Token CreateOperandToken(OperandType const operandType, int const lineNumber, int const lineColumn); static Token CreateOperandToken(OperandType const operandType, int const lineNumber, int const lineColumn);
static Token CreateMemoryToken(RegisterType const registerType, int const lineNumber, int const lineColumn); static Token CreateMemoryToken(RegisterType const registerType, int const lineNumber, int const lineColumn);
static Token CreateMemoryToken(int const value, bool isValid, int const lineNumber, int const lineColumn); static Token CreateMemoryToken(int const value, bool isValid, int const lineNumber, int const lineColumn);
static Token CreateStringLiteralToken(std::string const & value, int const lineNumber, int const lineColumn);
void DebugPrint() const; void DebugPrint() const;
}; };

View File

@@ -10,6 +10,7 @@ namespace Token
Register, Register,
StatementEnd, StatementEnd,
Label, Label,
String,
Memory Memory
}; };
} }

View File

@@ -2,71 +2,71 @@
namespace Interpret namespace Interpret
{ {
TokenError::TokenError(Token::Token const & token, std::string const & msg) InterpretationError::InterpretationError(Token::Token const & token, std::string const & msg)
: errorToken(token), : errorToken(token),
errorMsg(msg) errorMsg(msg)
{ {
} }
ExpectedArgument::ExpectedArgument(Token::Token const & token) ExpectedArgument::ExpectedArgument(Token::Token const & token)
: TokenError(token, "Expected an argument") : InterpretationError(token, "Expected an argument")
{ {
} }
ExpectedLabel::ExpectedLabel(Token::Token const & token) ExpectedLabel::ExpectedLabel(Token::Token const & token)
: TokenError(token, "Expected a label") : InterpretationError(token, "Expected a label")
{ {
} }
ExpectedValue::ExpectedValue(Token::Token const & token) ExpectedValue::ExpectedValue(Token::Token const & token)
: TokenError(token, "Expected an immediate value, a register or a memory location") : InterpretationError(token, "Expected an immediate value, a register or a memory location")
{ {
} }
ExpectedImmediate::ExpectedImmediate(Token::Token const & token) ExpectedImmediate::ExpectedImmediate(Token::Token const & token)
: TokenError(token, "Expected an immediate value") : InterpretationError(token, "Expected an immediate value")
{ {
} }
ExpectedImmediateOrMemory::ExpectedImmediateOrMemory(Token::Token const & token) ExpectedImmediateOrMemory::ExpectedImmediateOrMemory(Token::Token const & token)
: TokenError(token, "Expected an immediate value or a memory location") : InterpretationError(token, "Expected an immediate value or a memory location")
{ {
} }
ExpectedRegister::ExpectedRegister(Token::Token const & token) ExpectedRegister::ExpectedRegister(Token::Token const & token)
: TokenError(token, "Expected a register") : InterpretationError(token, "Expected a register")
{ {
} }
ExpectedRegisterOrMemory::ExpectedRegisterOrMemory(Token::Token const & token) ExpectedRegisterOrMemory::ExpectedRegisterOrMemory(Token::Token const & token)
: TokenError(token, "Expected a register or a memory location") : InterpretationError(token, "Expected a register or a memory location")
{ {
} }
ExpectedOperand::ExpectedOperand(Token::Token const & token) ExpectedOperand::ExpectedOperand(Token::Token const & token)
: TokenError(token, "Expected an operand") : InterpretationError(token, "Expected an operand")
{ {
} }
TooManyArguments::TooManyArguments(Token::Token const & token) TooManyArguments::TooManyArguments(Token::Token const & token)
: TokenError(token, "Too many arguments for operand") : InterpretationError(token, "Too many arguments for operand")
{ {
} }
TooFewArguments::TooFewArguments(Token::Token const & token) TooFewArguments::TooFewArguments(Token::Token const & token)
: TokenError(token, "Too few arguments for operand") : InterpretationError(token, "Too few arguments for operand")
{ {
} }
MissingEndOfStatment::MissingEndOfStatment(Token::Token const & token) MissingEndOfStatment::MissingEndOfStatment(Token::Token const & token)
: TokenError(token, "Missing end of line terminator (;)") : InterpretationError(token, "Missing end of line terminator (;)")
{ {
} }
namespace Internal namespace Internal
{ {
BadTokenForValue::BadTokenForValue(Token::Token const & token) BadTokenForValue::BadTokenForValue(Token::Token const & token)
: TokenError(token, "Internal error when converting token to value") : InterpretationError(token, "Internal error when converting token to value")
{ {
} }
} }

View File

@@ -3,6 +3,7 @@
#include <fstream> #include <fstream>
#include <interpret/errors.hpp> #include <interpret/errors.hpp>
#include <interpret/interpreter.hpp> #include <interpret/interpreter.hpp>
#include <token/errors.hpp>
#include <token/tokenizer.hpp> #include <token/tokenizer.hpp>
void PrintBadToken(Token::Token const & token, std::vector<std::string> const & lines) void PrintBadToken(Token::Token const & token, std::vector<std::string> const & lines)
@@ -18,7 +19,13 @@ void PrintBadToken(Token::Token const & token, std::vector<std::string> const &
std::puts("^"); std::puts("^");
} }
void PrintTokenError(Interpret::TokenError const & err, std::vector<std::string> const & lines) void PrintTokenError(Interpret::InterpretationError const & err, std::vector<std::string> const & lines)
{
std::printf("%s ", err.errorMsg.c_str());
PrintBadToken(err.errorToken, lines);
}
void PrintTokenError(Token::TokenizationError const & err, std::vector<std::string> const & lines)
{ {
std::printf("%s ", err.errorMsg.c_str()); std::printf("%s ", err.errorMsg.c_str());
PrintBadToken(err.errorToken, lines); PrintBadToken(err.errorToken, lines);
@@ -71,27 +78,37 @@ Interpret::Code GetCodeFromFile(std::string const & filePath)
std::vector<std::string> lines; std::vector<std::string> lines;
std::string line; std::string line;
unsigned lineNumber = 0; unsigned lineNumber = 0;
bool tokenizationError = false;
while(std::getline(input, line)) while(std::getline(input, line))
{ {
tokenizer.Tokenize(line, lineNumber, tokens); try
{
tokenizer.Tokenize(line, lineNumber, tokens);
}
catch(Token::TokenizationError & err)
{
tokenizationError = true;
PrintTokenError(err, lines);
}
++lineNumber; ++lineNumber;
lines.push_back(line); lines.push_back(line);
} }
input.close(); input.close();
// Validate the syntax // Validate the syntax
bool syntaxOk = true; bool syntaxError = false;
for(auto const & token : tokens) for(auto const & token : tokens)
{ {
if (!token.isValid) if (!token.isValid)
{ {
std::printf("Syntax error "); std::printf("Syntax error ");
PrintBadToken(token, lines); PrintBadToken(token, lines);
syntaxOk = false; syntaxError = true;
} }
} }
if (!syntaxOk) if (tokenizationError || syntaxError)
{ {
std::puts("Aborting due to syntax error(s)"); std::puts("Aborting due to syntax error(s)");
exit(1); exit(1);
@@ -103,7 +120,7 @@ Interpret::Code GetCodeFromFile(std::string const & filePath)
{ {
interpreter.Interpret(tokens, code); interpreter.Interpret(tokens, code);
} }
catch(Interpret::TokenError & e) catch(Interpret::InterpretationError & e)
{ {
PrintTokenError(e, lines); PrintTokenError(e, lines);
exit(1); exit(1);
@@ -116,13 +133,13 @@ int main(int argc, char ** argv)
{ {
if (argc != 2) if (argc != 2)
{ {
std::puts("Usage: wassembly [filename.wasm]"); std::printf("Usage: %s [filename.wasm]\n", argv[0]);
return 1; return 1;
} }
auto const code = GetCodeFromFile(argv[1]); auto const code = GetCodeFromFile(argv[1]);
auto vm = Execute::VirtualMachine::CreateFromCode(code); auto vm = Execute::VirtualMachine::CreateFromCode(code);
vm.Run(); vm.Run();
return 0; return 0;
} }

15
src/token/errors.cpp Normal file
View File

@@ -0,0 +1,15 @@
#include <token/errors.hpp>
namespace Token
{
    // Keeps a copy of the offending token and of the message describing
    // what went wrong with it.
    TokenizationError::TokenizationError(Token const & token, std::string const & msg)
        : errorToken(token)
        , errorMsg(msg)
    {}

    // Unterminated string literal: forwards the token to the base class
    // together with a fixed message.
    MissingEndOfString::MissingEndOfString(Token const & token)
        : TokenizationError(token, "Missing string terminator (\")")
    {}
}

View File

@@ -103,6 +103,11 @@ namespace Token
return Token(TokenType::Memory, value, isValid, lineNumber, lineColumn); return Token(TokenType::Memory, value, isValid, lineNumber, lineColumn);
} }
// Builds a String token from the text of a string literal.
//
// `value` is the literal as it appears in the source, starting at the opening
// double quote. The closing quote is optional: it is removed when it is
// present and unescaped, and the text is left alone otherwise. This way a
// caller that slices the literal up to (but not including) the closing quote
// does not lose the last character of the string, and very short inputs no
// longer depend on unsigned wrap-around of `size() - 2`.
//
// Escape sequences are kept verbatim; nothing is unescaped here.
// lineNumber and lineColumn are forwarded unchanged to the Token constructor.
Token Token::CreateStringLiteralToken(std::string const & value, int const lineNumber, int const lineColumn)
{
    std::string::size_type first = 0u;
    std::string::size_type last = value.size();

    // Drop the opening quote.
    if (first < last && value[first] == '"')
    {
        ++first;
    }

    // Drop the closing quote, but only when it really terminates the literal:
    // a quote preceded by a backslash belongs to the contents.
    bool const hasClosingQuote = last > first
        && value[last - 1u] == '"'
        && (last - first == 1u || value[last - 2u] != '\\');
    if (hasClosingQuote)
    {
        --last;
    }

    return Token(TokenType::String, value.substr(first, last - first), true, lineNumber, lineColumn);
}
void Token::DebugPrint() const void Token::DebugPrint() const
{ {
std::putc(' ', stdout); std::putc(' ', stdout);
@@ -188,6 +193,10 @@ namespace Token
} }
break; break;
case TokenType::String:
std::printf("STRING=\"%s\"", std::get<std::string>(data).c_str());
break;
case TokenType::Unknown: case TokenType::Unknown:
default: default:
std::printf("UNKNOWN_TOKEN"); std::printf("UNKNOWN_TOKEN");

View File

@@ -1,5 +1,6 @@
#include <map> #include <map>
#include <stdexcept> #include <stdexcept>
#include <token/errors.hpp>
#include <token/tokenizer.hpp> #include <token/tokenizer.hpp>
namespace Token namespace Token
@@ -9,7 +10,7 @@ namespace Token
return c == '\n' || c == ' ' || c == '\t' || c == '\r'; return c == '\n' || c == ' ' || c == '\t' || c == '\r';
} }
std::tuple<int, bool> TryParse(std::string const & string) std::tuple<int, bool> TryParseInt(std::string const & string)
{ {
try try
{ {
@@ -30,20 +31,22 @@ namespace Token
} }
char const prefix = string[0]; char const prefix = string[0];
if (prefix == '$') switch(prefix)
{ {
auto const result = TryParse(string.substr(1, string.size())); case '$':
return Token::CreateImmediateValueToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn); {
} auto const result = TryParseInt(string.substr(1, string.size()));
return Token::CreateImmediateValueToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn);
}
if (prefix == '%') case '%':
{
return Token::CreateRegisterToken(GetRegisterType(string.substr(1, string.size())), lineNumber, lineColumn); return Token::CreateRegisterToken(GetRegisterType(string.substr(1, string.size())), lineNumber, lineColumn);
}
if (prefix == ';') case ';':
{
return Token::CreateStatementEndToken(lineNumber, lineColumn); return Token::CreateStatementEndToken(lineNumber, lineColumn);
default:
break;
} }
char const postfix = string[string.size() - 1]; char const postfix = string[string.size() - 1];
@@ -64,7 +67,7 @@ namespace Token
std::string const valueString = string.substr(2, string.size() - 3u); std::string const valueString = string.substr(2, string.size() - 3u);
if (memoryPrefix == '$') if (memoryPrefix == '$')
{ {
auto const result = TryParse(valueString); auto const result = TryParseInt(valueString);
return Token::CreateMemoryToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn); return Token::CreateMemoryToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn);
} }
else if (memoryPrefix == '%') else if (memoryPrefix == '%')
@@ -97,8 +100,10 @@ namespace Token
enum class TokenizerState enum class TokenizerState
{ {
LookForNextToken, LookForNextToken,
LookForStringEnd,
LookForTokenEnd, LookForTokenEnd,
}; };
TokenizerState state = TokenizerState::LookForNextToken; TokenizerState state = TokenizerState::LookForNextToken;
unsigned columnTokenStart = 0; unsigned columnTokenStart = 0;
for(unsigned column = 0u; column < line.size(); ++column) for(unsigned column = 0u; column < line.size(); ++column)
@@ -115,7 +120,21 @@ namespace Token
} }
columnTokenStart = column; columnTokenStart = column;
state = TokenizerState::LookForTokenEnd;
switch(line[column])
{
case '"':
state = TokenizerState::LookForStringEnd;
break;
case ';':
tokens.push_back(ExtractToken(line.substr(column, 1), lineNumber, column));
break;
default:
state = TokenizerState::LookForTokenEnd;
break;
}
} }
break; break;
@@ -130,11 +149,29 @@ namespace Token
state = TokenizerState::LookForNextToken; state = TokenizerState::LookForNextToken;
} }
break; break;
case TokenizerState::LookForStringEnd:
if (line[column] == '"' && line[column - 1] != '\\')
{
tokens.push_back(
Token::CreateStringLiteralToken(line.substr(columnTokenStart, column - columnTokenStart), lineNumber, columnTokenStart));
state = TokenizerState::LookForNextToken;
}
} }
} }
if (state == TokenizerState::LookForTokenEnd)
switch(state)
{ {
case TokenizerState::LookForTokenEnd:
tokens.push_back(ExtractToken(line.substr(columnTokenStart, line.size()), lineNumber, columnTokenStart)); tokens.push_back(ExtractToken(line.substr(columnTokenStart, line.size()), lineNumber, columnTokenStart));
break;
case TokenizerState::LookForStringEnd:
throw MissingEndOfString(ExtractToken(line.substr(columnTokenStart, line.size()), lineNumber, columnTokenStart));
case TokenizerState::LookForNextToken:
default:
break;
} }
} }
} }