Add DEFINE preprocessor directive

This commit is contained in:
2020-05-30 12:21:50 +02:00
parent 3bb2cc17e9
commit 0286f25e8d
5 changed files with 174 additions and 29 deletions

View File

@@ -12,7 +12,7 @@ language, or keep on reading!
## From Text To Runtime Behaviour
In order to turn the source text into executable code we use 3 passes:
- Pass 1: tokenization (syntax check)
- Pass 1: tokenization (syntax check) and preprocessing (substitution)
- Pass 2: interpretation (semantics check)
- Pass 3: execution (runtime check)
@@ -69,7 +69,6 @@ The following characters are used as identifiers:
- semicolon (`;`) for statement termination
- hash (`#`) for comments
- square brackets (`[` and `]`) for addressing memory
- double quotes (`"`) for string values
## Memory Model
@@ -87,12 +86,16 @@ second byte of location `$900`).
All symbols are reserved keywords and can therefore NOT be used as labels.
There is currently no strict checking, so be careful.
## Directives
## Preprocessor
- `DECLARE` declares the first label argument to equal the second, immediate
value, argument and is used to declare a constant for the virtual machine.
- `STRING` puts the string value declared as the second argument in the
memory location of the first immediate argument
All preprocessor directives are prefixed by a `#`. Ill-formed preprocessor
directives do not halt compilation; they are merely reported and then ignored.
- `DEFINE` replaces any occurrence of the first argument by the second argument.
The second argument may be empty, effectively deleting occurrences of the first
argument. Quotes are currently not supported and arguments are separated by
whitespace. If multiple defines exist for the same substitution, the first one
declared is used.
### Operands

View File

@@ -1,4 +1,4 @@
#DECLARE MEMORY_SIZE $4096;
#DEFINE
addi $10 $-5 %A;
subi %A $2 %B;
@@ -22,16 +22,17 @@ seti %A %B;
lti %A $10;
jmp count_loop;
#DEFINE PRINT_CHAR $0
# Hello world
seti %A $72; # H
int $0;
int PRINT_CHAR;
seti %A $101; # e
int $0;
int PRINT_CHAR;
seti %A $108; # l
int $0;
int $0;
int PRINT_CHAR;
int PRINT_CHAR;
seti %A $111; # o
int $0;
int PRINT_CHAR;
seti %A $32; # space
int $0;

View File

@@ -7,6 +7,12 @@ namespace Token
{
// Turns source lines into Token objects and applies the textual
// substitutions registered through `#DEFINE` directives.
class Tokenizer
{
private:
// (name, replacement) pairs recorded by ParseComment, in declaration order.
// ExtractToken applies the first entry whose name equals the token text.
std::vector<std::pair<std::string, std::string>> substitutions;
// Builds a Token from one piece of text found at lineNumber/lineColumn.
// The text is taken by value because a matching substitution overwrites it
// before the token kind is determined.
Token ExtractToken(std::string string, int const lineNumber, int const lineColumn) const;
// Inspects the text that follows the `#` located at lineColumn and, when it
// is a DEFINE directive with at least one argument, records a substitution.
void ParseComment(std::string const & string, int const lineNumber, int const lineColumn);
public:
// Appends the tokens found in `line` to `tokens`. When a `#` is encountered
// the remainder of the line is handed to ParseComment and tokenizing of that
// line stops.
void Tokenize(std::string const & line, int const lineNumber, std::vector<Token> & tokens);
};

View File

@@ -1,7 +1,6 @@
#pragma once
#include <configuration.hpp>
#include <execute/virtualmachine.hpp>
#include <preprocessor/preprocessor.hpp>
#include <interpret/interpreter.hpp>
#include <token/tokenizer.hpp>

View File

@@ -5,11 +5,6 @@
namespace Token
{
// Reports whether c is a blank, tab, line feed or carriage return.
bool IsWhiteSpace(char const c)
{
    switch(c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return true;

    default:
        return false;
    }
}
std::tuple<int, bool> TryParseInt(std::string const & string)
{
try
@@ -23,24 +18,42 @@ namespace Token
}
}
Token ExtractToken(std::string const & string, int const lineNumber, int const lineColumn)
Token Tokenizer::ExtractToken(std::string string,
int const lineNumber,
int const lineColumn) const
{
if (string.size() == 0)
{
return Token::CreateUnknownToken(lineNumber, lineColumn);
}
for(std::size_t i = 0; i < substitutions.size(); ++i)
{
if (string == substitutions[i].first)
{
string = substitutions[i].second;
break;
}
}
char const prefix = string[0];
switch(prefix)
{
case '$':
{
auto const result = TryParseInt(string.substr(1, string.size()));
return Token::CreateImmediateValueToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn);
return Token::CreateImmediateValueToken(
std::get<0>(result),
std::get<1>(result),
lineNumber,
lineColumn);
}
case '%':
return Token::CreateRegisterToken(GetRegisterType(string.substr(1, string.size())), lineNumber, lineColumn);
return Token::CreateRegisterToken(GetRegisterType(
string.substr(1, string.size())),
lineNumber,
lineColumn);
case ';':
return Token::CreateStatementEndToken(lineNumber, lineColumn);
@@ -53,7 +66,11 @@ namespace Token
if (postfix == ':')
{
// TODO check if label is an Operand?
return Token::CreateLabelToken(string.substr(0, string.size() - 1), true, lineNumber, lineColumn);
return Token::CreateLabelToken(
string.substr(0, string.size() - 1),
true,
lineNumber,
lineColumn);
}
if (prefix == '[' && postfix == ']')
@@ -68,11 +85,18 @@ namespace Token
if (memoryPrefix == '$')
{
auto const result = TryParseInt(valueString);
return Token::CreateMemoryToken(std::get<0>(result), std::get<1>(result), lineNumber, lineColumn);
return Token::CreateMemoryToken(
std::get<0>(result),
std::get<1>(result),
lineNumber,
lineColumn);
}
else if (memoryPrefix == '%')
{
return Token::CreateMemoryToken(GetRegisterType(valueString), lineNumber, lineColumn);
return Token::CreateMemoryToken(
GetRegisterType(valueString),
lineNumber,
lineColumn);
}
else
{
@@ -95,7 +119,115 @@ namespace Token
return Token::CreateLabelToken(string, true, lineNumber, lineColumn);
}
void Tokenizer::Tokenize(std::string const & line, int const lineNumber, std::vector<Token> & tokens)
// Reports whether c is a blank, tab, line feed or carriage return.
bool IsWhiteSpace(char const c)
{
    switch(c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return true;

    default:
        return false;
    }
}
// Examines the text following a `#` and registers a substitution when that
// text is a DEFINE directive.
//
// A directive must start directly after the `#` (no whitespace in between);
// anything else is an ordinary comment and is ignored. The directive name and
// its arguments are separated by whitespace; quoting is not supported. The
// first argument is the text to replace, the optional second argument is the
// replacement (empty when absent). Any further arguments are ignored.
//
// string      the complete source line
// lineNumber  zero based line index, only used for the warning message
// lineColumn  index of the `#` character within string
void Tokenizer::ParseComment(
    std::string const & string,
    int const lineNumber,
    int const lineColumn)
{
    std::size_t const contentStart = static_cast<std::size_t>(lineColumn) + 1;

    // `>=` rather than `>`: a `#` that is the last character of the line has
    // no content at all. Indexing at size() would yield the terminator, which
    // is not whitespace and used to be mistaken for the start of a directive.
    if (contentStart >= string.size() || IsWhiteSpace(string[contentStart]))
    {
        return;
    }

    // Returns the next whitespace delimited word at or after `cursor`, or an
    // empty string when the end of the line has been reached.
    std::size_t cursor = contentStart;
    auto const nextWord = [&string, &cursor]() -> std::string
    {
        while (cursor < string.size() && IsWhiteSpace(string[cursor]))
        {
            ++cursor;
        }

        std::size_t const wordStart = cursor;
        while (cursor < string.size() && !IsWhiteSpace(string[cursor]))
        {
            ++cursor;
        }

        return string.substr(wordStart, cursor - wordStart);
    };

    // The name is checked even when it runs up to the end of the line, so
    // that a comment such as `#note` is not reported as a malformed DEFINE.
    if (nextWord() != "DEFINE")
    {
        return;
    }

    std::string const firstArgument = nextWord();
    if (firstArgument.empty())
    {
        // lineNumber is zero based; report it one based. The cast matches the
        // `%u` conversion in the format string.
        std::printf(
            "WARNING: DEFINE with no arguments on line %u\n",
            static_cast<unsigned>(lineNumber + 1));
        return;
    }

    // A missing second argument yields an empty replacement, which removes
    // every occurrence of the first argument.
    std::string const secondArgument = nextWord();
    substitutions.push_back(std::make_pair(firstArgument, secondArgument));
}
void Tokenizer::Tokenize(
std::string const & line,
int const lineNumber,
std::vector<Token> & tokens)
{
enum class TokenizerState
{
@@ -114,7 +246,7 @@ namespace Token
{
if (line[column] == '#')
{
// Ignore comments
ParseComment(line, lineNumber, column);
return;
}
@@ -123,7 +255,8 @@ namespace Token
switch(line[column])
{
case ';':
tokens.push_back(ExtractToken(line.substr(column, 1), lineNumber, column));
tokens.push_back(
ExtractToken(line.substr(column, 1), lineNumber, column));
break;
default:
@@ -154,7 +287,10 @@ namespace Token
switch(state)
{
case TokenizerState::LookForTokenEnd:
tokens.push_back(ExtractToken(line.substr(columnTokenStart, line.size()), lineNumber, columnTokenStart));
tokens.push_back(ExtractToken(
line.substr(columnTokenStart, line.size()),
lineNumber,
columnTokenStart));
break;
case TokenizerState::LookForNextToken: