Extract preprocessing from tokenizer

This commit is contained in:
2020-08-29 11:25:10 +02:00
parent aebc1dd86d
commit 71678b2ec6
12 changed files with 254 additions and 134 deletions

View File

@@ -24,8 +24,7 @@ afterwards can be a bit cryptic as to where it originated.
- `[operation][number type]`, e.g. `divi` for divide (div) integer - `[operation][number type]`, e.g. `divi` for divide (div) integer
- `%[register]` for addressing registers - `%[register]` for addressing registers
- `$[value]` for using immediate (literal) integer values - `$[value]` for using immediate (literal) integer values
- `'a'` for using immediate character values (currently only supports non - `'a'` for using immediate character values
escaped characters)
- `;` for end of statement (mandatory) - `;` for end of statement (mandatory)
- `[label]:` for labels - `[label]:` for labels
- `#[text]` for comments: any text is ignored till a newline (`\n`) is found - `#[text]` for comments: any text is ignored till a newline (`\n`) is found
@@ -92,13 +91,33 @@ There is currently no strict checking, so be careful.
### Preprocessor ### Preprocessor
All preprocessor directives are prefixed by a `#`. Ill formed preprocessor All preprocessor directives are prefixed by a `#`. Ill formed preprocessor
directives do not halt compilation, they are merely reported and then ignored. directives do not halt compilation, they are merely ignored. All preprocessing
is done in a single pass. Recursion, or the definition of a directive by another
directive, is therefore not supported.
- `DEFINE` replaces any occurrence of the first argument by the second argument. - `DEFINE <x> [y]` replaces any occurrence of the first argument (`x`) by the
The second argument may be empty, effectively deleting occurences of argument second optional argument (`y`). The second argument can be empty, effectively
one. Quotes are currently not supported and arguments are separated by deleting all occurrences of `x`. Quotes are currently not supported and
whitespace. If multiple defines exist for the same substitution the first arguments are separated by whitespace. If multiple defines exist the later
declared is used. declarations will overwrite the previous.
### Registers
All registers are 32 bits wide. The following 4 registers currently exist:
- A
- B
- C
- D
### Immediates
An immediate integer value for 42 is, for example, `$42`. Negative values are
allowed, for example `$-42`. Notation must be decimal; hexadecimal and octal
are **not supported**.
The immediate character value for the letter g is `'g'`. Character values must
be a single character; escaped or multi byte characters are **not supported**.
### Operands ### Operands

View File

@@ -1,4 +1,4 @@
#DEFINE #DEFINE LETTER_O 'o'
addi $10 $-5 %A; addi $10 $-5 %A;
subi %A $2 %B; subi %A $2 %B;
@@ -31,7 +31,7 @@ int PRINT_CHAR;
seti %A $108; # l seti %A $108; # l
int PRINT_CHAR; int PRINT_CHAR;
int PRINT_CHAR; int PRINT_CHAR;
seti %A $111; # o seti %A LETTER_O;
int PRINT_CHAR; int PRINT_CHAR;
seti %A $32; # space seti %A $32; # space
@@ -105,9 +105,6 @@ seti %B $10;
int $5; int $5;
int $3; int $3;
# Demonstrate string literals
"Hello world!";
exit; exit;
noop_function: noop_function:

View File

@@ -0,0 +1,21 @@
#pragma once
#include <string>
#include <vector>
// Collects `#DEFINE` substitutions from source lines and applies them, removing
// comments and directives from the lines in the process.
class Preprocessor
{
private:
// Parallel lists: the value at index i belongs to the identifier at index i.
std::vector<std::string> substitutionIdentifiers;
std::vector<std::string> substitutionValues;
// Handles the `#` found at `lineColumn` of `line`: records a DEFINE directive
// if there is one and cuts the line off at `lineColumn`.
void extractComment(std::string & line,
std::size_t const lineNumber,
std::size_t const lineColumn);
// Applies the known substitutions to a single line and hands the first `#`
// that is found to extractComment.
void processLine(std::string & line, std::size_t const lineNumber);
public:
// Clears all known substitutions and preprocesses every line in place.
void process(std::vector<std::string> & lines);
// Prints every stored substitution as "identifier -> value".
void printSubstitutions() const;
};

View File

@@ -15,11 +15,6 @@ namespace Token
int const lineNumber, int const lineNumber,
int const lineColumn) const; int const lineColumn) const;
void ParseComment(
std::string const & string,
int const lineNumber,
int const lineColumn);
void ParseCharacterLiteral( void ParseCharacterLiteral(
std::string const & line, std::string const & line,
int const lineNumber, int const lineNumber,

6
include/utils.hpp Normal file
View File

@@ -0,0 +1,6 @@
#pragma once
// Small helpers that are shared between the preprocessor and the tokenizer.
namespace Utils
{
// Returns true when `c` is a newline, space, tab or carriage return.
bool isWhitespaceCharacter(char const c);
}

View File

@@ -9,12 +9,15 @@ class Wassembler
private: private:
Configuration config; Configuration config;
Execute::VirtualMachine vm; Execute::VirtualMachine vm;
bool printSubstitutions;
bool LoadLinesFromFile(std::string const & filePath, std::vector<std::string> & lines) const; bool LoadLinesFromFile(std::string const & filePath, std::vector<std::string> & lines) const;
bool LoadTokens(std::vector<std::string> const & lines, std::vector<Token::Token> & tokens) const; bool LoadTokens(std::vector<std::string> const & lines, std::vector<Token::Token> & tokens) const;
public: public:
void SetMemorySize(unsigned const size); void SetMemorySize(unsigned const size);
void EnableSubstitutionsLogging();
bool LoadFromFile(std::string const & filePath); bool LoadFromFile(std::string const & filePath);
void Run(); void Run();

View File

@@ -13,7 +13,7 @@ BINARY = bin/wassembler
all: ${BINARY} all: ${BINARY}
check: ${BINARY} check: ${BINARY}
./$< ./bin/test.wasm ./$< ./bin/test.wasm -p
clean: clean:
-rm -rf build ./${BINARY} -rm -rf build ./${BINARY}

View File

@@ -7,10 +7,12 @@ int main(int argc, char ** argv)
{ {
std::string inputFile; std::string inputFile;
unsigned memorySize = 1024; unsigned memorySize = 1024;
bool printSubstitutions = false;
auto cli = ( auto cli = (
clipp::value("input wasm file").set(inputFile), clipp::value("input wasm file").set(inputFile),
clipp::option("-m", "--memory-size") & clipp::value("memory size", memorySize) clipp::option("-m", "--memory-size") & clipp::value("memory size", memorySize),
clipp::option("-p", "--print-substitutions").set(printSubstitutions)
); );
if (!clipp::parse(argc, argv, cli)) if (!clipp::parse(argc, argv, cli))
@@ -21,6 +23,11 @@ int main(int argc, char ** argv)
Wassembler wassembler; Wassembler wassembler;
wassembler.SetMemorySize(memorySize); wassembler.SetMemorySize(memorySize);
if (printSubstitutions)
{
wassembler.EnableSubstitutionsLogging();
}
if (!wassembler.LoadFromFile(inputFile)) if (!wassembler.LoadFromFile(inputFile))
{ {
exit(1); exit(1);

View File

@@ -0,0 +1,159 @@
#include <preprocessor/preprocessor.hpp>
#include <utils.hpp>
// Replaces the substitution identifier that starts at `lineColumn` of `line`
// by its value.
//
// `substitutionIdentifiers` and `substitutionValues` are parallel lists: the
// value at index i belongs to the identifier at index i. The lists are searched
// from the most recently added entry backwards, so that when an identifier has
// been defined more than once the later definition wins.
//
// Returns true when a substitution was performed (`line` is modified in place)
// and false when no identifier starts at `lineColumn` (`line` is untouched).
bool trySubstitute(
    std::string & line,
    std::size_t const lineColumn,
    std::vector<std::string> const & substitutionIdentifiers,
    std::vector<std::string> const & substitutionValues)
{
    if (lineColumn >= line.size())
    {
        // Nothing can start at or beyond the end of the line
        return false;
    }

    for (std::size_t remaining = substitutionIdentifiers.size(); remaining > 0; --remaining)
    {
        std::size_t const index = remaining - 1;
        std::string const & identifier = substitutionIdentifiers[index];

        // An empty identifier would match at every column, an identifier
        // without a value entry cannot be substituted
        if (identifier.empty() || index >= substitutionValues.size())
        {
            continue;
        }

        if (line.compare(lineColumn, identifier.size(), identifier) != 0)
        {
            continue;
        }

        // Swap the identifier for its value without copying the whole line
        line.replace(lineColumn, identifier.size(), substitutionValues[index]);
        return true;
    }

    return false;
}
// Handles everything from the `#` at `lineColumn` up to the end of `line`.
//
// When the `#` is directly followed by the `DEFINE` directive (terminated by
// whitespace or by the end of the line) the first two whitespace separated
// arguments are stored as a substitution: the first one is the identifier, the
// optional second one is its value. Any further arguments are ignored.
// Defining an identifier again overwrites its previous value. Everything else
// is treated as a plain comment.
//
// In all cases `line` is cut off at `lineColumn`, which removes the comment or
// directive text from the line.
//
// `lineNumber` is not used at the moment; ill formed directives are ignored
// without a report.
void Preprocessor::extractComment(
    std::string & line,
    std::size_t const lineNumber,
    std::size_t const lineColumn)
{
    static_cast<void>(lineNumber);

    static char const directive[] = "DEFINE";
    std::size_t const directiveLength = sizeof(directive) - 1;
    std::size_t const directiveStart = lineColumn + 1;
    std::size_t const directiveEnd = directiveStart + directiveLength;

    // The directive name must match exactly and must be delimited, otherwise
    // comments such as `#foo` or `#DEFINEX y` would be parsed as a DEFINE
    bool const isDefine =
        line.size() >= directiveEnd &&
        line.compare(directiveStart, directiveLength, directive) == 0 &&
        (line.size() == directiveEnd || Utils::isWhitespaceCharacter(line[directiveEnd]));

    if (isDefine)
    {
        // arguments[0] is the identifier, arguments[1] the (optional) value
        std::string arguments[2];
        std::size_t argumentCount = 0;
        std::size_t cursor = directiveEnd;

        while (cursor < line.size() && argumentCount < 2)
        {
            while (cursor < line.size() && Utils::isWhitespaceCharacter(line[cursor]))
            {
                ++cursor;
            }

            std::size_t const argumentStart = cursor;
            while (cursor < line.size() && !Utils::isWhitespaceCharacter(line[cursor]))
            {
                ++cursor;
            }

            if (cursor > argumentStart)
            {
                arguments[argumentCount] = line.substr(argumentStart, cursor - argumentStart);
                ++argumentCount;
            }
        }

        if (argumentCount > 0)
        {
            // Later definitions overwrite earlier ones for the same identifier
            bool redefined = false;
            for (std::size_t i = 0; i < substitutionIdentifiers.size(); ++i)
            {
                if (substitutionIdentifiers[i] == arguments[0])
                {
                    substitutionValues[i] = arguments[1];
                    redefined = true;
                    break;
                }
            }

            if (!redefined)
            {
                substitutionIdentifiers.push_back(arguments[0]);
                substitutionValues.push_back(arguments[1]);
            }
        }
    }

    line = line.substr(0, lineColumn);
}
// Preprocesses a single line in place.
//
// Every non whitespace position is checked against the known substitution
// identifiers, the most recently defined one first so that later definitions
// take precedence. A match is replaced by its value and scanning resumes
// directly after the inserted value. The inserted text is never scanned
// again, which keeps preprocessing single pass and prevents a value that
// contains its own identifier (e.g. `#DEFINE A AA`) from growing the line
// forever. The first `#` that is not part of a substitution is handed to
// extractComment, which ends the processing of the line.
void Preprocessor::processLine(std::string & line, std::size_t const lineNumber)
{
    std::size_t column = 0;
    while (column < line.size())
    {
        if (Utils::isWhitespaceCharacter(line[column]))
        {
            ++column;
            continue;
        }

        bool substituted = false;
        for (std::size_t remaining = substitutionIdentifiers.size(); remaining > 0; --remaining)
        {
            std::string const & identifier = substitutionIdentifiers[remaining - 1];
            if (identifier.empty() ||
                line.compare(column, identifier.size(), identifier) != 0)
            {
                continue;
            }

            std::string const & value = substitutionValues[remaining - 1];
            line.replace(column, identifier.size(), value);

            // Continue with the first character after the inserted value. For
            // an empty value that is the character that moved into `column`.
            column += value.size();
            substituted = true;
            break;
        }

        if (substituted)
        {
            continue;
        }

        if (line[column] == '#')
        {
            extractComment(line, lineNumber, column);
            return;
        }

        ++column;
    }
}
// Preprocesses all `lines` in place. Substitutions that were collected by an
// earlier call are discarded first. Lines are handled in order and each one is
// passed its zero based index as line number.
void Preprocessor::process(std::vector<std::string> & lines)
{
    substitutionIdentifiers.clear();
    substitutionValues.clear();

    std::size_t lineNumber = 0;
    for (std::string & currentLine : lines)
    {
        processLine(currentLine, lineNumber);
        ++lineNumber;
    }
}
void Preprocessor::printSubstitutions() const
{
for(std::size_t i = 0; i < substitutionIdentifiers.size(); ++i)
{
std::printf(
"%s -> %s\n",
substitutionIdentifiers[i].c_str(),
substitutionValues[i].c_str());
}
}

View File

@@ -3,6 +3,7 @@
#include <stdexcept> #include <stdexcept>
#include <token/errors.hpp> #include <token/errors.hpp>
#include <token/tokenizer.hpp> #include <token/tokenizer.hpp>
#include <utils.hpp>
namespace Token namespace Token
{ {
@@ -150,111 +151,6 @@ namespace Token
return Token::CreateLabelToken(string, lineNumber, lineColumn); return Token::CreateLabelToken(string, lineNumber, lineColumn);
} }
// Tells whether `c` is one of the characters that separate tokens: space, tab,
// carriage return or newline.
bool IsWhiteSpace(char const c)
{
    if (c == ' ' || c == '\t')
    {
        return true;
    }
    return c == '\r' || c == '\n';
}
// Parses the text that follows the `#` found at `lineColumn` of `string`.
// When that text is a `DEFINE` directive, its first argument and its optional
// second argument are appended to `substitutions` as a pair. A DEFINE without
// arguments only produces a warning that mentions `lineNumber + 1`.
void Tokenizer::ParseComment(
std::string const & string,
int const lineNumber,
int const lineColumn)
{
// Index of the first character after the `#`
unsigned const commentContentStart = lineColumn + 1;
// A `#` that is followed by whitespace is an ordinary comment: nothing to do
if (string.size() < commentContentStart ||
IsWhiteSpace(string[commentContentStart]))
{
return;
}
// States of the scanner that walks over the directive and its arguments
enum class CommentParseState
{
LookForDirectiveEnd,
LookForArgumentStart,
LookForArgumentEnd
};
std::string firstArgument, secondArgument;
unsigned argumentCount = 0, argumentStart = 0;
CommentParseState state = CommentParseState::LookForDirectiveEnd;
for(unsigned i = commentContentStart + 1; i < string.size(); ++i)
{
switch(state)
{
case CommentParseState::LookForDirectiveEnd:
// The directive name ends at the first whitespace character
if(IsWhiteSpace(string[i]))
{
if (string.compare(commentContentStart, i - commentContentStart, "DEFINE"))
{
// Nonzero = not equal
return;
}
state = CommentParseState::LookForArgumentStart;
}
break;
case CommentParseState::LookForArgumentStart:
// Skip whitespace until the next argument begins
if(!IsWhiteSpace(string[i]))
{
argumentStart = i;
state = CommentParseState::LookForArgumentEnd;
}
break;
case CommentParseState::LookForArgumentEnd:
// An argument ends at the first whitespace character after its start
if (IsWhiteSpace(string[i]))
{
state = CommentParseState::LookForArgumentStart;
switch(argumentCount)
{
case 0:
firstArgument = string.substr(argumentStart, i - argumentStart);
break;
case 1:
secondArgument = string.substr(argumentStart, i - argumentStart);
break;
default:
// A third argument ended: stop scanning, the remainder is not used
goto end_state_loop;
}
++argumentCount;
}
break;
}
}
end_state_loop:
// Finish an argument that runs up to the end of the string
switch(state)
{
case CommentParseState::LookForDirectiveEnd:
case CommentParseState::LookForArgumentStart:
break;
case CommentParseState::LookForArgumentEnd:
switch(argumentCount)
{
case 0:
firstArgument = string.substr(argumentStart);
break;
case 1:
secondArgument = string.substr(argumentStart);
break;
}
++argumentCount;
break;
}
if (argumentCount > 0)
{
substitutions.push_back(std::make_pair(firstArgument, secondArgument));
}
else
{
// NOTE(review): this branch is also reached when the text after `#` contains
// no whitespace at all (state is still LookForDirectiveEnd), in which case the
// text was never compared against "DEFINE" - confirm that this is intended.
std::printf("WARNING: DEFINE with no arguments on line %u\n", lineNumber + 1);
}
}
// Modifies the lineColumn parameter to point at the character literal end // Modifies the lineColumn parameter to point at the character literal end
void Tokenizer::ParseCharacterLiteral( void Tokenizer::ParseCharacterLiteral(
std::string const & line, std::string const & line,
@@ -314,16 +210,12 @@ end_state_loop:
switch(state) switch(state)
{ {
case TokenizerState::LookForNextToken: case TokenizerState::LookForNextToken:
if (!IsWhiteSpace(line[column])) if (!Utils::isWhitespaceCharacter(line[column]))
{ {
if (line[column] == '#')
{
ParseComment(line, lineNumber, column);
return;
}
if (line[column] == '\'') if (line[column] == '\'')
{ {
// TODO integrate this better with the existing extract token
// infrastructure
ParseCharacterLiteral(line, lineNumber, column, tokens); ParseCharacterLiteral(line, lineNumber, column, tokens);
break; break;
} }
@@ -345,7 +237,7 @@ end_state_loop:
break; break;
case TokenizerState::LookForTokenEnd: case TokenizerState::LookForTokenEnd:
if (IsWhiteSpace(line[column]) || line[column] == ';') if (Utils::isWhitespaceCharacter(line[column]) || line[column] == ';')
{ {
tokens.push_back(ExtractToken(line.substr(columnTokenStart, column - columnTokenStart), lineNumber, columnTokenStart)); tokens.push_back(ExtractToken(line.substr(columnTokenStart, column - columnTokenStart), lineNumber, columnTokenStart));
if (line[column] == ';') if (line[column] == ';')

9
src/utils.cpp Normal file
View File

@@ -0,0 +1,9 @@
#include <utils.hpp>
namespace Utils
{
    // Reports whether `c` counts as whitespace: a space, a tab, a newline or a
    // carriage return. Every other character, including '\0', does not.
    bool isWhitespaceCharacter(char const c)
    {
        switch (c)
        {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            return true;
        default:
            return false;
        }
    }
}

View File

@@ -1,5 +1,6 @@
#include <fstream> #include <fstream>
#include <interpret/errors.hpp> #include <interpret/errors.hpp>
#include <preprocessor/preprocessor.hpp>
#include <token/errors.hpp> #include <token/errors.hpp>
#include <wassembler.hpp> #include <wassembler.hpp>
@@ -48,7 +49,6 @@ bool Wassembler::LoadLinesFromFile(std::string const & filePath, std::vector<std
return true; return true;
} }
bool Wassembler::LoadTokens(std::vector<std::string> const & lines, std::vector<Token::Token> & tokens) const bool Wassembler::LoadTokens(std::vector<std::string> const & lines, std::vector<Token::Token> & tokens) const
{ {
Token::Tokenizer tokenizer; Token::Tokenizer tokenizer;
@@ -86,6 +86,11 @@ void Wassembler::SetMemorySize(unsigned const size)
config.memorySize = size; config.memorySize = size;
} }
void Wassembler::EnableSubstitutionsLogging()
{
printSubstitutions = true;
}
bool Wassembler::LoadFromFile(std::string const & filePath) bool Wassembler::LoadFromFile(std::string const & filePath)
{ {
std::vector<std::string> lines; std::vector<std::string> lines;
@@ -95,6 +100,13 @@ bool Wassembler::LoadFromFile(std::string const & filePath)
return false; return false;
} }
Preprocessor preprocessor;
preprocessor.process(lines);
if (printSubstitutions)
{
preprocessor.printSubstitutions();
}
std::vector<Token::Token> tokens; std::vector<Token::Token> tokens;
if (!LoadTokens(lines, tokens)) if (!LoadTokens(lines, tokens))
{ {