Extract preprocessing from tokenizer
This commit is contained in:
35
README.md
35
README.md
@@ -24,8 +24,7 @@ afterwards can be a bit cryptic as to where it originated.
|
||||
- `[operation][number type]`, e.g. `divi` for divide (div) integer
|
||||
- `%[register]` for addressing registers
|
||||
- `$[value]` for using immediate (literal) integer values
|
||||
- `'a'` for using immediate character values (currently only supports non
|
||||
escaped characters)
|
||||
- `'a'` for using immediate character values
|
||||
- `;` for end of statement (mandatory)
|
||||
- `[label]:` for labels
|
||||
- `#[text]` for comments: any text is ignored till a newline (`\n`) is found
|
||||
@@ -92,13 +91,33 @@ There is currently no strict checking, so be careful.
|
||||
### Preprocessor
|
||||
|
||||
All preprocessor directives are prefixed by a `#`. Ill-formed preprocessor
|
||||
directives do not halt compilation, they are merely reported and then ignored.
|
||||
directives do not halt compilation, they are merely ignored. All preprocessing
|
||||
is done in a single pass. Recursion or definition of a directive by another
|
||||
directive is therefore not supported.
|
||||
|
||||
- `DEFINE` replaces any occurrence of the first argument by the second argument.
|
||||
The second argument may be empty, effectively deleting occurences of argument
|
||||
one. Quotes are currently not supported and arguments are separated by
|
||||
whitespace. If multiple defines exist for the same substitution the first
|
||||
declared is used.
|
||||
- `DEFINE <x> [y]` replaces any occurrence of the first argument (`x`) by the
|
||||
second optional argument (`y`). The second argument can be empty, effectively
|
||||
deleting all occurrences of `x`. Quotes are currently not supported and
|
||||
arguments are separated by whitespace. If multiple defines exist the later
|
||||
declarations will overwrite the previous.
|
||||
|
||||
### Registers
|
||||
|
||||
All registers are 32 bits wide. The following 4 registers currently exist:
|
||||
|
||||
- A
|
||||
- B
|
||||
- C
|
||||
- D
|
||||
|
||||
### Immediates
|
||||
|
||||
An immediate integer value for 42 is, for example, `$42`. Negative values are
|
||||
allowed, for example `$-42`. Notation must be in decimal; hexadecimal and octal
|
||||
are **not supported**.
|
||||
|
||||
The immediate character value for the letter g is `'g'`. Character values must
|
||||
be a single character; escaped or multi-byte characters are **not supported**.
|
||||
|
||||
### Operands
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#DEFINE
|
||||
#DEFINE LETTER_O 'o'
|
||||
|
||||
addi $10 $-5 %A;
|
||||
subi %A $2 %B;
|
||||
@@ -31,7 +31,7 @@ int PRINT_CHAR;
|
||||
seti %A $108; # l
|
||||
int PRINT_CHAR;
|
||||
int PRINT_CHAR;
|
||||
seti %A $111; # o
|
||||
seti %A LETTER_O;
|
||||
int PRINT_CHAR;
|
||||
|
||||
seti %A $32; # space
|
||||
@@ -105,9 +105,6 @@ seti %B $10;
|
||||
int $5;
|
||||
int $3;
|
||||
|
||||
# Demonstrate string literals
|
||||
"Hello world!";
|
||||
|
||||
exit;
|
||||
|
||||
noop_function:
|
||||
|
||||
21
include/preprocessor/preprocessor.hpp
Normal file
21
include/preprocessor/preprocessor.hpp
Normal file
@@ -0,0 +1,21 @@
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Preprocessor that rewrites source lines in place: it records `#DEFINE`
// substitutions, applies them to the text, and strips everything from a `#`
// to the end of its line.
class Preprocessor
|
||||
{
|
||||
private:
|
||||
// Identifiers collected from DEFINE directives, in declaration order.
std::vector<std::string> substitutionIdentifiers;
|
||||
// Parallel to substitutionIdentifiers: entry i is the replacement text for
// identifier i (empty when the DEFINE had no second argument).
std::vector<std::string> substitutionValues;
|
||||
|
||||
// Handles the `#` at lineColumn: records a DEFINE directive when present and
// truncates line at lineColumn.
void extractComment(std::string & line,
|
||||
std::size_t const lineNumber,
|
||||
std::size_t const lineColumn);
|
||||
|
||||
// Applies the known substitutions to a single line and hands the first `#`
// that is found to extractComment.
void processLine(std::string & line, std::size_t const lineNumber);
|
||||
|
||||
public:
|
||||
// Clears previously collected substitutions, then preprocesses every line in
// order, modifying the lines in place.
void process(std::vector<std::string> & lines);
|
||||
|
||||
// Prints every collected substitution as "identifier -> value", one per line.
void printSubstitutions() const;
|
||||
};
|
||||
@@ -15,11 +15,6 @@ namespace Token
|
||||
int const lineNumber,
|
||||
int const lineColumn) const;
|
||||
|
||||
void ParseComment(
|
||||
std::string const & string,
|
||||
int const lineNumber,
|
||||
int const lineColumn);
|
||||
|
||||
void ParseCharacterLiteral(
|
||||
std::string const & line,
|
||||
int const lineNumber,
|
||||
|
||||
6
include/utils.hpp
Normal file
6
include/utils.hpp
Normal file
@@ -0,0 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
// Helper functions shared by the preprocessor and the tokenizer.
namespace Utils
|
||||
{
|
||||
// Returns true when c is a newline, space, tab or carriage return.
bool isWhitespaceCharacter(char const c);
|
||||
}
|
||||
@@ -9,12 +9,15 @@ class Wassembler
|
||||
private:
|
||||
Configuration config;
|
||||
Execute::VirtualMachine vm;
|
||||
// Whether LoadFromFile prints the collected substitutions after preprocessing.
// Initialised to false so the flag has a defined value when
// EnableSubstitutionsLogging() is never called.
bool printSubstitutions = false;
|
||||
|
||||
bool LoadLinesFromFile(std::string const & filePath, std::vector<std::string> & lines) const;
|
||||
bool LoadTokens(std::vector<std::string> const & lines, std::vector<Token::Token> & tokens) const;
|
||||
|
||||
public:
|
||||
void SetMemorySize(unsigned const size);
|
||||
void EnableSubstitutionsLogging();
|
||||
|
||||
bool LoadFromFile(std::string const & filePath);
|
||||
|
||||
void Run();
|
||||
|
||||
2
makefile
2
makefile
@@ -13,7 +13,7 @@ BINARY = bin/wassembler
|
||||
all: ${BINARY}
|
||||
|
||||
check: ${BINARY}
|
||||
./$< ./bin/test.wasm
|
||||
./$< ./bin/test.wasm -p
|
||||
|
||||
clean:
|
||||
-rm -rf build ./${BINARY}
|
||||
|
||||
@@ -7,10 +7,12 @@ int main(int argc, char ** argv)
|
||||
{
|
||||
std::string inputFile;
|
||||
unsigned memorySize = 1024;
|
||||
bool printSubstitutions = false;
|
||||
|
||||
auto cli = (
|
||||
clipp::value("input wasm file").set(inputFile),
|
||||
clipp::option("-m", "--memory-size") & clipp::value("memory size", memorySize)
|
||||
clipp::option("-m", "--memory-size") & clipp::value("memory size", memorySize),
|
||||
clipp::option("-p", "--print-substitutions").set(printSubstitutions)
|
||||
);
|
||||
|
||||
if (!clipp::parse(argc, argv, cli))
|
||||
@@ -21,6 +23,11 @@ int main(int argc, char ** argv)
|
||||
|
||||
Wassembler wassembler;
|
||||
wassembler.SetMemorySize(memorySize);
|
||||
if (printSubstitutions)
|
||||
{
|
||||
wassembler.EnableSubstitutionsLogging();
|
||||
}
|
||||
|
||||
if (!wassembler.LoadFromFile(inputFile))
|
||||
{
|
||||
exit(1);
|
||||
|
||||
159
src/preprocessor/preprocessor.cpp
Normal file
159
src/preprocessor/preprocessor.cpp
Normal file
@@ -0,0 +1,159 @@
|
||||
#include <preprocessor/preprocessor.hpp>
|
||||
#include <utils.hpp>
|
||||
|
||||
// Tries to replace a substitution identifier that starts exactly at
// lineColumn with its value.
//
// line                     Line to modify in place.
// lineColumn               Index in line at which an identifier must start.
// substitutionIdentifiers  Identifiers in declaration order.
// substitutionValues       Replacement texts, parallel to the identifiers.
//
// Returns true when a substitution was applied, false when nothing matched
// (in which case line is left untouched).
bool trySubstitute(
    std::string & line,
    std::size_t const lineColumn,
    std::vector<std::string> const & substitutionIdentifiers,
    std::vector<std::string> const & substitutionValues)
{
    // Nothing can start at or past the end of the line; this also prevents
    // std::string::compare from throwing std::out_of_range.
    if (lineColumn >= line.size())
    {
        return false;
    }

    // Only consider entries that have both an identifier and a value.
    std::size_t const substitutionCount =
        substitutionIdentifiers.size() < substitutionValues.size()
            ? substitutionIdentifiers.size()
            : substitutionValues.size();

    // Walk from the most recent declaration backwards so that a later DEFINE
    // of the same identifier overrides an earlier one.
    for (std::size_t i = substitutionCount; i-- > 0;)
    {
        std::string const & identifier = substitutionIdentifiers[i];

        // An empty identifier would match at every position; never apply it.
        if (identifier.empty() ||
            line.compare(lineColumn, identifier.size(), identifier) != 0)
        {
            continue;
        }

        line.replace(lineColumn, identifier.size(), substitutionValues[i]);
        return true;
    }

    return false;
}
|
||||
|
||||
void Preprocessor::extractComment(
|
||||
std::string & line,
|
||||
std::size_t const lineNumber,
|
||||
std::size_t const lineColumn)
|
||||
{
|
||||
if (line.size() <= lineColumn + 1 ||
|
||||
line.compare(lineColumn + 1, std::string::npos, "DEFINE") <= 0)
|
||||
{
|
||||
// No match or empty DEFINE statement
|
||||
line = line.substr(0, lineColumn);
|
||||
}
|
||||
|
||||
enum CommentParseState
|
||||
{
|
||||
LookForArgumentStart,
|
||||
LookForArgumentEnd
|
||||
};
|
||||
|
||||
std::string firstArgument, secondArgument;
|
||||
std::size_t argumentCount = 0, argumentStart = 0;
|
||||
CommentParseState state = LookForArgumentStart;
|
||||
for(std::size_t i = lineColumn + 7; i < line.size(); ++i)
|
||||
{
|
||||
switch(state)
|
||||
{
|
||||
case LookForArgumentStart:
|
||||
if(!Utils::isWhitespaceCharacter(line[i]))
|
||||
{
|
||||
argumentStart = i;
|
||||
state = CommentParseState::LookForArgumentEnd;
|
||||
}
|
||||
break;
|
||||
|
||||
case LookForArgumentEnd:
|
||||
if (Utils::isWhitespaceCharacter(line[i]))
|
||||
{
|
||||
switch(argumentCount)
|
||||
{
|
||||
case 0:
|
||||
firstArgument = line.substr(argumentStart, i - argumentStart);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
secondArgument = line.substr(argumentStart, i - argumentStart);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
++argumentCount;
|
||||
state = CommentParseState::LookForArgumentStart;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
switch(state)
|
||||
{
|
||||
case CommentParseState::LookForArgumentStart:
|
||||
break;
|
||||
|
||||
case CommentParseState::LookForArgumentEnd:
|
||||
switch(argumentCount)
|
||||
{
|
||||
case 0:
|
||||
firstArgument = line.substr(argumentStart);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
secondArgument = line.substr(argumentStart);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
++argumentCount;
|
||||
break;
|
||||
}
|
||||
|
||||
if (argumentCount > 0)
|
||||
{
|
||||
substitutionIdentifiers.push_back(firstArgument);
|
||||
substitutionValues.push_back(secondArgument);
|
||||
}
|
||||
|
||||
line = line.substr(0, lineColumn);
|
||||
}
|
||||
|
||||
void Preprocessor::processLine(std::string & line, std::size_t const lineNumber)
|
||||
{
|
||||
for(std::size_t i = 0; i < line.size(); ++i)
|
||||
{
|
||||
if (!Utils::isWhitespaceCharacter(line[i]))
|
||||
{
|
||||
if (trySubstitute(line, i, substitutionIdentifiers, substitutionValues))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (line[i] == '#')
|
||||
{
|
||||
extractComment(line, lineNumber, i);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Preprocessor::process(std::vector<std::string> & lines)
|
||||
{
|
||||
substitutionIdentifiers.clear();
|
||||
substitutionValues.clear();
|
||||
|
||||
for(std::size_t i = 0; i < lines.size(); ++i)
|
||||
{
|
||||
processLine(lines[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
void Preprocessor::printSubstitutions() const
|
||||
{
|
||||
for(std::size_t i = 0; i < substitutionIdentifiers.size(); ++i)
|
||||
{
|
||||
std::printf(
|
||||
"%s -> %s\n",
|
||||
substitutionIdentifiers[i].c_str(),
|
||||
substitutionValues[i].c_str());
|
||||
}
|
||||
}
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <stdexcept>
|
||||
#include <token/errors.hpp>
|
||||
#include <token/tokenizer.hpp>
|
||||
#include <utils.hpp>
|
||||
|
||||
namespace Token
|
||||
{
|
||||
@@ -150,111 +151,6 @@ namespace Token
|
||||
return Token::CreateLabelToken(string, lineNumber, lineColumn);
|
||||
}
|
||||
|
||||
bool IsWhiteSpace(char const c)
|
||||
{
|
||||
return c == '\n' || c == ' ' || c == '\t' || c == '\r';
|
||||
}
|
||||
|
||||
void Tokenizer::ParseComment(
|
||||
std::string const & string,
|
||||
int const lineNumber,
|
||||
int const lineColumn)
|
||||
{
|
||||
unsigned const commentContentStart = lineColumn + 1;
|
||||
if (string.size() < commentContentStart ||
|
||||
IsWhiteSpace(string[commentContentStart]))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
enum class CommentParseState
|
||||
{
|
||||
LookForDirectiveEnd,
|
||||
LookForArgumentStart,
|
||||
LookForArgumentEnd
|
||||
};
|
||||
std::string firstArgument, secondArgument;
|
||||
unsigned argumentCount = 0, argumentStart = 0;
|
||||
CommentParseState state = CommentParseState::LookForDirectiveEnd;
|
||||
for(unsigned i = commentContentStart + 1; i < string.size(); ++i)
|
||||
{
|
||||
switch(state)
|
||||
{
|
||||
case CommentParseState::LookForDirectiveEnd:
|
||||
if(IsWhiteSpace(string[i]))
|
||||
{
|
||||
if (string.compare(commentContentStart, i - commentContentStart, "DEFINE"))
|
||||
{
|
||||
// Nonzero = not equal
|
||||
return;
|
||||
}
|
||||
|
||||
state = CommentParseState::LookForArgumentStart;
|
||||
}
|
||||
break;
|
||||
|
||||
case CommentParseState::LookForArgumentStart:
|
||||
if(!IsWhiteSpace(string[i]))
|
||||
{
|
||||
argumentStart = i;
|
||||
state = CommentParseState::LookForArgumentEnd;
|
||||
}
|
||||
break;
|
||||
|
||||
case CommentParseState::LookForArgumentEnd:
|
||||
if (IsWhiteSpace(string[i]))
|
||||
{
|
||||
state = CommentParseState::LookForArgumentStart;
|
||||
switch(argumentCount)
|
||||
{
|
||||
case 0:
|
||||
firstArgument = string.substr(argumentStart, i - argumentStart);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
secondArgument = string.substr(argumentStart, i - argumentStart);
|
||||
break;
|
||||
|
||||
default:
|
||||
goto end_state_loop;
|
||||
}
|
||||
++argumentCount;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
end_state_loop:
|
||||
switch(state)
|
||||
{
|
||||
case CommentParseState::LookForDirectiveEnd:
|
||||
case CommentParseState::LookForArgumentStart:
|
||||
break;
|
||||
|
||||
case CommentParseState::LookForArgumentEnd:
|
||||
switch(argumentCount)
|
||||
{
|
||||
case 0:
|
||||
firstArgument = string.substr(argumentStart);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
secondArgument = string.substr(argumentStart);
|
||||
break;
|
||||
}
|
||||
++argumentCount;
|
||||
break;
|
||||
}
|
||||
|
||||
if (argumentCount > 0)
|
||||
{
|
||||
substitutions.push_back(std::make_pair(firstArgument, secondArgument));
|
||||
}
|
||||
else
|
||||
{
|
||||
std::printf("WARNING: DEFINE with no arguments on line %u\n", lineNumber + 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Modifies the lineColumn parameter to point at the character literal end
|
||||
void Tokenizer::ParseCharacterLiteral(
|
||||
std::string const & line,
|
||||
@@ -314,16 +210,12 @@ end_state_loop:
|
||||
switch(state)
|
||||
{
|
||||
case TokenizerState::LookForNextToken:
|
||||
if (!IsWhiteSpace(line[column]))
|
||||
if (!Utils::isWhitespaceCharacter(line[column]))
|
||||
{
|
||||
if (line[column] == '#')
|
||||
{
|
||||
ParseComment(line, lineNumber, column);
|
||||
return;
|
||||
}
|
||||
|
||||
if (line[column] == '\'')
|
||||
{
|
||||
// TODO integrate this better with the existing extract token
|
||||
// infrastructure
|
||||
ParseCharacterLiteral(line, lineNumber, column, tokens);
|
||||
break;
|
||||
}
|
||||
@@ -345,7 +237,7 @@ end_state_loop:
|
||||
break;
|
||||
|
||||
case TokenizerState::LookForTokenEnd:
|
||||
if (IsWhiteSpace(line[column]) || line[column] == ';')
|
||||
if (Utils::isWhitespaceCharacter(line[column]) || line[column] == ';')
|
||||
{
|
||||
tokens.push_back(ExtractToken(line.substr(columnTokenStart, column - columnTokenStart), lineNumber, columnTokenStart));
|
||||
if (line[column] == ';')
|
||||
|
||||
9
src/utils.cpp
Normal file
9
src/utils.cpp
Normal file
@@ -0,0 +1,9 @@
|
||||
#include <utils.hpp>
|
||||
|
||||
namespace Utils
{
    // Returns true when c is a newline, space, tab or carriage return; every
    // other character is not considered whitespace.
    bool isWhitespaceCharacter(char const c)
    {
        switch(c)
        {
        case '\n':
        case ' ':
        case '\t':
        case '\r':
            return true;

        default:
            return false;
        }
    }
}
|
||||
@@ -1,5 +1,6 @@
|
||||
#include <fstream>
|
||||
#include <interpret/errors.hpp>
|
||||
#include <preprocessor/preprocessor.hpp>
|
||||
#include <token/errors.hpp>
|
||||
#include <wassembler.hpp>
|
||||
|
||||
@@ -48,7 +49,6 @@ bool Wassembler::LoadLinesFromFile(std::string const & filePath, std::vector<std
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool Wassembler::LoadTokens(std::vector<std::string> const & lines, std::vector<Token::Token> & tokens) const
|
||||
{
|
||||
Token::Tokenizer tokenizer;
|
||||
@@ -86,6 +86,11 @@ void Wassembler::SetMemorySize(unsigned const size)
|
||||
config.memorySize = size;
|
||||
}
|
||||
|
||||
void Wassembler::EnableSubstitutionsLogging()
|
||||
{
|
||||
printSubstitutions = true;
|
||||
}
|
||||
|
||||
bool Wassembler::LoadFromFile(std::string const & filePath)
|
||||
{
|
||||
std::vector<std::string> lines;
|
||||
@@ -95,6 +100,13 @@ bool Wassembler::LoadFromFile(std::string const & filePath)
|
||||
return false;
|
||||
}
|
||||
|
||||
Preprocessor preprocessor;
|
||||
preprocessor.process(lines);
|
||||
if (printSubstitutions)
|
||||
{
|
||||
preprocessor.printSubstitutions();
|
||||
}
|
||||
|
||||
std::vector<Token::Token> tokens;
|
||||
if (!LoadTokens(lines, tokens))
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user