Extract preprocessing from tokenizer

This commit is contained in:
2020-08-29 11:25:10 +02:00
parent aebc1dd86d
commit 71678b2ec6
12 changed files with 254 additions and 134 deletions

View File

@@ -7,10 +7,12 @@ int main(int argc, char ** argv)
{
std::string inputFile;
unsigned memorySize = 1024;
bool printSubstitutions = false;
auto cli = (
clipp::value("input wasm file").set(inputFile),
clipp::option("-m", "--memory-size") & clipp::value("memory size", memorySize)
clipp::option("-m", "--memory-size") & clipp::value("memory size", memorySize),
clipp::option("-p", "--print-substitutions").set(printSubstitutions)
);
if (!clipp::parse(argc, argv, cli))
@@ -21,6 +23,11 @@ int main(int argc, char ** argv)
Wassembler wassembler;
wassembler.SetMemorySize(memorySize);
if (printSubstitutions)
{
wassembler.EnableSubstitutionsLogging();
}
if (!wassembler.LoadFromFile(inputFile))
{
exit(1);

View File

@@ -0,0 +1,159 @@
#include <preprocessor/preprocessor.hpp>
#include <utils.hpp>
// Tries to apply one of the known substitutions at a fixed position in `line`.
//
// The identifiers are checked in order. The first identifier whose text equals
// the characters of `line` starting at `lineColumn` is replaced, in place, by
// the value stored at the same index in `substitutionValues`.
//
// Returns true if a replacement was made; returns false (leaving `line`
// untouched) if no identifier matches at that column.
bool trySubstitute(
    std::string & line,
    std::size_t const lineColumn,
    std::vector<std::string> const & substitutionIdentifiers,
    std::vector<std::string> const & substitutionValues)
{
    for (std::size_t index = 0; index != substitutionIdentifiers.size(); ++index)
    {
        std::string const & identifier = substitutionIdentifiers[index];
        bool const matchesHere =
            line.compare(lineColumn, identifier.size(), identifier) == 0;
        if (matchesHere)
        {
            // Swap the identifier for its value; text before and after the
            // identifier is kept as it was.
            line.replace(lineColumn, identifier.size(), substitutionValues[index]);
            return true;
        }
    }
    return false;
}
// Handles the comment that starts with the '#' at `lineColumn`.
//
// If the text directly after the '#' is the directive "DEFINE" (followed by
// whitespace or the end of the line), the first two whitespace-separated
// arguments are recorded as a substitution: the first argument is appended to
// `substitutionIdentifiers` and the second (possibly empty) to
// `substitutionValues`. Any further arguments are ignored, and a DEFINE
// without arguments records nothing.
//
// In every case the comment is removed: `line` is truncated to the text that
// precedes `lineColumn`.
//
// `lineNumber` is accepted for interface compatibility but is not used here.
void Preprocessor::extractComment(
    std::string & line,
    std::size_t const lineNumber,
    std::size_t const lineColumn)
{
    static_cast<void>(lineNumber);

    static char const directive[] = "DEFINE";
    std::size_t const directiveLength = sizeof(directive) - 1;
    std::size_t const directiveStart = lineColumn + 1;
    std::size_t const argumentsStart = directiveStart + directiveLength;

    // The directive must match exactly and must be a whole word. Ordering the
    // rest of the line against "DEFINE" would misclassify ordinary comments
    // such as "#todo fix this" (and "#DEFINEX ...") as DEFINE statements.
    bool const isDefine =
        line.size() >= argumentsStart &&
        line.compare(directiveStart, directiveLength, directive) == 0 &&
        (line.size() == argumentsStart ||
            Utils::isWhitespaceCharacter(line[argumentsStart]));

    if (!isDefine)
    {
        // Plain comment: just strip it.
        line.erase(lineColumn);
        return;
    }

    // Split the remainder of the line on whitespace, keeping the first two
    // arguments and counting (but otherwise ignoring) any others.
    std::string arguments[2];
    std::size_t argumentCount = 0;
    std::size_t position = argumentsStart;
    while (position < line.size())
    {
        if (Utils::isWhitespaceCharacter(line[position]))
        {
            ++position;
            continue;
        }

        std::size_t const argumentStart = position;
        while (position < line.size() && !Utils::isWhitespaceCharacter(line[position]))
        {
            ++position;
        }

        if (argumentCount < 2)
        {
            arguments[argumentCount] = line.substr(argumentStart, position - argumentStart);
        }
        ++argumentCount;
    }

    if (argumentCount > 0)
    {
        substitutionIdentifiers.push_back(arguments[0]);
        substitutionValues.push_back(arguments[1]);
    }

    line.erase(lineColumn);
}
// Preprocesses a single line in place.
//
// Walks the line column by column. Whitespace is skipped. At every other
// column a substitution is attempted first; if none applies and the character
// is '#', the rest of the line is handed to extractComment and processing of
// this line stops.
void Preprocessor::processLine(std::string & line, std::size_t const lineNumber)
{
    for (std::size_t column = 0; column < line.size(); ++column)
    {
        if (Utils::isWhitespaceCharacter(line[column]))
        {
            continue;
        }

        // A successful substitution rewrites `line`; scanning resumes at the
        // next column of the rewritten text.
        if (trySubstitute(line, column, substitutionIdentifiers, substitutionValues))
        {
            continue;
        }

        if (line[column] == '#')
        {
            extractComment(line, lineNumber, column);
            return;
        }
    }
}
// Preprocesses all `lines` in place, in order.
//
// Substitutions recorded by a previous run are discarded first. Each line is
// passed to processLine together with its zero-based index.
void Preprocessor::process(std::vector<std::string> & lines)
{
    substitutionIdentifiers.clear();
    substitutionValues.clear();

    std::size_t lineNumber = 0;
    for (std::string & line : lines)
    {
        processLine(line, lineNumber);
        ++lineNumber;
    }
}
// Prints every recorded substitution to standard output, one per line, in the
// form "<identifier> -> <value>".
void Preprocessor::printSubstitutions() const
{
    std::size_t const count = substitutionIdentifiers.size();
    for (std::size_t index = 0; index < count; ++index)
    {
        char const * const identifier = substitutionIdentifiers[index].c_str();
        char const * const value = substitutionValues[index].c_str();
        std::printf("%s -> %s\n", identifier, value);
    }
}

View File

@@ -3,6 +3,7 @@
#include <stdexcept>
#include <token/errors.hpp>
#include <token/tokenizer.hpp>
#include <utils.hpp>
namespace Token
{
@@ -150,111 +151,6 @@ namespace Token
return Token::CreateLabelToken(string, lineNumber, lineColumn);
}
// Returns true when `c` is a newline, space, tab or carriage return.
bool IsWhiteSpace(char const c)
{
    switch (c)
    {
    case '\n':
    case ' ':
    case '\t':
    case '\r':
        return true;
    default:
        return false;
    }
}
// Parses the comment that starts with the '#' at `lineColumn` of `string`.
//
// When the word directly after the '#' is "DEFINE", its first two
// whitespace-separated arguments are stored as a pair in `substitutions`.
// A comment whose '#' is followed by whitespace is ignored.
//
// NOTE(review): if the line ends before any whitespace follows the directive
// word (e.g. "#note" or "#DEFINE"), the loop finishes in LookForDirectiveEnd
// without ever comparing the word against "DEFINE"; argumentCount is then 0
// and the "DEFINE with no arguments" warning is printed even for comments
// that are not DEFINE statements.
void Tokenizer::ParseComment(
std::string const & string,
int const lineNumber,
int const lineColumn)
{
unsigned const commentContentStart = lineColumn + 1;
// NOTE(review): when string.size() == commentContentStart this indexes one
// past the last character (presumably the terminating null of the const
// string), so a bare '#' at the end of a line is not rejected here — confirm.
if (string.size() < commentContentStart ||
IsWhiteSpace(string[commentContentStart]))
{
return;
}
enum class CommentParseState
{
LookForDirectiveEnd,
LookForArgumentStart,
LookForArgumentEnd
};
std::string firstArgument, secondArgument;
unsigned argumentCount = 0, argumentStart = 0;
CommentParseState state = CommentParseState::LookForDirectiveEnd;
for(unsigned i = commentContentStart + 1; i < string.size(); ++i)
{
switch(state)
{
case CommentParseState::LookForDirectiveEnd:
// The directive word ends at the first whitespace character.
if(IsWhiteSpace(string[i]))
{
if (string.compare(commentContentStart, i - commentContentStart, "DEFINE"))
{
// Nonzero = not equal
return;
}
state = CommentParseState::LookForArgumentStart;
}
break;
case CommentParseState::LookForArgumentStart:
if(!IsWhiteSpace(string[i]))
{
argumentStart = i;
state = CommentParseState::LookForArgumentEnd;
}
break;
case CommentParseState::LookForArgumentEnd:
if (IsWhiteSpace(string[i]))
{
state = CommentParseState::LookForArgumentStart;
switch(argumentCount)
{
case 0:
firstArgument = string.substr(argumentStart, i - argumentStart);
break;
case 1:
secondArgument = string.substr(argumentStart, i - argumentStart);
break;
default:
// A third argument has ended: nothing more is stored, stop scanning.
goto end_state_loop;
}
++argumentCount;
}
break;
}
}
end_state_loop:
// Flush an argument that runs up to the end of the line (no trailing
// whitespace closed it inside the loop).
switch(state)
{
case CommentParseState::LookForDirectiveEnd:
case CommentParseState::LookForArgumentStart:
break;
case CommentParseState::LookForArgumentEnd:
switch(argumentCount)
{
case 0:
firstArgument = string.substr(argumentStart);
break;
case 1:
secondArgument = string.substr(argumentStart);
break;
}
++argumentCount;
break;
}
if (argumentCount > 0)
{
substitutions.push_back(std::make_pair(firstArgument, secondArgument));
}
else
{
// lineNumber + 1 is printed, i.e. the stored line number is one less than
// the number shown to the user.
// NOTE(review): "%u" is given an int expression here.
std::printf("WARNING: DEFINE with no arguments on line %u\n", lineNumber + 1);
}
}
// Modifies the lineColumn parameter to point at the character literal end
void Tokenizer::ParseCharacterLiteral(
std::string const & line,
@@ -314,16 +210,12 @@ end_state_loop:
switch(state)
{
case TokenizerState::LookForNextToken:
if (!IsWhiteSpace(line[column]))
if (!Utils::isWhitespaceCharacter(line[column]))
{
if (line[column] == '#')
{
ParseComment(line, lineNumber, column);
return;
}
if (line[column] == '\'')
{
// TODO integrate this better with the existing extract token
// infrastructure
ParseCharacterLiteral(line, lineNumber, column, tokens);
break;
}
@@ -345,7 +237,7 @@ end_state_loop:
break;
case TokenizerState::LookForTokenEnd:
if (IsWhiteSpace(line[column]) || line[column] == ';')
if (Utils::isWhitespaceCharacter(line[column]) || line[column] == ';')
{
tokens.push_back(ExtractToken(line.substr(columnTokenStart, column - columnTokenStart), lineNumber, columnTokenStart));
if (line[column] == ';')

9
src/utils.cpp Normal file
View File

@@ -0,0 +1,9 @@
#include <utils.hpp>
namespace Utils
{
    // Returns true when `c` is one of the characters treated as whitespace:
    // newline, space, tab or carriage return.
    bool isWhitespaceCharacter(char const c)
    {
        if (c == '\n')
        {
            return true;
        }
        if (c == ' ')
        {
            return true;
        }
        if (c == '\t')
        {
            return true;
        }
        return c == '\r';
    }
}

View File

@@ -1,5 +1,6 @@
#include <fstream>
#include <interpret/errors.hpp>
#include <preprocessor/preprocessor.hpp>
#include <token/errors.hpp>
#include <wassembler.hpp>
@@ -48,7 +49,6 @@ bool Wassembler::LoadLinesFromFile(std::string const & filePath, std::vector<std
return true;
}
bool Wassembler::LoadTokens(std::vector<std::string> const & lines, std::vector<Token::Token> & tokens) const
{
Token::Tokenizer tokenizer;
@@ -86,6 +86,11 @@ void Wassembler::SetMemorySize(unsigned const size)
config.memorySize = size;
}
// Turns on substitution logging by setting the printSubstitutions flag.
// LoadFromFile checks this flag after preprocessing and, when it is set,
// calls Preprocessor::printSubstitutions.
void Wassembler::EnableSubstitutionsLogging()
{
printSubstitutions = true;
}
bool Wassembler::LoadFromFile(std::string const & filePath)
{
std::vector<std::string> lines;
@@ -95,6 +100,13 @@ bool Wassembler::LoadFromFile(std::string const & filePath)
return false;
}
Preprocessor preprocessor;
preprocessor.process(lines);
if (printSubstitutions)
{
preprocessor.printSubstitutions();
}
std::vector<Token::Token> tokens;
if (!LoadTokens(lines, tokens))
{