Extract preprocessing from tokenizer

2020-08-29 11:25:10 +02:00
parent aebc1dd86d
commit 71678b2ec6
12 changed files with 254 additions and 134 deletions

@@ -3,6 +3,7 @@
 #include <stdexcept>
 #include <token/errors.hpp>
 #include <token/tokenizer.hpp>
+#include <utils.hpp>
 namespace Token
 {
@@ -150,111 +151,6 @@ namespace Token
     return Token::CreateLabelToken(string, lineNumber, lineColumn);
 }
-bool IsWhiteSpace(char const c)
-{
-    return c == '\n' || c == ' ' || c == '\t' || c == '\r';
-}
-void Tokenizer::ParseComment(
-    std::string const & string,
-    int const lineNumber,
-    int const lineColumn)
-{
-    unsigned const commentContentStart = lineColumn + 1;
-    if (string.size() < commentContentStart ||
-        IsWhiteSpace(string[commentContentStart]))
-    {
-        return;
-    }
-    enum class CommentParseState
-    {
-        LookForDirectiveEnd,
-        LookForArgumentStart,
-        LookForArgumentEnd
-    };
-    std::string firstArgument, secondArgument;
-    unsigned argumentCount = 0, argumentStart = 0;
-    CommentParseState state = CommentParseState::LookForDirectiveEnd;
-    for(unsigned i = commentContentStart + 1; i < string.size(); ++i)
-    {
-        switch(state)
-        {
-        case CommentParseState::LookForDirectiveEnd:
-            if(IsWhiteSpace(string[i]))
-            {
-                if (string.compare(commentContentStart, i - commentContentStart, "DEFINE"))
-                {
-                    // Nonzero = not equal
-                    return;
-                }
-                state = CommentParseState::LookForArgumentStart;
-            }
-            break;
-        case CommentParseState::LookForArgumentStart:
-            if(!IsWhiteSpace(string[i]))
-            {
-                argumentStart = i;
-                state = CommentParseState::LookForArgumentEnd;
-            }
-            break;
-        case CommentParseState::LookForArgumentEnd:
-            if (IsWhiteSpace(string[i]))
-            {
-                state = CommentParseState::LookForArgumentStart;
-                switch(argumentCount)
-                {
-                case 0:
-                    firstArgument = string.substr(argumentStart, i - argumentStart);
-                    break;
-                case 1:
-                    secondArgument = string.substr(argumentStart, i - argumentStart);
-                    break;
-                default:
-                    goto end_state_loop;
-                }
-                ++argumentCount;
-            }
-            break;
-        }
-    }
-end_state_loop:
-    switch(state)
-    {
-    case CommentParseState::LookForDirectiveEnd:
-    case CommentParseState::LookForArgumentStart:
-        break;
-    case CommentParseState::LookForArgumentEnd:
-        switch(argumentCount)
-        {
-        case 0:
-            firstArgument = string.substr(argumentStart);
-            break;
-        case 1:
-            secondArgument = string.substr(argumentStart);
-            break;
-        }
-        ++argumentCount;
-        break;
-    }
-    if (argumentCount > 0)
-    {
-        substitutions.push_back(std::make_pair(firstArgument, secondArgument));
-    }
-    else
-    {
-        std::printf("WARNING: DEFINE with no arguments on line %u\n", lineNumber + 1);
-    }
-}
 // Modifies the lineColumn parameter to point at the character literal end
 void Tokenizer::ParseCharacterLiteral(
     std::string const & line,
@@ -314,16 +210,12 @@ end_state_loop:
         switch(state)
         {
         case TokenizerState::LookForNextToken:
-            if (!IsWhiteSpace(line[column]))
+            if (!Utils::isWhitespaceCharacter(line[column]))
             {
-                if (line[column] == '#')
-                {
-                    ParseComment(line, lineNumber, column);
-                    return;
-                }
                 if (line[column] == '\'')
                 {
                     // TODO integrate this better with the existing extract token
                     // infrastructure
                     ParseCharacterLiteral(line, lineNumber, column, tokens);
                     break;
                 }
@@ -345,7 +237,7 @@ end_state_loop:
             break;
         case TokenizerState::LookForTokenEnd:
-            if (IsWhiteSpace(line[column]) || line[column] == ';')
+            if (Utils::isWhitespaceCharacter(line[column]) || line[column] == ';')
             {
                 tokens.push_back(ExtractToken(line.substr(columnTokenStart, column - columnTokenStart), lineNumber, columnTokenStart));
                 if (line[column] == ';')
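
The new Utils::isWhitespaceCharacter helper that replaces the Tokenizer-local IsWhiteSpace is declared in utils.hpp, one of the 12 changed files but not shown in this excerpt. A minimal sketch of what that header presumably provides, mirroring the removed implementation (the namespace and function name come from the call sites above; the exact signature and header layout are assumptions):

// Hypothetical sketch of utils.hpp; the real file is not part of this diff
// excerpt, so only the call sites above and the removed IsWhiteSpace
// implementation anchor this.
#pragma once

namespace Utils
{
// Accepts the same character set as the removed Tokenizer-local IsWhiteSpace:
// newline, space, tab and carriage return.
inline bool isWhitespaceCharacter(char const c)
{
    return c == '\n' || c == ' ' || c == '\t' || c == '\r';
}
}

For reference, the removed ParseComment state machine recognized directive comments of the form "# DEFINE NAME VALUE" and recorded the (NAME, VALUE) pair in substitutions; per the commit title, that preprocessing presumably now lives in one of the other changed files, which are likewise not part of this excerpt.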