// Files
// wassembly/src/token/tokenizer.cpp
//
// 263 lines
// 5.6 KiB
// C++

#include <map>
#include <optional>
#include <stdexcept>
#include <token/errors.hpp>
#include <token/tokenizer.hpp>
#include <utils.hpp>
namespace Token
{
// Attempts to convert the given text to an int using std::stoi (base 10).
//
// std::stoi semantics apply: leading whitespace is skipped and characters
// following the numeric prefix are ignored.
//
// Returns the parsed value, or std::nullopt when no conversion could be
// performed or when the value does not fit in an int.
std::optional<int> TryParseInt(std::string const & string)
{
    try
    {
        return std::stoi(string);
    }
    catch(std::invalid_argument const &)
    {
        // The text does not start with anything convertible to a number
        return std::nullopt;
    }
    catch(std::out_of_range const &)
    {
        // The number is too large or too small for an int; report it as a
        // parse failure instead of letting the exception escape
        return std::nullopt;
    }
}
// Converts a character literal token into an immediate value token.
//
// The search for the terminating single quote starts at index 1; the
// character at index 0 is not inspected here. The literal is accepted only
// when the first single quote found sits at index 2, in which case the
// character at index 1 becomes the immediate value. Any other position
// yields an error token, as does a missing terminating quote.
Token GetCharacterLiteralToken(
    std::string const & token,
    std::size_t const lineNumber,
    std::size_t const lineColumn)
{
    std::size_t const closingQuote = token.find('\'', 1);

    if (closingQuote == std::string::npos)
    {
        return Token::CreateErrorToken(
            "Non terminated character literal",
            TokenType::ImmediateInteger,
            lineNumber,
            lineColumn);
    }

    if (closingQuote == 2)
    {
        // Exactly one character between the quotes
        return Token::CreateImmediateValueToken(
            token[1],
            lineNumber,
            lineColumn + 1);
    }

    return Token::CreateErrorToken(
        "Character literal must be exactly 1 character long between single quotes",
        TokenType::ImmediateInteger,
        lineNumber,
        lineColumn + 1u);
}
// Converts a bracketed memory operand into a memory token.
//
// The token must be at least four characters long and be enclosed in square
// brackets. The character after the opening bracket selects the addressing
// form:
//   '$' - the remaining text is parsed as an integer address
//   '%' - the remaining text is resolved through GetRegisterType
// Any violation produces an error token of type TokenType::Memory.
Token GetMemoryToken(
    std::string const & token,
    std::size_t const lineNumber,
    std::size_t const lineColumn)
{
    std::size_t const length = token.size();

    // Minimal example: [$1] or [%A]
    if (length < 4)
    {
        return Token::CreateErrorToken(
            "Memory address statement is empty",
            TokenType::Memory,
            lineNumber,
            lineColumn);
    }

    bool const isBracketed = token.front() == '[' && token.back() == ']';
    if (!isBracketed)
    {
        return Token::CreateErrorToken(
            "Non terminated memory address brackets",
            TokenType::Memory,
            lineNumber,
            lineColumn);
    }

    // Text between the prefix character and the closing bracket
    std::string const operand = token.substr(2, length - 3u);

    switch(token[1])
    {
    case '$':
    {
        auto const address = TryParseInt(operand);
        if (!address.has_value())
        {
            return Token::CreateErrorToken(
                "Memory immediate address cannot be parsed as an integer",
                TokenType::Memory,
                lineNumber,
                lineColumn);
        }
        return Token::CreateMemoryToken(
            address.value(),
            lineNumber,
            lineColumn);
    }
    case '%':
        return Token::CreateMemoryToken(
            GetRegisterType(operand),
            lineNumber,
            lineColumn);
    default:
        break;
    }

    // Neither an immediate nor a register prefix follows the bracket
    return Token::CreateErrorToken(
        "Memory immediate address contains an unexpected value",
        TokenType::Memory,
        lineNumber,
        lineColumn + 1u);
}
// Creates the error token reported when a character or string literal has
// no terminating quote. The token is tagged as TokenType::Unknown and
// carries the given line number and column.
Token GetUnterminatedCharacterLiteralError(
    std::size_t const lineNumber,
    std::size_t const lineColumn)
{
    auto const errorKind = TokenType::Unknown;
    return Token::CreateErrorToken(
        "Unterminated character or string literal",
        errorKind,
        lineNumber,
        lineColumn);
}
// Classifies a single piece of source text and converts it into a token.
//
// The first character is examined first:
//   '$'  - immediate integer, parsed with TryParseInt
//   '%'  - register, resolved through GetRegisterType
//   '\'' - character literal
//   ';'  - statement end
//   '['  - memory operand
// When none of these match, the last character is examined:
//   ']'         - memory operand (reported as an error by GetMemoryToken,
//                 because the opening bracket is missing)
//   ':'         - label definition, named by the text before the colon
//   '\'' or '"' - unterminated literal error
// Otherwise the text is looked up with GetOperandType; if that yields
// OperandType::Unknown the text is treated as a label argument.
//
// An empty string yields an error token of type TokenType::Unknown.
Token Tokenizer::ExtractToken(
    std::string const & string,
    std::size_t const lineNumber,
    std::size_t const lineColumn) const
{
    // Guard against empty input: indexing the last character of an empty
    // string would read out of bounds
    if (string.empty())
    {
        return Token::CreateErrorToken(
            "Cannot extract a token from an empty string",
            TokenType::Unknown,
            lineNumber,
            lineColumn);
    }

    char const prefix = string.front();
    switch(prefix)
    {
    case '$':
    {
        auto const result = TryParseInt(string.substr(1));
        if (result.has_value())
        {
            return Token::CreateImmediateValueToken(
                result.value(),
                lineNumber,
                lineColumn);
        }
        return Token::CreateErrorToken(
            "Immediate cannot be parsed as an integer",
            TokenType::ImmediateInteger,
            lineNumber,
            lineColumn);
    }
    case '%':
        return Token::CreateRegisterToken(
            GetRegisterType(string.substr(1)),
            lineNumber,
            lineColumn);
    case '\'':
        return GetCharacterLiteralToken(string, lineNumber, lineColumn);
    case ';':
        return Token::CreateStatementEndToken(lineNumber, lineColumn);
    case '[':
        return GetMemoryToken(string, lineNumber, lineColumn);
    default:
        break;
    }

    char const postfix = string.back();
    switch(postfix)
    {
    case ']':
        return GetMemoryToken(string, lineNumber, lineColumn);
    case ':':
        // TODO check if label is an Operand?
        return Token::CreateLabelDefinitionToken(
            string.substr(0, string.size() - 1),
            lineNumber,
            lineColumn);
    case '\'':
    case '\"':
        // This shouldn't happen
        return GetUnterminatedCharacterLiteralError(lineNumber, lineColumn);
    default:
        break;
    }

    OperandType const opType = GetOperandType(string);
    if (opType != OperandType::Unknown)
    {
        return Token::CreateOperandToken(opType, lineNumber, lineColumn);
    }

    // Last resort: it must be a jump target
    return Token::CreateLabelArgumentToken(string, lineNumber, lineColumn);
}
// Splits one line of source text into tokens and appends them to `tokens`.
//
// Whitespace characters are skipped. A single or double quote starts a
// quoted literal, which is fetched with Utils::getValueSurroundedBy; when
// that yields no value an error token is appended and the rest of the line
// is abandoned. A ';' becomes a statement end token. Anything else is
// fetched with Utils::getValueSurroundedByWhitespace, and a trailing ';' on
// that text is emitted as a separate statement end token.
void Tokenizer::Tokenize(
    std::string const & line,
    std::size_t const lineNumber,
    std::vector<Token> & tokens)
{
    for(std::size_t column = 0u; column < line.size(); ++column)
    {
        char const current = line[column];
        if (Utils::isWhitespaceCharacter(current))
        {
            continue;
        }

        if (current == '\'' || current == '\"')
        {
            auto const literal = Utils::getValueSurroundedBy(
                line,
                column,
                current);
            if (!literal.has_value())
            {
                tokens.push_back(
                    GetUnterminatedCharacterLiteralError(lineNumber, column));
                // Parsing must stop here, the line is malformed
                return;
            }

            tokens.push_back(ExtractToken(literal.value(), lineNumber, column));
            // The loop increment supplies the final step past the literal
            // NOTE(review): assumes the returned value is never empty - confirm
            column += literal.value().size() - 1;
        }
        else if (current == ';')
        {
            tokens.push_back(ExtractToken(";", lineNumber, column));
        }
        else
        {
            auto const word = Utils::getValueSurroundedByWhitespace(line, column);
            auto const lastIndex = word.size() - 1;
            if (word[lastIndex] == ';')
            {
                // Emit the text and its trailing terminator as two tokens
                tokens.push_back(
                    ExtractToken(word.substr(0, word.size() - 1), lineNumber, column));
                tokens.push_back(
                    ExtractToken(";", lineNumber, column + lastIndex));
            }
            else
            {
                tokens.push_back(ExtractToken(word, lineNumber, column));
            }
            // Together with the loop increment this also steps over the
            // character that follows the word
            column += word.size();
        }
    }
}
}