263 lines
5.6 KiB
C++
263 lines
5.6 KiB
C++
#include <map>
|
|
#include <optional>
|
|
#include <stdexcept>
|
|
#include <token/errors.hpp>
|
|
#include <token/tokenizer.hpp>
|
|
#include <utils.hpp>
|
|
|
|
namespace Token
|
|
{
|
|
// Attempts to convert the given text to an int using std::stoi (base 10).
//
// std::stoi skips leading whitespace and stops at the first character that
// cannot be part of the number, so a string such as "12abc" yields 12.
//
// Returns the parsed value, or std::nullopt when no conversion could be
// performed or when the value does not fit in an int.
std::optional<int> TryParseInt(std::string const & string)
{
    try
    {
        return std::make_optional<int>(std::stoi(string));
    }
    catch(std::invalid_argument const &)
    {
        // Nothing in the string could be converted to a number
        return std::nullopt;
    }
    catch(std::out_of_range const &)
    {
        // std::stoi throws this when the number exceeds the range of int.
        // Report it as a parse failure so the caller can create an error
        // token instead of the exception escaping the tokenizer.
        return std::nullopt;
    }
}
|
|
|
|
// Creates the token for a character literal such as 'a'.
//
// The first single quote found at index 1 or later is treated as the closing
// quote (index 0 is not inspected here). The literal is accepted only when
// that quote sits at index 2, i.e. exactly one character precedes it; the
// character at index 1 then becomes the immediate value.
//
// Returns an immediate value token on success, otherwise an error token of
// type ImmediateInteger describing what is wrong with the literal.
Token GetCharacterLiteralToken(
    std::string const & token,
    std::size_t const lineNumber,
    std::size_t const lineColumn)
{
    std::size_t const closingQuoteIndex = token.find('\'', 1);

    // No closing quote anywhere after the first character
    if (closingQuoteIndex == std::string::npos)
    {
        return Token::CreateErrorToken(
            "Non terminated character literal",
            TokenType::ImmediateInteger,
            lineNumber,
            lineColumn);
    }

    // Exactly one character between the quotes
    if (closingQuoteIndex == 2)
    {
        return Token::CreateImmediateValueToken(
            token[1],
            lineNumber,
            lineColumn + 1);
    }

    // Either nothing or more than one character between the quotes
    return Token::CreateErrorToken(
        "Character literal must be exactly 1 character long between single quotes",
        TokenType::ImmediateInteger,
        lineNumber,
        lineColumn + 1u);
}
|
|
|
|
// Creates the token for a memory operand written between square brackets.
//
// Two forms are recognised, selected by the character right after '[':
//   [$<integer>]   address given as an immediate integer
//   [%<register>]  address taken from a register, resolved via GetRegisterType
//
// Returns a memory token on success, otherwise an error token of type Memory.
Token GetMemoryToken(
    std::string const & token,
    std::size_t const lineNumber,
    std::size_t const lineColumn)
{
    std::size_t const length = token.size();

    // Minimal example: [$1] or [%A]
    if (length < 4)
    {
        return Token::CreateErrorToken(
            "Memory address statement is empty",
            TokenType::Memory,
            lineNumber,
            lineColumn);
    }

    bool const opensWithBracket = (token.front() == '[');
    bool const closesWithBracket = (token.back() == ']');
    if (!(opensWithBracket && closesWithBracket))
    {
        return Token::CreateErrorToken(
            "Non terminated memory address brackets",
            TokenType::Memory,
            lineNumber,
            lineColumn);
    }

    // Everything between the prefix character and the closing bracket
    std::string const valueString = token.substr(2, length - 3u);

    switch(token[1])
    {
        case '$':
        {
            auto const address = TryParseInt(valueString);
            if (!address.has_value())
            {
                return Token::CreateErrorToken(
                    "Memory immediate address cannot be parsed as an integer",
                    TokenType::Memory,
                    lineNumber,
                    lineColumn);
            }

            return Token::CreateMemoryToken(
                address.value(),
                lineNumber,
                lineColumn);
        }

        case '%':
            return Token::CreateMemoryToken(
                GetRegisterType(valueString),
                lineNumber,
                lineColumn);

        default:
            break;
    }

    // The prefix is neither '$' nor '%'
    return Token::CreateErrorToken(
        "Memory immediate address contains an unexpected value",
        TokenType::Memory,
        lineNumber,
        lineColumn + 1u);
}
|
|
|
|
// Creates the error token (type Unknown) used when a character or string
// literal is not closed, positioned at the given line and column.
Token GetUnterminatedCharacterLiteralError(
    std::size_t const lineNumber,
    std::size_t const lineColumn)
{
    char const * const message = "Unterminated character or string literal";

    return Token::CreateErrorToken(message, TokenType::Unknown, lineNumber, lineColumn);
}
|
|
|
|
// Classifies a single piece of text and creates the matching token.
//
// The text is examined in this order:
//   1. by its first character:
//        '$'  immediate integer
//        '%'  register
//        '\'' character literal
//        ';'  statement end
//        '['  memory operand
//   2. by its last character:
//        ']'  memory operand (GetMemoryToken reports the missing '[')
//        ':'  label definition (the text without the trailing ':')
//        '\'' or '"'  unterminated literal error
//   3. as an operand, when GetOperandType recognises the text
//   4. otherwise as a label argument
//
// An empty string yields an error token of type Unknown.
Token Tokenizer::ExtractToken(
    std::string const & string,
    std::size_t const lineNumber,
    std::size_t const lineColumn) const
{
    // Guard against empty input: the postfix lookup below would otherwise
    // index string[size() - 1] with size() == 0, which is out of bounds.
    if (string.empty())
    {
        return Token::CreateErrorToken(
            "Token is empty",
            TokenType::Unknown,
            lineNumber,
            lineColumn);
    }

    char const prefix = string.front();
    switch(prefix)
    {
        case '$':
        {
            auto const result = TryParseInt(string.substr(1));

            if (result.has_value())
            {
                return Token::CreateImmediateValueToken(
                    result.value(),
                    lineNumber,
                    lineColumn);
            }

            return Token::CreateErrorToken(
                "Immediate cannot be parsed as an integer",
                TokenType::ImmediateInteger,
                lineNumber,
                lineColumn);
        }

        case '%':
            return Token::CreateRegisterToken(
                GetRegisterType(string.substr(1)),
                lineNumber,
                lineColumn);

        case '\'':
            return GetCharacterLiteralToken(string, lineNumber, lineColumn);

        case ';':
            return Token::CreateStatementEndToken(lineNumber, lineColumn);

        case '[':
            return GetMemoryToken(string, lineNumber, lineColumn);

        default:
            break;
    }

    char const postfix = string.back();
    switch(postfix)
    {
        case ']':
            return GetMemoryToken(string, lineNumber, lineColumn);

        case ':':
            // TODO check if label is an Operand?
            return Token::CreateLabelDefinitionToken(
                string.substr(0, string.size() - 1),
                lineNumber,
                lineColumn);

        case '\'':
        case '\"':
            // This shouldn't happen
            return GetUnterminatedCharacterLiteralError(lineNumber, lineColumn);

        default:
            break;
    }

    OperandType const opType = GetOperandType(string);
    if (opType != OperandType::Unknown)
    {
        return Token::CreateOperandToken(opType, lineNumber, lineColumn);
    }

    // Last resort: it must be a jump target
    return Token::CreateLabelArgumentToken(string, lineNumber, lineColumn);
}
|
|
|
|
void Tokenizer::Tokenize(
|
|
std::string const & line,
|
|
std::size_t const lineNumber,
|
|
std::vector<Token> & tokens)
|
|
{
|
|
for(std::size_t column = 0u; column < line.size(); ++column)
|
|
{
|
|
if (Utils::isWhitespaceCharacter(line[column]))
|
|
{
|
|
continue;
|
|
}
|
|
|
|
switch(line[column])
|
|
{
|
|
case '\'':
|
|
case '\"':
|
|
{
|
|
auto const result = Utils::getValueSurroundedBy(
|
|
line,
|
|
column,
|
|
line[column]);
|
|
if (result.has_value())
|
|
{
|
|
tokens.push_back(ExtractToken(result.value(), lineNumber, column));
|
|
column += result.value().size() - 1;
|
|
}
|
|
else
|
|
{
|
|
tokens.push_back(
|
|
GetUnterminatedCharacterLiteralError(lineNumber, column));
|
|
|
|
// Parsing must stop here, the line is malformed
|
|
return;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case ';':
|
|
tokens.push_back(ExtractToken(";", lineNumber, column));
|
|
break;
|
|
|
|
default:
|
|
{
|
|
auto const result = Utils::getValueSurroundedByWhitespace(line, column);
|
|
auto const lastCharacterIndex = result.size() - 1;
|
|
if (result[lastCharacterIndex] == ';')
|
|
{
|
|
tokens.push_back(ExtractToken(result.substr(0, result.size() -1), lineNumber, column));
|
|
tokens.push_back(ExtractToken(";", lineNumber, column + lastCharacterIndex));
|
|
}
|
|
else
|
|
{
|
|
tokens.push_back(ExtractToken(result, lineNumber, column));
|
|
}
|
|
|
|
column += result.size();
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
} |