#include #include #include #include #include #include namespace Token { std::optional TryParseInt(std::string const & string) { try { int value = std::stoi(string); return std::make_optional(value); } catch(std::invalid_argument &) { return std::nullopt; } } Token Tokenizer::ExtractToken(std::string string, int const lineNumber, int const lineColumn) const { if (string.size() == 0) { // TODO Should this become an error token? return Token::CreateEmptyToken(lineNumber, lineColumn); } for(std::size_t i = 0; i < substitutions.size(); ++i) { if (string == substitutions[i].first) { string = substitutions[i].second; break; } } char const prefix = string[0]; switch(prefix) { case '$': { auto const result = TryParseInt(string.substr(1, string.size())); if (result.has_value()) { return Token::CreateImmediateValueToken( result.value(), lineNumber, lineColumn); } return Token::CreateErrorToken( "Immediate cannot be parsed as an integer", TokenType::ImmediateInteger, lineNumber, lineColumn); } case '%': return Token::CreateRegisterToken(GetRegisterType( string.substr(1, string.size())), lineNumber, lineColumn); case ';': return Token::CreateStatementEndToken(lineNumber, lineColumn); default: break; } char const postfix = string[string.size() - 1]; if (postfix == ':') { // TODO check if label is an Operand? return Token::CreateLabelToken( string.substr(0, string.size() - 1), lineNumber, lineColumn); } if (prefix == '[' && postfix == ']') { if(string.size() < 4) { return Token::CreateErrorToken( "Memory address statement is empty", TokenType::Memory, lineNumber, lineColumn); } char const memoryPrefix = string[1]; std::string const valueString = string.substr(2, string.size() - 3u); if (memoryPrefix == '$') { auto const result = TryParseInt(valueString); if (result.has_value()) { return Token::CreateMemoryToken( result.value(), lineNumber, lineColumn); } return Token::CreateErrorToken( "Memory immediate address cannot be parsed as an integer", TokenType::Memory, lineNumber, lineColumn); } else if (memoryPrefix == '%') { return Token::CreateMemoryToken( GetRegisterType(valueString), lineNumber, lineColumn); } else { return Token::CreateErrorToken( "Memory immediate address contains an unexpected value", TokenType::Memory, lineNumber, lineColumn + 1u); } } else if (prefix == '[' || postfix == ']') { int const errorLineColumn = (prefix == '[') ? lineColumn : (lineColumn + string.size() - 1u); return Token::CreateErrorToken( "Non terminated memory address brackets", TokenType::Memory, lineNumber, errorLineColumn); } OperandType const opType = GetOperandType(string); if (opType != OperandType::Unknown) { return Token::CreateOperandToken(opType, lineNumber, lineColumn); } // Last resort: it must be a label return Token::CreateLabelToken(string, lineNumber, lineColumn); } // Modifies the lineColumn parameter to point at the character literal end void Tokenizer::ParseCharacterLiteral( std::string const & line, int const lineNumber, unsigned & lineColumn, std::vector & tokens) const { for(unsigned int i = lineColumn + 1; i < line.size(); ++i) { if (line[i] == '\'') { if (lineColumn + 2u != i) { tokens.push_back(Token::CreateErrorToken( "Character literal must be exactly 1 character long between single quotes", TokenType::ImmediateInteger, lineNumber, lineColumn + 1u)); } else { tokens.push_back(Token::CreateImmediateValueToken( line[i - 1], lineNumber, lineColumn + 1)); } lineColumn = i; return; } } tokens.push_back(Token::CreateErrorToken( "Non terminated character literal", TokenType::ImmediateInteger, lineNumber, lineColumn)); lineColumn = line.size(); } void Tokenizer::Tokenize( std::string const & line, int const lineNumber, std::vector & tokens) { enum class TokenizerState { LookForNextToken, LookForTokenEnd, }; TokenizerState state = TokenizerState::LookForNextToken; unsigned columnTokenStart = 0; for(unsigned column = 0u; column < line.size(); ++column) { switch(state) { case TokenizerState::LookForNextToken: if (!Utils::isWhitespaceCharacter(line[column])) { if (line[column] == '\'') { // TODO integrate this better with the existing extract token // infrastructure ParseCharacterLiteral(line, lineNumber, column, tokens); break; } columnTokenStart = column; switch(line[column]) { case ';': tokens.push_back( ExtractToken(line.substr(column, 1), lineNumber, column)); break; default: state = TokenizerState::LookForTokenEnd; break; } } break; case TokenizerState::LookForTokenEnd: if (Utils::isWhitespaceCharacter(line[column]) || line[column] == ';') { tokens.push_back(ExtractToken(line.substr(columnTokenStart, column - columnTokenStart), lineNumber, columnTokenStart)); if (line[column] == ';') { tokens.push_back(ExtractToken(line.substr(column, 1), lineNumber, column)); } state = TokenizerState::LookForNextToken; } break; default: std::puts("DEBUG: Unhandled TokenizerState value"); break; } } switch(state) { case TokenizerState::LookForTokenEnd: tokens.push_back(ExtractToken( line.substr(columnTokenStart, line.size()), lineNumber, columnTokenStart)); break; case TokenizerState::LookForNextToken: default: break; } } }