Introduce simple character literal

This commit is contained in:
2020-08-28 20:00:49 +02:00
parent d396628310
commit 593506a907
4 changed files with 102 additions and 14 deletions

View File

@@ -23,7 +23,9 @@ afterwards can be a bit cryptic as to where it originated.
- `[operation][number type]`, e.g. `divi` for divide (div) integer - `[operation][number type]`, e.g. `divi` for divide (div) integer
- `%[register]` for addressing registers - `%[register]` for addressing registers
- `$[value]` for using literals/immediate values - `$[value]` for using immediate (literal) integer values
- `'a'` for using immediate character values (currently only non-escaped
characters are supported)
- `;` for end of statement (mandatory) - `;` for end of statement (mandatory)
- `[label]:` for labels - `[label]:` for labels
- `#[text]` for comments: any text is ignored till a newline (`\n`) is found - `#[text]` for comments: any text is ignored till a newline (`\n`) is found
@@ -63,7 +65,8 @@ The following whitespace characters are used to separate symbols:
- newline (`\n`) - newline (`\n`)
The following characters are used as identifiers: The following characters are used as identifiers:
- dollar (`$`) for immediate (literal) values - dollar (`$`) for immediate (literal) integer values
- single quote (`'`) for immediate character values
- percentage (`%`) for register identifiers - percentage (`%`) for register identifiers
- colon (`:`) for jump labels - colon (`:`) for jump labels
- semicolon (`;`) for statement termination - semicolon (`;`) for statement termination
@@ -86,7 +89,7 @@ second byte of location `$900`).
All symbols are reserved keywords and can therefore NOT be used as labels. All symbols are reserved keywords and can therefore NOT be used as labels.
There is currently no strict checking, so be careful. There is currently no strict checking, so be careful.
## Preprocessor ### Preprocessor
All preprocessor directives are prefixed by a `#`. Ill formed preprocessor All preprocessor directives are prefixed by a `#`. Ill formed preprocessor
directives do not halt compilation, they are merely reported and then ignored. directives do not halt compilation, they are merely reported and then ignored.

View File

@@ -24,9 +24,9 @@ jmp count_loop;
#DEFINE PRINT_CHAR $0 #DEFINE PRINT_CHAR $0
# Hello world # Hello world
seti %A $72; # H seti %A 'H'; # H
int PRINT_CHAR; int PRINT_CHAR;
seti %A $101; # e seti %A 'e'; # e
int PRINT_CHAR; int PRINT_CHAR;
seti %A $108; # l seti %A $108; # l
int PRINT_CHAR; int PRINT_CHAR;

View File

@@ -10,8 +10,21 @@ namespace Token
private: private:
std::vector<std::pair<std::string, std::string>> substitutions; std::vector<std::pair<std::string, std::string>> substitutions;
Token ExtractToken(std::string string, int const lineNumber, int const lineColumn) const; Token ExtractToken(
void ParseComment(std::string const & string, int const lineNumber, int const lineColumn); std::string string,
int const lineNumber,
int const lineColumn) const;
void ParseComment(
std::string const & string,
int const lineNumber,
int const lineColumn);
void ParseCharacterLiteral(
std::string const & line,
int const lineNumber,
unsigned & lineColumn,
std::vector<Token> & tokens) const;
public: public:
void Tokenize(std::string const & line, int const lineNumber, std::vector<Token> & tokens); void Tokenize(std::string const & line, int const lineNumber, std::vector<Token> & tokens);

View File

@@ -1,20 +1,21 @@
#include <map> #include <map>
#include <optional>
#include <stdexcept> #include <stdexcept>
#include <token/errors.hpp> #include <token/errors.hpp>
#include <token/tokenizer.hpp> #include <token/tokenizer.hpp>
namespace Token namespace Token
{ {
// Tries to convert `string` to an int using std::stoi (base 10).
//
// Returns the parsed value on success, or std::nullopt when the text cannot
// be used as an int:
// - no conversion could be performed (std::invalid_argument)
// - the value does not fit in an int (std::out_of_range)
//
// Never throws for either of these cases, so callers can turn a failed parse
// into an invalid token instead of aborting tokenization.
std::optional<int> TryParseInt(std::string const & string)
{
    try
    {
        return std::stoi(string);
    }
    catch(std::invalid_argument const &)
    {
        // Nothing that looks like a number, e.g. `$abc` or a lone `$`
        return std::nullopt;
    }
    catch(std::out_of_range const &)
    {
        // Number is syntactically fine but too large/small for an int, e.g.
        // `$99999999999`; report it as unparsable rather than letting the
        // exception escape.
        return std::nullopt;
    }
}
@@ -42,9 +43,19 @@ namespace Token
case '$': case '$':
{ {
auto const result = TryParseInt(string.substr(1, string.size())); auto const result = TryParseInt(string.substr(1, string.size()));
if (result.has_value())
{
return Token::CreateImmediateValueToken( return Token::CreateImmediateValueToken(
std::get<0>(result), result.value(),
std::get<1>(result), true,
lineNumber,
lineColumn);
}
return Token::CreateImmediateValueToken(
0,
false,
lineNumber, lineNumber,
lineColumn); lineColumn);
} }
@@ -85,9 +96,19 @@ namespace Token
if (memoryPrefix == '$') if (memoryPrefix == '$')
{ {
auto const result = TryParseInt(valueString); auto const result = TryParseInt(valueString);
if (result.has_value())
{
return Token::CreateMemoryToken( return Token::CreateMemoryToken(
std::get<0>(result), result.value(),
std::get<1>(result), true,
lineNumber,
lineColumn);
}
return Token::CreateMemoryToken(
0,
false,
lineNumber, lineNumber,
lineColumn); lineColumn);
} }
@@ -224,6 +245,51 @@ end_state_loop:
} }
} }
// Handles a character literal whose opening quote is at line[lineColumn] and
// appends exactly one immediate value token to `tokens`:
// - a valid token carrying the character when the literal has the form 'c',
//   i.e. the closing quote sits exactly two positions after the opening one
// - an invalid token (value 0) when the literal is empty, holds more than one
//   character, or is never terminated
//
// The token is always reported at the column of the opening quote. On return
// lineColumn has been moved to the closing quote, or to line.size() when no
// closing quote exists.
void Tokenizer::ParseCharacterLiteral(
    std::string const & line,
    int const lineNumber,
    unsigned & lineColumn,
    std::vector<Token> & tokens) const
{
    unsigned const openingQuote = lineColumn;
    auto const closingQuote = line.find('\'', openingQuote + 1u);

    if (closingQuote == std::string::npos)
    {
        // Unterminated literal: the remainder of the line is consumed
        tokens.emplace_back(Token::CreateImmediateValueToken(
            0,
            false,
            lineNumber,
            openingQuote));
        lineColumn = line.size();
        return;
    }

    // Only the three character form (quote, character, quote) is accepted for
    // now; escaped characters are not supported.
    if (closingQuote == openingQuote + 2u)
    {
        tokens.emplace_back(Token::CreateImmediateValueToken(
            line[closingQuote - 1],
            true,
            lineNumber,
            openingQuote));
    }
    else
    {
        tokens.emplace_back(Token::CreateImmediateValueToken(
            0,
            false,
            lineNumber,
            openingQuote));
    }

    lineColumn = closingQuote;
}
void Tokenizer::Tokenize( void Tokenizer::Tokenize(
std::string const & line, std::string const & line,
int const lineNumber, int const lineNumber,
@@ -250,6 +316,12 @@ end_state_loop:
return; return;
} }
if (line[column] == '\'')
{
ParseCharacterLiteral(line, lineNumber, column, tokens);
break;
}
columnTokenStart = column; columnTokenStart = column;
switch(line[column]) switch(line[column])