Introduce simple character literal
This commit is contained in:
@@ -23,7 +23,9 @@ afterwards can be a bit cryptic as to where it originated.
|
||||
|
||||
- `[operation][number type]`, e.g. `divi` for divide (div) integer
|
||||
- `%[register]` for addressing registers
|
||||
- `$[value]` for using literals/immediate values
|
||||
- `$[value]` for using immediate (literal) integer values
|
||||
- `'a'` for using immediate character values (currently only supports
|
||||
non-escaped characters)
|
||||
- `;` for end of statement (mandatory)
|
||||
- `[label]:` for labels
|
||||
- `#[text]` for comments: any text is ignored till a newline (`\n`) is found
|
||||
@@ -63,7 +65,8 @@ The following whitespace characters are used to separate symbols:
|
||||
- newline (`\n`)
|
||||
|
||||
The following characters are used as identifiers:
|
||||
- dollar (`$`) for immediate (literal) values
|
||||
- dollar (`$`) for immediate (literal) integer values
|
||||
- single quote (`'`) for immediate character values
|
||||
- percentage (`%`) for register identifiers
|
||||
- colon (`:`) for jump labels
|
||||
- semicolon (`;`) for statement termination
|
||||
@@ -86,7 +89,7 @@ second byte of location `$900`).
|
||||
All symbols are reserved keywords and can therefore NOT be used as labels.
|
||||
There is currently no strict checking, so be careful.
|
||||
|
||||
## Preprocessor
|
||||
### Preprocessor
|
||||
|
||||
All preprocessor directives are prefixed by a `#`. Ill-formed preprocessor
|
||||
directives do not halt compilation, they are merely reported and then ignored.
|
||||
|
||||
@@ -24,9 +24,9 @@ jmp count_loop;
|
||||
|
||||
#DEFINE PRINT_CHAR $0
|
||||
# Hello world
|
||||
seti %A $72; # H
|
||||
seti %A 'H'; # H
|
||||
int PRINT_CHAR;
|
||||
seti %A $101; # e
|
||||
seti %A 'e'; # e
|
||||
int PRINT_CHAR;
|
||||
seti %A $108; # l
|
||||
int PRINT_CHAR;
|
||||
|
||||
@@ -10,8 +10,21 @@ namespace Token
|
||||
private:
|
||||
std::vector<std::pair<std::string, std::string>> substitutions;
|
||||
|
||||
Token ExtractToken(std::string string, int const lineNumber, int const lineColumn) const;
|
||||
void ParseComment(std::string const & string, int const lineNumber, int const lineColumn);
|
||||
Token ExtractToken(
|
||||
std::string string,
|
||||
int const lineNumber,
|
||||
int const lineColumn) const;
|
||||
|
||||
void ParseComment(
|
||||
std::string const & string,
|
||||
int const lineNumber,
|
||||
int const lineColumn);
|
||||
|
||||
void ParseCharacterLiteral(
|
||||
std::string const & line,
|
||||
int const lineNumber,
|
||||
unsigned & lineColumn,
|
||||
std::vector<Token> & tokens) const;
|
||||
|
||||
public:
|
||||
void Tokenize(std::string const & line, int const lineNumber, std::vector<Token> & tokens);
|
||||
|
||||
@@ -1,20 +1,21 @@
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <stdexcept>
|
||||
#include <token/errors.hpp>
|
||||
#include <token/tokenizer.hpp>
|
||||
|
||||
namespace Token
|
||||
{
|
||||
// Attempts to parse the leading part of `string` as a base-10 integer using
// std::stoi.
//
// Returns the parsed value on success, or std::nullopt when no conversion
// could be performed or when the value does not fit in an int. Note that
// std::stoi stops at the first character that is not part of the number, so
// trailing characters after a valid number are ignored rather than rejected.
std::optional<int> TryParseInt(std::string const & string)
{
    try
    {
        return std::stoi(string);
    }
    catch(std::invalid_argument const &)
    {
        // Nothing in the string could be converted to a number
        return std::nullopt;
    }
    catch(std::out_of_range const &)
    {
        // The digits form a value outside the range of int; report this as
        // a failed parse instead of letting the exception escape
        return std::nullopt;
    }
}
|
||||
|
||||
@@ -42,9 +43,19 @@ namespace Token
|
||||
case '$':
|
||||
{
|
||||
auto const result = TryParseInt(string.substr(1, string.size()));
|
||||
|
||||
if (result.has_value())
|
||||
{
|
||||
return Token::CreateImmediateValueToken(
|
||||
result.value(),
|
||||
true,
|
||||
lineNumber,
|
||||
lineColumn);
|
||||
}
|
||||
|
||||
return Token::CreateImmediateValueToken(
|
||||
std::get<0>(result),
|
||||
std::get<1>(result),
|
||||
0,
|
||||
false,
|
||||
lineNumber,
|
||||
lineColumn);
|
||||
}
|
||||
@@ -85,9 +96,19 @@ namespace Token
|
||||
if (memoryPrefix == '$')
|
||||
{
|
||||
auto const result = TryParseInt(valueString);
|
||||
|
||||
if (result.has_value())
|
||||
{
|
||||
return Token::CreateMemoryToken(
|
||||
result.value(),
|
||||
true,
|
||||
lineNumber,
|
||||
lineColumn);
|
||||
}
|
||||
|
||||
return Token::CreateMemoryToken(
|
||||
std::get<0>(result),
|
||||
std::get<1>(result),
|
||||
0,
|
||||
false,
|
||||
lineNumber,
|
||||
lineColumn);
|
||||
}
|
||||
@@ -224,6 +245,51 @@ end_state_loop:
|
||||
}
|
||||
}
|
||||
|
||||
// Modifies the lineColumn parameter to point at the character literal end
|
||||
void Tokenizer::ParseCharacterLiteral(
|
||||
std::string const & line,
|
||||
int const lineNumber,
|
||||
unsigned & lineColumn,
|
||||
std::vector<Token> & tokens) const
|
||||
{
|
||||
for(unsigned int i = lineColumn + 1; i < line.size(); ++i)
|
||||
{
|
||||
if (line[i] == '\'')
|
||||
{
|
||||
// Character literal must be exactly length 3 for now (2x ' + 1
|
||||
// character in between)
|
||||
if (lineColumn + 2u != i)
|
||||
{
|
||||
tokens.emplace_back(Token::CreateImmediateValueToken(
|
||||
0,
|
||||
false,
|
||||
lineNumber,
|
||||
lineColumn));
|
||||
}
|
||||
else
|
||||
{
|
||||
tokens.emplace_back(Token::CreateImmediateValueToken(
|
||||
line[i - 1],
|
||||
true,
|
||||
lineNumber,
|
||||
lineColumn));
|
||||
}
|
||||
|
||||
lineColumn = i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Non terminated character literal!
|
||||
tokens.emplace_back(Token::CreateImmediateValueToken(
|
||||
0,
|
||||
false,
|
||||
lineNumber,
|
||||
lineColumn));
|
||||
|
||||
lineColumn = line.size();
|
||||
}
|
||||
|
||||
void Tokenizer::Tokenize(
|
||||
std::string const & line,
|
||||
int const lineNumber,
|
||||
@@ -250,6 +316,12 @@ end_state_loop:
|
||||
return;
|
||||
}
|
||||
|
||||
if (line[column] == '\'')
|
||||
{
|
||||
ParseCharacterLiteral(line, lineNumber, column, tokens);
|
||||
break;
|
||||
}
|
||||
|
||||
columnTokenStart = column;
|
||||
|
||||
switch(line[column])
|
||||
|
||||
Reference in New Issue
Block a user