lex: parse tab and exit, add comments

me 2026-01-11 10:58:05 +02:00
parent a44abe5e73
commit 2f32c928d6

@@ -6,56 +6,74 @@ int is_identifier_char(char);
int is_numeric(char);
TokenArray lex(char* txt, unsigned length) {
// The index for the input.
unsigned txt_index = 0;
// We'll put tokens we lexed successfully here.
Token* tokens = (Token*)malloc(length * sizeof(Token));
// Where to put the next token in tokens.
unsigned tokens_index = 0;
while (txt_index < length && txt[txt_index] != '\0') {
switch (txt[txt_index]) {
// Ignore spaces, tabs and newlines
case ' ': {
++txt_index;
break;
}
case '\t': {
++txt_index;
break;
}
case '\n': {
++txt_index;
break;
}
// EQUALS
case '=': {
tokens[tokens_index] = (Token){ .tag = EQUALS, .data.integer = 0, };
++tokens_index;
++txt_index;
break;
}
// COMMA
case ',': {
tokens[tokens_index] = (Token){ .tag = COMMA, .data.integer = 0, };
++tokens_index;
++txt_index;
break;
}
// OPENPAREN
case '(': {
tokens[tokens_index] = (Token){ .tag = OPENPAREN, .data.integer = 0, };
++tokens_index;
++txt_index;
break;
}
// CLOSEPAREN
case ')': {
tokens[tokens_index] = (Token){ .tag = CLOSEPAREN, .data.integer = 0, };
++tokens_index;
++txt_index;
break;
}
// OPENCURLY
case '{': {
tokens[tokens_index] = (Token){ .tag = OPENCURLY, .data.integer = 0, };
++tokens_index;
++txt_index;
break;
}
// CLOSECURLY
case '}': {
tokens[tokens_index] = (Token){ .tag = CLOSECURLY, .data.integer = 0, };
++tokens_index;
++txt_index;
break;
}
// Complex cases and errors.
default: {
// Identifiers.
if (is_identifier_char(txt[txt_index])) {
char* word = malloc(128);
unsigned word_index = 0;
@@ -66,7 +84,9 @@ TokenArray lex(char* txt, unsigned length) {
}
tokens[tokens_index] = (Token){ .tag = IDENTIFIER, .data.identifier = word, };
++tokens_index;
} else if (is_numeric(txt[txt_index])) {
}
// Integers.
else if (is_numeric(txt[txt_index])) {
char word[9] = { '\0' };
unsigned word_index = 0;
@@ -78,8 +98,11 @@ TokenArray lex(char* txt, unsigned length) {
int integer = atoi(word);
tokens[tokens_index] = (Token){ .tag = INTEGER, .data.integer = integer, };
++tokens_index;
} else {
}
// Not a token.
else {
fprintf(stderr, "unexpected character '%c'", txt[txt_index]);
exit(1);
}
}
}
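
Note: the default branch relies on is_identifier_char and is_numeric, which are only forward-declared at the top of the file and not defined in this hunk. A minimal sketch of what they might look like, assuming identifiers are ASCII letters and underscores (digits excluded, since is_identifier_char is tested before is_numeric and would otherwise swallow integer literals) and integer literals are plain decimal digits:

#include <ctype.h>

// Sketch only; the real definitions are not shown in this diff.
// Assumes identifiers consist of ASCII letters and underscores,
// and that integer literals are unsigned decimal digits.
int is_identifier_char(char c) {
    return isalpha((unsigned char)c) || c == '_';
}

int is_numeric(char c) {
    return isdigit((unsigned char)c) != 0;
}

Under that assumption, input such as foo = 12 lexes to an IDENTIFIER token for "foo", an EQUALS token, and an INTEGER token with value 12.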