lex: skip tab characters, exit on unexpected character, add comments

This commit is contained in:
me 2026-01-11 10:58:05 +02:00
parent a44abe5e73
commit 2f32c928d6

View file

@ -6,56 +6,74 @@ int is_identifier_char(char);
int is_numeric(char); int is_numeric(char);
TokenArray lex(char* txt, unsigned length) { TokenArray lex(char* txt, unsigned length) {
// The index for the input.
unsigned txt_index = 0; unsigned txt_index = 0;
// We'll put tokens we lexed successfully here.
Token* tokens = (Token*)malloc(length * sizeof(Token)); Token* tokens = (Token*)malloc(length * sizeof(Token));
// Where to put the next token in tokens.
unsigned tokens_index = 0; unsigned tokens_index = 0;
while (txt_index < length && txt[txt_index] != '\0') { while (txt_index < length && txt[txt_index] != '\0') {
switch (txt[txt_index]) { switch (txt[txt_index]) {
// Ignore spaces, tabs and newlines
case ' ': { case ' ': {
++txt_index; ++txt_index;
break; break;
} }
case '\t': {
++txt_index;
break;
}
case '\n': { case '\n': {
++txt_index; ++txt_index;
break; break;
} }
// EQUALS
case '=': { case '=': {
tokens[tokens_index] = (Token){ .tag = EQUALS, .data.integer = 0, }; tokens[tokens_index] = (Token){ .tag = EQUALS, .data.integer = 0, };
++tokens_index; ++tokens_index;
++txt_index; ++txt_index;
break; break;
} }
// COMMA
case ',': { case ',': {
tokens[tokens_index] = (Token){ .tag = COMMA, .data.integer = 0, }; tokens[tokens_index] = (Token){ .tag = COMMA, .data.integer = 0, };
++tokens_index; ++tokens_index;
++txt_index; ++txt_index;
break; break;
} }
// OPENPAREN
case '(': { case '(': {
tokens[tokens_index] = (Token){ .tag = OPENPAREN, .data.integer = 0, }; tokens[tokens_index] = (Token){ .tag = OPENPAREN, .data.integer = 0, };
++tokens_index; ++tokens_index;
++txt_index; ++txt_index;
break; break;
} }
// CLOSEPAREN
case ')': { case ')': {
tokens[tokens_index] = (Token){ .tag = CLOSEPAREN, .data.integer = 0, }; tokens[tokens_index] = (Token){ .tag = CLOSEPAREN, .data.integer = 0, };
++tokens_index; ++tokens_index;
++txt_index; ++txt_index;
break; break;
} }
// OPENCURLY
case '{': { case '{': {
tokens[tokens_index] = (Token){ .tag = OPENCURLY, .data.integer = 0, }; tokens[tokens_index] = (Token){ .tag = OPENCURLY, .data.integer = 0, };
++tokens_index; ++tokens_index;
++txt_index; ++txt_index;
break; break;
} }
// CLOSECURLY
case '}': { case '}': {
tokens[tokens_index] = (Token){ .tag = CLOSECURLY, .data.integer = 0, }; tokens[tokens_index] = (Token){ .tag = CLOSECURLY, .data.integer = 0, };
++tokens_index; ++tokens_index;
++txt_index; ++txt_index;
break; break;
} }
// Complex cases and errors.
default: { default: {
// Identifiers.
if (is_identifier_char(txt[txt_index])) { if (is_identifier_char(txt[txt_index])) {
char* word = malloc(128); char* word = malloc(128);
unsigned word_index = 0; unsigned word_index = 0;
@ -66,7 +84,9 @@ TokenArray lex(char* txt, unsigned length) {
} }
tokens[tokens_index] = (Token){ .tag = IDENTIFIER, .data.identifier = word, }; tokens[tokens_index] = (Token){ .tag = IDENTIFIER, .data.identifier = word, };
++tokens_index; ++tokens_index;
} else if (is_numeric(txt[txt_index])) { }
// Integers.
else if (is_numeric(txt[txt_index])) {
char word[9] = { '\0' }; char word[9] = { '\0' };
unsigned word_index = 0; unsigned word_index = 0;
@ -78,8 +98,11 @@ TokenArray lex(char* txt, unsigned length) {
int integer = atoi(word); int integer = atoi(word);
tokens[tokens_index] = (Token){ .tag = INTEGER, .data.integer = integer, }; tokens[tokens_index] = (Token){ .tag = INTEGER, .data.integer = integer, };
++tokens_index; ++tokens_index;
} else { }
// Not a token.
else {
fprintf(stderr, "unexpected character '%c'", txt[txt_index]); fprintf(stderr, "unexpected character '%c'", txt[txt_index]);
exit(1);
} }
} }
} }