From 2f32c928d6cae60b532b498d72897a7fabe135e3 Mon Sep 17 00:00:00 2001 From: me Date: Sun, 11 Jan 2026 10:58:05 +0200 Subject: [PATCH] lex: parse tab and exit, add comments --- first-interpreter/src/lex.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/first-interpreter/src/lex.c b/first-interpreter/src/lex.c index 7218460..fab6ed0 100644 --- a/first-interpreter/src/lex.c +++ b/first-interpreter/src/lex.c @@ -6,56 +6,74 @@ int is_identifier_char(char); int is_numeric(char); TokenArray lex(char* txt, unsigned length) { + // The index for the input. unsigned txt_index = 0; + + // We'll put tokens we lexed successfully here. Token* tokens = (Token*)malloc(length * sizeof(Token)); + // Where to put the next token in tokens. unsigned tokens_index = 0; + while (txt_index < length && txt[txt_index] != '\0') { switch (txt[txt_index]) { + // Ignore spaces, tabs and newlines case ' ': { ++txt_index; break; } + case '\t': { + ++txt_index; + break; + } case '\n': { ++txt_index; break; } + // EQUALS case '=': { tokens[tokens_index] = (Token){ .tag = EQUALS, .data.integer = 0, }; ++tokens_index; ++txt_index; break; } + // COMMA case ',': { tokens[tokens_index] = (Token){ .tag = COMMA, .data.integer = 0, }; ++tokens_index; ++txt_index; break; } + // OPENPAREN case '(': { tokens[tokens_index] = (Token){ .tag = OPENPAREN, .data.integer = 0, }; ++tokens_index; ++txt_index; break; } + // CLOSEPAREN case ')': { tokens[tokens_index] = (Token){ .tag = CLOSEPAREN, .data.integer = 0, }; ++tokens_index; ++txt_index; break; } + // OPENCURLY case '{': { tokens[tokens_index] = (Token){ .tag = OPENCURLY, .data.integer = 0, }; ++tokens_index; ++txt_index; break; } + // CLOSECURLY case '}': { tokens[tokens_index] = (Token){ .tag = CLOSECURLY, .data.integer = 0, }; ++tokens_index; ++txt_index; break; } + // Complex cases and errors. default: { + // Identifiers. if (is_identifier_char(txt[txt_index])) { char* word = malloc(128); unsigned word_index = 0; @@ -66,7 +84,9 @@ TokenArray lex(char* txt, unsigned length) { } tokens[tokens_index] = (Token){ .tag = IDENTIFIER, .data.identifier = word, }; ++tokens_index; - } else if (is_numeric(txt[txt_index])) { + } + // Integers. + else if (is_numeric(txt[txt_index])) { char word[9] = { '\0' }; unsigned word_index = 0; @@ -78,8 +98,11 @@ TokenArray lex(char* txt, unsigned length) { int integer = atoi(word); tokens[tokens_index] = (Token){ .tag = INTEGER, .data.integer = integer, }; ++tokens_index; - } else { + } + // Not a token. + else { fprintf(stderr, "unexpected character '%c'", txt[txt_index]); + exit(1); } } }