lex: parse tab and exit, add comments

me 2026-01-11 10:58:05 +02:00
parent a44abe5e73
commit 2f32c928d6

@@ -6,56 +6,74 @@ int is_identifier_char(char);
int is_numeric(char);
TokenArray lex(char* txt, unsigned length) {
// The index for the input.
unsigned txt_index = 0;
// We'll put tokens we lexed successfully here.
Token* tokens = (Token*)malloc(length * sizeof(Token));
// Where to put the next token in tokens.
unsigned tokens_index = 0;
while (txt_index < length && txt[txt_index] != '\0') {
switch (txt[txt_index]) {
// Ignore spaces, tabs and newlines
case ' ': {
++txt_index;
break;
}
case '\t': {
++txt_index;
break;
}
case '\n': {
++txt_index;
break;
}
// EQUALS
case '=': {
tokens[tokens_index] = (Token){ .tag = EQUALS, .data.integer = 0, };
++tokens_index;
++txt_index;
break;
}
// COMMA
case ',': {
tokens[tokens_index] = (Token){ .tag = COMMA, .data.integer = 0, };
++tokens_index;
++txt_index;
break;
}
// OPENPAREN
case '(': {
tokens[tokens_index] = (Token){ .tag = OPENPAREN, .data.integer = 0, };
++tokens_index;
++txt_index;
break;
}
// CLOSEPAREN
case ')': {
tokens[tokens_index] = (Token){ .tag = CLOSEPAREN, .data.integer = 0, };
++tokens_index;
++txt_index;
break;
}
// OPENCURLY
case '{': {
tokens[tokens_index] = (Token){ .tag = OPENCURLY, .data.integer = 0, };
++tokens_index;
++txt_index;
break;
}
// CLOSECURLY
case '}': {
tokens[tokens_index] = (Token){ .tag = CLOSECURLY, .data.integer = 0, };
++tokens_index;
++txt_index;
break;
}
// Complex cases and errors.
default: {
// Identifiers.
if (is_identifier_char(txt[txt_index])) {
char* word = malloc(128);
unsigned word_index = 0;
@@ -66,7 +84,9 @@ TokenArray lex(char* txt, unsigned length) {
}
tokens[tokens_index] = (Token){ .tag = IDENTIFIER, .data.identifier = word, };
++tokens_index;
} else if (is_numeric(txt[txt_index])) {
}
// Integers.
else if (is_numeric(txt[txt_index])) {
char word[9] = { '\0' };
unsigned word_index = 0;
@@ -78,8 +98,11 @@ TokenArray lex(char* txt, unsigned length) {
int integer = atoi(word);
tokens[tokens_index] = (Token){ .tag = INTEGER, .data.integer = integer, };
++tokens_index;
} else {
}
// Not a token.
else {
fprintf(stderr, "unexpected character '%c'", txt[txt_index]);
exit(1);
}
}
}
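
Note: the default branch relies on is_identifier_char and is_numeric, which are only forward-declared at the top of the file and not defined in this hunk. A minimal sketch of what they might look like, assuming identifiers are ASCII letters and underscores (digits excluded, since is_identifier_char is tested before is_numeric and would otherwise swallow integer literals) and integer literals are plain decimal digits:

#include <ctype.h>

// Sketch only; the real definitions are not shown in this diff.
// Assumes identifiers consist of ASCII letters and underscores,
// and that integer literals are unsigned decimal digits.
int is_identifier_char(char c) {
    return isalpha((unsigned char)c) || c == '_';
}

int is_numeric(char c) {
    return isdigit((unsigned char)c) != 0;
}

Under that assumption, input such as foo = 12 lexes to an IDENTIFIER token for "foo", an EQUALS token, and an INTEGER token with value 12.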