my first interpreter

2026-01-03 23:59:53 +02:00 · 2026-01-03 23:59:53 +02:00 · 16e8ec36e1
commit 16e8ec36e1
parent 40620b6a58
14 changed files with 735 additions and 0 deletions
--- a/first-interpreter/.gitignore
+++ b/first-interpreter/.gitignore
@ -0,0 +1 @@
 _build
--- a/first-interpreter/build.hs
+++ b/first-interpreter/build.hs
@ -0,0 +1,46 @@
 #!/usr/bin/env cabal
 {- cabal:
 build-depends:
  base,
  bytestring,
  cereal,
  hspec,
  process,
  shake,
 -}
 -- initial version taken from https://shakebuild.com/manual
 import Development.Shake
 import Development.Shake.FilePath
 import Development.Shake.Util
 -- | C compiler executable used for every compile and link step.
 cc :: String
 cc = "gcc"
 -- | Flags passed to every 'cc' invocation (@-ggdb@ asks gcc for debugging
 -- information).
 flags :: String
 flags = "-ggdb"
 -- | Schedules removal of everything under the @build@ directory.
 -- NOTE(review): this targets @build@, while the rules in 'main' write to
 -- @_build@ — confirm that a separate @build@ directory is intended.
 clearCcCache :: Action ()
 clearCcCache = removeFilesAfter "build" ["//*"]
 -- | Entry point of the Shake build script. Shake keeps its own state in
 -- @_build@, and the default target is the interpreter executable.
 main :: IO ()
 main = shakeArgs shakeOptions{shakeFiles="_build"} $ do
  let interpreter = "_build/interpreter" <.> exe
  want [interpreter]
  -- clean: schedule removal of generated files, then run cabal's own clean.
  phony "clean" $ do
    putInfo "Cleaning files in _build"
    removeFilesAfter "_build" ["//*"]
    clearCcCache
    removeFilesAfter "newdist" ["//*"]
    cmd_ "cabal" "clean"
  -- run: make sure the interpreter is built, then run it on program.code.
  phony "run" $ do
    need [interpreter]
    putInfo "Running interpreter"
    cmd_ "_build/interpreter" ["program.code"]
  -- Link rule: one object file per C source found under src.
  interpreter %> \binary -> do
    sources <- getDirectoryFiles "src" ["//*.c"]
    let objects = map (\source -> "_build" </> "normal" </> "src" </> source -<.> "o") sources
    need objects
    cmd_ (cc <> " " <> flags <> " -o") [binary] objects
  -- Compile rule: strip the "_build/normal" prefix to find the source file and
  -- let the compiler write a Makefile-style dependency file next to the object.
  "_build/normal//*.o" %> \object -> do
    let source = dropDirectory1 $ dropDirectory1 $ object -<.> "c"
        depFile = object -<.> "m"
    cmd_ (cc <> " " <> flags <> " -c") [source] "-o" [object] "-MMD -MF" [depFile]
    neededMakefileDependencies depFile
--- a/first-interpreter/program.code
+++ b/first-interpreter/program.code
@ -0,0 +1,9 @@
 sum = 0
 counter = 10
 while counter {
    sum = add(sum, counter)
    counter = add(counter, negate(1))
 }
 print(sum)
--- a/first-interpreter/src/ast.c
+++ b/first-interpreter/src/ast.c
@ -0,0 +1,57 @@
 #include "ast.h"
 #include <stdio.h>
 // Writes `expr` to stdout: a literal as a decimal number, a variable as its
 // name, and a function call as name(arg, arg, ...), printing arguments
 // recursively.
 void print_expr(Expr expr) {
    if (expr.tag == LITERAL) {
        printf("%d", expr.data.integer);
        return;
    }
    if (expr.tag == VARIABLE) {
        printf("%s", expr.data.variable);
        return;
    }
    if (expr.tag == FUNCTION) {
        Expr* args = (Expr*)expr.data.function.args.elements;
        unsigned count = expr.data.function.args.length;
        printf("%s(", expr.data.function.name);
        for (unsigned k = 0; k < count; ++k) {
            // Separator goes between arguments, never after the last one.
            if (k > 0) {
                printf(", ");
            }
            print_expr(args[k]);
        }
        printf(")");
    }
 }
 // Writes `indentation` levels of four-space indentation to stdout.
 static void print_indent(unsigned indentation) {
    for (unsigned i = 0; i < indentation; ++i) {
        printf("    ");
    }
 }
 // Writes `stmt` to stdout, indented by `indentation` levels, without a
 // trailing newline. The body of a while statement is printed one level
 // deeper, one statement per line.
 void print_stmt(Stmt stmt, unsigned indentation) {
    print_indent(indentation);
    switch (stmt.tag) {
    case SET:
        printf("%s = ", stmt.data.Set.name);
        print_expr(stmt.data.Set.expr);
        break;
    case WHILE:
        printf("while ");
        print_expr(stmt.data.While.condition);
        printf(" {\n");
        for (unsigned i = 0; i < stmt.data.While.block.length; ++i) {
            print_stmt(((Stmt*)stmt.data.While.block.elements)[i], indentation + 1);
            printf("\n");
        }
        // The closing brace sits on its own line, so it needs the same
        // indentation as the `while` keyword; otherwise nested loops print
        // their brace at column 0.
        print_indent(indentation);
        printf("}");
        break;
    case EXPR:
        print_expr(stmt.data.Expr.expr);
        break;
    }
 }
 // Prints every top-level statement of the program at indentation level 0,
 // each followed by a newline.
 void print_ast(StmtArray stmts) {
    unsigned position = 0;
    while (position != stmts.length) {
        print_stmt(stmts.stmts[position], 0);
        printf("\n");
        ++position;
    }
 }
--- a/first-interpreter/src/ast.h
+++ b/first-interpreter/src/ast.h
@ -0,0 +1,43 @@
 #ifndef INTERPRETER_AST_H
 #define INTERPRETER_AST_H
 // Untyped buffer plus element count. The element type is not recorded here;
 // users cast `elements` to the type they stored (e.g. Expr* for call
 // arguments, Stmt* for a while body).
 typedef struct {
    void* elements; // start of the buffer
    unsigned length; // number of elements (not bytes)
 } Array;
 // Expression node: `tag` selects which member of `data` is valid.
 typedef struct Expr {
    enum {
        LITERAL, // integer constant, stored in data.integer
        VARIABLE, // variable reference, name in data.variable
        FUNCTION, // function call, described by data.function
    } tag;
    union {
        int integer;
        char* variable;
        // `args.elements` holds the call's arguments as an Expr array.
        struct { char* name; Array args; } function;
    } data;
 } Expr;
 // Statement node: `tag` selects which member of `data` is valid.
 typedef struct {
    enum {
        SET, // assignment `name = expr`, stored in data.Set
        WHILE, // loop `while condition { ... }`, stored in data.While
        EXPR, // expression used as a statement, stored in data.Expr
    } tag;
    union {
        struct { char* name; Expr expr; } Set;
        // `block.elements` holds the loop body as a Stmt array.
        struct { Expr condition; Array block; } While;
        struct { Expr expr; } Expr;
    } data;
 } Stmt;
 // Typed sequence of statements, e.g. a whole program or a parsed block.
 typedef struct {
    Stmt* stmts; // first statement
    unsigned length; // number of statements
 } StmtArray;
 // Prints one expression to stdout without a trailing newline.
 void print_expr(Expr);
 // Prints every statement of a program to stdout, one per line.
 void print_ast(StmtArray);
 #endif
--- a/first-interpreter/src/execute.c
+++ b/first-interpreter/src/execute.c
@ -0,0 +1,175 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 #include "ast.h"
 // Storage for variable values. Memory is passed by value, so the fill count
 // lives behind a pointer and every copy observes the same count.
 typedef struct {
    unsigned* i; // number of cells in use, i.e. index of the next free cell
    Array memory; // backing int buffer; `length` is its capacity in cells
 } Memory;
 // One binding in the environment's singly linked list.
 struct Cell {
    char* name; // variable name (pointer is stored, not copied)
    unsigned index; // index of the variable's value inside Memory
    struct Cell* next; // next (older) binding, or NULL at the end
 };
 // Maps variable names to memory indices; an empty environment has a NULL head.
 typedef struct {
    struct Cell* next; // most recently inserted binding
 } Environment;
 // Allocates an empty Memory with room for 1024 int cells and a fill count
 // of 0. Terminates the process with exit code 1 if either allocation fails.
 Memory new_memory() {
    unsigned size = 1024;
    unsigned* i = (unsigned*)malloc(sizeof(unsigned));
    int* mem = (int*)malloc(sizeof(int) * size);
    // Check before the first dereference; a failed malloc returns NULL.
    if (i == NULL || mem == NULL) {
        printf("Error: out of memory.");
        exit(1);
    }
    *i = 0;
    return (Memory){
        .i = i,
        .memory = {
            .length = size,
            .elements = mem,
        }
    };
 }
 Environment new_environment() {
    return (Environment) { .next = NULL };
 }
 // Searches `env` from the newest binding to the oldest and returns the memory
 // index of the first binding named `name`, or -1 if there is none.
 int lookup_env(char* name, Environment env) {
    for (struct Cell* node = env.next; node != NULL; node = node->next) {
        if (strcmp(node->name, name) == 0) {
            return node->index;
        }
    }
    return -1;
 }
 // Stores `n` in the next free memory cell and returns a new environment in
 // which `name` is bound to that cell, placed in front of the bindings of
 // `env`. Terminates the process with exit code 1 when memory is full or the
 // binding cannot be allocated.
 Environment insert(char* name, int n, Memory memory, Environment env) {
    // `*memory.i` is the index about to be written, so it only has to be a
    // valid index; the last cell of the buffer is usable too.
    if (*memory.i >= memory.memory.length) {
        printf("Error: out of memory.");
        exit(1);
    }
    struct Cell* cell = (struct Cell*)malloc(sizeof(struct Cell));
    if (cell == NULL) {
        printf("Error: out of memory.");
        exit(1);
    }
    ((int*)(memory.memory.elements))[*memory.i] = n;
    *cell = (struct Cell) {
        .name = name,
        .index = *memory.i,
        .next = env.next,
    };
    ++*memory.i;
    return (Environment) {.next = cell};
 }
 // Returns a pointer to memory cell `index`, or NULL when `index` is negative
 // or not below the number of cells currently in use.
 int* lookup_mem(int index, Memory mem) {
    if (index < 0) {
        return NULL;
    }
    if ((unsigned)index >= *(mem.i)) {
        return NULL;
    }
    int* cells = (int*)(mem.memory.elements);
    return &cells[index];
 }
 // Resolves `name` through `env` and returns a pointer to its value cell, or
 // NULL when the name is unbound or its index is not a cell in use.
 int* lookup(char* name, Memory memory, Environment env) {
    int slot = lookup_env(name, env);
    if (slot < 0) {
        return NULL;
    }
    return lookup_mem(slot, memory);
 }
 // Evaluates `expr` and returns its integer value.
 //   LITERAL  -> the constant itself
 //   VARIABLE -> the variable's current value; exits with code 1 if unbound
 //   FUNCTION -> built-ins: print(x) writes x and a newline to stdout and
 //               yields 0, add(a, b) yields a + b, negate(x) yields -x.
 //               A built-in called with the wrong number of arguments exits
 //               with code 1.
 // Arguments are evaluated left to right.
 int eval_expr(Expr expr, Memory memory, Environment env) {
    switch (expr.tag) {
    case LITERAL:
        return expr.data.integer;
    case VARIABLE: {
        int* result = lookup(expr.data.variable, memory, env);
        if (result == NULL) {
            printf("Error: variable not found '%s'\n", expr.data.variable);
            exit(1);
        }
        return *result;
    }
    case FUNCTION: {
        char* name = expr.data.function.name;
        Expr* args = (Expr*)expr.data.function.args.elements;
        unsigned count = expr.data.function.args.length;
        if (strcmp(name, "print") == 0) {
            if (count != 1) {
                printf("Error: print expects a single argument.\n");
                exit(1);
            }
            int arg = eval_expr(args[0], memory, env);
            printf("%d\n", arg);
            return 0;
        }
        if (strcmp(name, "add") == 0) {
            if (count != 2) {
                // This message used to name `negate` and the wrong arity.
                printf("Error: add expects two arguments.\n");
                exit(1);
            }
            int arg1 = eval_expr(args[0], memory, env);
            int arg2 = eval_expr(args[1], memory, env);
            return arg1 + arg2;
        }
        if (strcmp(name, "negate") == 0) {
            if (count != 1) {
                printf("Error: negate expects a single argument.\n");
                exit(1);
            }
            int arg = eval_expr(args[0], memory, env);
            return 0 - arg;
        }
        // NOTE(review): a call to an unknown function silently evaluates to 0
        // without evaluating its arguments — kept as is; consider reporting it.
        return 0;
    }
    }
    // Only reachable with a corrupted tag; never fall off the end of a
    // function that must return a value.
    printf("Error: unknown expression tag %d\n", expr.tag);
    exit(1);
 }
 // Executes one statement and returns the environment to use for the
 // statements that follow it.
 //   SET   -> evaluates the expression, then overwrites the variable if it is
 //            already bound; otherwise binds it and returns the extended
 //            environment.
 //   WHILE -> runs the body for as long as the condition evaluates to nonzero.
 //            Each iteration starts from `env`, so variables first assigned
 //            inside the body are local to that iteration.
 //   EXPR  -> evaluates the expression for its side effects.
 Environment interpret_stmt(Stmt stmt, Memory memory, Environment env) {
    switch (stmt.tag) {
    case SET: {
        int result = eval_expr(stmt.data.Set.expr, memory, env);
        int* slot = lookup(stmt.data.Set.name, memory, env);
        if (slot == NULL) {
            return insert(stmt.data.Set.name, result, memory, env);
        }
        *slot = result;
        break;
    }
    case WHILE: {
        Stmt* body = (Stmt*)stmt.data.While.block.elements;
        while (eval_expr(stmt.data.While.condition, memory, env)) {
            // Remember how many cells were in use before the body ran.
            unsigned mark = *memory.i;
            Environment scope = env;
            for (unsigned i = 0; i < stmt.data.While.block.length; ++i) {
                scope = interpret_stmt(body[i], memory, scope);
            }
            // Bindings created by the body are dropped after every iteration.
            // Release their cells and memory slots as well; otherwise a loop
            // that assigns a fresh variable consumes one slot per iteration
            // and eventually aborts with "out of memory".
            while (scope.next != env.next) {
                struct Cell* dead = scope.next;
                scope.next = dead->next;
                free(dead);
            }
            *memory.i = mark;
        }
        break;
    }
    case EXPR:
        eval_expr(stmt.data.Expr.expr, memory, env);
        break;
    }
    return env;
 }
 // Runs a whole program: creates fresh memory and an empty environment,
 // executes the statements in order, and releases the interpreter state again.
 void execute(StmtArray stmts) {
    Memory memory = new_memory();
    Environment environment = new_environment();
    for (unsigned pc = 0; pc < stmts.length; ++pc) {
        Stmt stmt = stmts.stmts[pc];
        environment = interpret_stmt(stmt, memory, environment);
    }
    // Free the binding list and the value storage so that repeated calls do
    // not leak. Names are owned by the tokens and are left alone.
    struct Cell* cell = environment.next;
    while (cell != NULL) {
        struct Cell* next = cell->next;
        free(cell);
        cell = next;
    }
    free(memory.memory.elements);
    free(memory.i);
 }
--- a/first-interpreter/src/execute.h
+++ b/first-interpreter/src/execute.h
@ -0,0 +1,8 @@
 #ifndef EXECUTE_H
 #define EXECUTE_H
 #include "ast.h"
 // Executes the statements of a parsed program in order.
 void execute(StmtArray);
 #endif
--- a/first-interpreter/src/interpreter.c
+++ b/first-interpreter/src/interpreter.c
@ -0,0 +1,24 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include "lexer.h"
 #include "parser.h"
 #include "execute.h"
 #include "ast.h"
 // Runs the whole pipeline on `code` (`length` bytes of source text) and
 // reports every stage on stdout: the token stream, the parsed program, and
 // finally the output of executing it.
 void run(char* code, unsigned length) {
    // Stage 1: lexing.
    TokenArray lexed = scan(code, length);
    puts("Tokens:\n");
    print_TokenArray(lexed);
    puts("\n");

    // Stage 2: parsing.
    StmtArray ast = parse_program(lexed);
    puts("\nProgram:\n");
    print_ast(ast);
    puts("\n");

    // Stage 3: execution.
    puts("\nRun:\n");
    execute(ast);
    puts("\n");
 }
--- a/first-interpreter/src/interpreter.h
+++ b/first-interpreter/src/interpreter.h
@ -0,0 +1,6 @@
 #ifndef INTERPRETER_H
 #define INTERPRETER_H
 // Lexes, parses and executes the given source text; the second argument is
 // the length of the text in bytes.
 void run(char*, unsigned);
 #endif
--- a/first-interpreter/src/lexer.c
+++ b/first-interpreter/src/lexer.c
@ -0,0 +1,127 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include "lexer.h"
 TokenArray scan(char* txt, unsigned length) {
    unsigned txt_index = 0;
    Token* tokens = (Token*)malloc(length * sizeof(Token));
    unsigned tokens_index = 0;
    while (txt_index < length && txt[txt_index] != '\0') {
        switch (txt[txt_index]) {
        case ' ': {
            ++txt_index;
            break;
        }
        case '\n': {
            ++txt_index;
            break;
        }
        case '=': {
            tokens[tokens_index] = (Token){ .tag = EQUALS, .data.integer = 0, };
            ++tokens_index;
            ++txt_index;
            break;
        }
        case ',': {
            tokens[tokens_index] = (Token){ .tag = COMMA, .data.integer = 0, };
            ++tokens_index;
            ++txt_index;
            break;
        }
        case '(': {
            tokens[tokens_index] = (Token){ .tag = OPENPAREN, .data.integer = 0, };
            ++tokens_index;
            ++txt_index;
            break;
        }
        case ')': {
            tokens[tokens_index] = (Token){ .tag = CLOSEPAREN, .data.integer = 0, };
            ++tokens_index;
            ++txt_index;
            break;
        }
        case '{': {
            tokens[tokens_index] = (Token){ .tag = OPENCURLY, .data.integer = 0, };
            ++tokens_index;
            ++txt_index;
            break;
        }
        case '}': {
            tokens[tokens_index] = (Token){ .tag = CLOSECURLY, .data.integer = 0, };
            ++tokens_index;
            ++txt_index;
            break;
        }
        default: {
            if (is_alpha(txt[txt_index])) {
                char* word = malloc(128);
                unsigned word_index = 0;
                while (txt_index < length && txt[txt_index] != '\0' && is_alpha(txt[txt_index]) && word_index < 128) {
                    word[word_index] = txt[txt_index];
                    ++word_index;
                    ++txt_index;
                }
                tokens[tokens_index] = (Token){ .tag = IDENTIFIER, .data.identifier = word, };
                ++tokens_index;
            } else if (is_numeric(txt[txt_index])) {
                char word[9] = { '\0' };
                unsigned word_index = 0;
                while (txt_index < length && txt[txt_index] != '\0' && is_numeric(txt[txt_index]) && word_index < 9) {
                    word[word_index] = txt[txt_index];
                    ++word_index;
                    ++txt_index;
                }
                int integer = atoi(word);
                tokens[tokens_index] = (Token){ .tag = INTEGER, .data.integer = integer, };
                ++tokens_index;
            } else {
                printf("unexpected character '%c'", txt[txt_index]);
            }
        }
        }
    }
    return (TokenArray){ .tokens = tokens, .length = tokens_index };
 }
 // Returns 1 if `c` may appear in an identifier (underscore or a lowercase
 // ASCII letter), otherwise 0.
 int is_alpha(char c) {
    if (c == '_') {
        return 1;
    }
    return c >= 'a' && c <= 'z';
 }
 // Returns 1 if `c` is an ASCII decimal digit, otherwise 0.
 int is_numeric(char c) {
    if (c < '0') {
        return 0;
    }
    return c <= '9';
 }
 // Prints all tokens on one line of stdout, each followed by a single space.
 // Identifiers are wrapped in single quotes, integers are printed in decimal
 // and punctuation is printed as the character itself.
 void print_TokenArray(TokenArray tokens) {
    for (unsigned position = 0; position < tokens.length; ++position) {
        const Token* current = &tokens.tokens[position];
        // Fixed text of a punctuation token; stays NULL for tokens with a payload.
        const char* symbol = NULL;
        switch (current->tag) {
        case IDENTIFIER:
            printf("'%s' ", current->data.identifier);
            break;
        case INTEGER:
            printf("%d ", current->data.integer);
            break;
        case OPENPAREN: symbol = "( "; break;
        case CLOSEPAREN: symbol = ") "; break;
        case OPENCURLY: symbol = "{ "; break;
        case CLOSECURLY: symbol = "} "; break;
        case EQUALS: symbol = "= "; break;
        case COMMA: symbol = ", "; break;
        }
        if (symbol != NULL) {
            fputs(symbol, stdout);
        }
    }
 }
--- a/first-interpreter/src/lexer.h
+++ b/first-interpreter/src/lexer.h
@ -0,0 +1,34 @@
 #ifndef LEXER_H
 #define LEXER_H
 // Kinds of token produced by scan().
 typedef enum {
    IDENTIFIER, // name; text in Token.data.identifier
    INTEGER, // number; value in Token.data.integer
    OPENPAREN, // (
    CLOSEPAREN, // )
    OPENCURLY, // {
    CLOSECURLY, // }
    EQUALS, // =
    COMMA, // ,
 } TokenTag;
 // One token; `tag` decides which member of `data` carries the payload.
 typedef struct {
    TokenTag tag;
    union {
        char* identifier; // used when tag is IDENTIFIER
        int integer; // used when tag is INTEGER; set to 0 for punctuation
    } data;
 } Token;
 // Sequence of tokens as returned by scan().
 typedef struct {
    Token* tokens; // first token
    unsigned length; // number of tokens
 } TokenArray;
 // Tokenizes the given text; the second argument is its length in bytes.
 TokenArray scan(char*, unsigned);
 // Nonzero for '_' and lowercase ASCII letters.
 int is_alpha(char);
 // Nonzero for ASCII decimal digits.
 int is_numeric(char);
 // Prints the tokens to stdout, separated by spaces.
 void print_TokenArray(TokenArray);
 #endif
--- a/first-interpreter/src/main.c
+++ b/first-interpreter/src/main.c
@ -0,0 +1,29 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include "interpreter.h"
 // Program entry point: reads the file named by the single command-line
 // argument into memory and hands it to run(). Exits with code 1 on a usage
 // error or when the file cannot be opened, measured or read.
 int main(int argc, char** argv) {
    // Read file
    if (argc != 2) {
        puts("USAGE: interpreter FILE\n");
        exit(1);
    }
    FILE* file = fopen(argv[1], "r");
    if (!file) {
        printf("Error opening file: %s", argv[1]);
        exit(1);
    }
    // ftell reports failure as -1, so keep the result signed until checked.
    long file_size = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        file_size = ftell(file);
    }
    if (file_size < 0) {
        printf("Error reading file: %s", argv[1]);
        fclose(file);
        exit(1);
    }
    rewind(file);
    // Heap buffer instead of alloca: a large file must not overflow the
    // stack. One extra byte holds a terminating NUL.
    char* txt = malloc((size_t)file_size + 1);
    if (txt == NULL) {
        printf("Error: out of memory.");
        fclose(file);
        exit(1);
    }
    // In text mode fewer bytes than `file_size` may be delivered, so the
    // number actually read is the length of the program.
    size_t bytes_read = fread(txt, 1, (size_t)file_size, file);
    if (ferror(file)) {
        printf("Error reading file: %s", argv[1]);
        free(txt);
        fclose(file);
        exit(1);
    }
    fclose(file);
    txt[bytes_read] = '\0';
    // Run
    run(txt, (unsigned)bytes_read);
    free(txt);
    return 0;
 }
--- a/first-interpreter/src/parser.c
+++ b/first-interpreter/src/parser.c
@ -0,0 +1,167 @@
 #include "parser.h"
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 // Returns 1 if `string` is exactly the keyword "while", otherwise 0.
 int is_while(char* string) {
    return strcmp("while", string) == 0;
 }
 // Consumes the current token if it has tag `tag`. Returns 1 and advances
 // `*tokens_index` on a match; returns 0 and leaves the index alone on a
 // mismatch or at the end of the token stream.
 int maybe_parse_token(TokenArray tokens, unsigned* tokens_index, TokenTag tag) {
    unsigned position = *tokens_index;
    if (position >= tokens.length) {
        return 0;
    }
    if (tokens.tokens[position].tag != tag) {
        return 0;
    }
    *tokens_index = position + 1;
    return 1;
 }
 // Consumes a token with tag `tag`; if the current token is anything else (or
 // the stream has ended) reports a parse error and exits with code 1.
 void parse_token(TokenArray tokens, unsigned* tokens_index, TokenTag tag) {
    if (maybe_parse_token(tokens, tokens_index, tag)) {
        return;
    }
    printf("Parse error: unexpected token. Expected %d\n", tag);
    exit(1);
 }
 // Consumes the current token and returns its identifier text. Reports a
 // parse error and exits with code 1 if the stream has ended or the token is
 // not an identifier.
 char* parse_identifier(TokenArray tokens, unsigned* tokens_index) {
    if (*tokens_index >= tokens.length) {
        printf("Parse error: unexpected end of text.\n");
        exit(1);
    }
    Token current = tokens.tokens[*tokens_index];
    *tokens_index += 1;
    if (current.tag == IDENTIFIER) {
        return current.data.identifier;
    }
    printf("Parse error: got wrong token: %d, expected IDENTIFIER\n", current.tag);
    exit(1);
 }
 // Forward declaration: call arguments are themselves expressions.
 Expr parse_expr(TokenArray tokens, unsigned* tokens_index) ;
 // Parses the argument list of a call to `name`, starting at the opening
 // parenthesis: "(" [expr {"," expr}] ")". Returns a FUNCTION expression
 // owning a freshly allocated argument array.
 Expr parse_function(TokenArray tokens, unsigned* tokens_index, char* name) {
    parse_token(tokens, tokens_index, OPENPAREN);
    // There cannot be more arguments than tokens left.
    unsigned capacity = tokens.length - *tokens_index;
    Expr* args = (Expr*)malloc(capacity * sizeof(Expr));
    unsigned count = 0;
    int more = *tokens_index < tokens.length
        && tokens.tokens[*tokens_index].tag != CLOSEPAREN;
    while (more) {
        args[count] = parse_expr(tokens, tokens_index);
        count += 1;
        // Another argument follows only after a comma that is not directly
        // followed by the closing parenthesis or the end of the stream.
        more = maybe_parse_token(tokens, tokens_index, COMMA)
            && *tokens_index < tokens.length
            && tokens.tokens[*tokens_index].tag != CLOSEPAREN;
    }
    parse_token(tokens, tokens_index, CLOSEPAREN);
    Array arguments = { .elements = args, .length = count };
    return (Expr) {
        .tag = FUNCTION,
        .data.function = {
            .name = name,
            .args = arguments,
        },
    };
 }
 // Parses one expression: an integer literal, a variable, or a function call
 // (an identifier directly followed by an opening parenthesis). Reports a
 // parse error and exits with code 1 on any other token or at the end of the
 // token stream.
 Expr parse_expr(TokenArray tokens, unsigned* tokens_index) {
    if (*tokens_index >= tokens.length) {
        printf("Parse error: unexpected end of text.\n");
        exit(1);
    }
    Token head = tokens.tokens[*tokens_index];
    *tokens_index += 1;
    if (head.tag == INTEGER) {
        return (Expr) {
            .tag = LITERAL,
            .data.integer = head.data.integer,
        };
    }
    if (head.tag == IDENTIFIER) {
        int is_call = *tokens_index < tokens.length
            && tokens.tokens[*tokens_index].tag == OPENPAREN;
        if (is_call) {
            return parse_function(tokens, tokens_index, head.data.identifier);
        }
        return (Expr) {
            .tag = VARIABLE,
            .data.variable = head.data.identifier,
        };
    }
    printf("Parse error: got wrong token: %d, expected IDENTIFIER or INTEGER\n", head.tag);
    exit(1);
 }
 // Forward declaration: a while statement contains a block of statements.
 StmtArray parse_block(TokenArray tokens, unsigned* tokens_index);
 // Parses one statement. Every statement starts with an identifier:
 //   while <expr> { ... }   -> WHILE
 //   <name> = <expr>        -> SET
 //   <name>(<args>)         -> EXPR wrapping a function call
 Stmt parse_stmt(TokenArray tokens, unsigned* tokens_index) {
    char* leading = parse_identifier(tokens, tokens_index);
    if (is_while(leading)) {
        Expr condition = parse_expr(tokens, tokens_index);
        StmtArray body = parse_block(tokens, tokens_index);
        return (Stmt) {
            .tag = WHILE,
            .data.While = {
                .condition = condition,
                .block = {
                    .elements = body.stmts,
                    .length = body.length,
                },
            },
        };
    }
    if (maybe_parse_token(tokens, tokens_index, EQUALS)) {
        Expr value = parse_expr(tokens, tokens_index);
        return (Stmt) {
            .tag = SET,
            .data.Set = {
                .name = leading,
                .expr = value,
            },
        };
    }
    // Anything else has to be a call used as a statement.
    Expr call = parse_function(tokens, tokens_index, leading);
    return (Stmt) {
        .tag = EXPR,
        .data.Expr = {
            .expr = call,
        },
    };
 }
 // Parses "{" statement* "}" and returns the statements in a freshly
 // allocated array.
 StmtArray parse_block(TokenArray tokens, unsigned* tokens_index) {
    parse_token(tokens, tokens_index, OPENCURLY);
    // A block cannot hold more statements than there are tokens left.
    unsigned capacity = tokens.length - *tokens_index;
    Stmt* body = (Stmt*)malloc(capacity * sizeof(Stmt));
    unsigned count = 0;
    for (;;) {
        if (*tokens_index >= tokens.length) {
            break;
        }
        if (tokens.tokens[*tokens_index].tag == CLOSECURLY) {
            break;
        }
        body[count] = parse_stmt(tokens, tokens_index);
        count += 1;
    }
    parse_token(tokens, tokens_index, CLOSECURLY);
    StmtArray block = { .stmts = body, .length = count };
    return block;
 }
 // Parses the whole token stream as a sequence of statements and returns them
 // in a freshly allocated array.
 StmtArray parse_program(TokenArray tokens) {
    // Every statement consumes at least one token, so this is large enough.
    Stmt* program = (Stmt*)malloc(tokens.length * sizeof(Stmt));
    unsigned count = 0;
    for (unsigned cursor = 0; cursor < tokens.length; ) {
        program[count] = parse_stmt(tokens, &cursor);
        count += 1;
    }
    StmtArray result = { .stmts = program, .length = count };
    return result;
 }
--- a/first-interpreter/src/parser.h
+++ b/first-interpreter/src/parser.h
@ -0,0 +1,9 @@
 #ifndef PARSER_H
 #define PARSER_H
 #include "lexer.h"
 #include "ast.h"
 // Parses a complete token stream into the program's list of statements.
 StmtArray parse_program(TokenArray tokens);
 #endif