From 16e8ec36e13bd0b071670bca740918b146a4d86f Mon Sep 17 00:00:00 2001
From: me
Date: Sat, 3 Jan 2026 23:59:53 +0200
Subject: [PATCH] my first interpreter

---
 first-interpreter/.gitignore        |   1 +
 first-interpreter/build.hs          |  48 ++++++
 first-interpreter/program.code      |   9 ++
 first-interpreter/src/ast.c         |  70 +++++++++
 first-interpreter/src/ast.h         |  47 ++++++
 first-interpreter/src/execute.c     | 225 ++++++++++++++++++++++++++++
 first-interpreter/src/execute.h     |   9 ++
 first-interpreter/src/interpreter.c |  29 ++++
 first-interpreter/src/interpreter.h |   7 +
 first-interpreter/src/lexer.c       | 166 ++++++++++++++++++++
 first-interpreter/src/lexer.h       |  37 +++++
 first-interpreter/src/main.c        |  54 +++++++
 first-interpreter/src/parser.c      | 186 +++++++++++++++++++++++
 first-interpreter/src/parser.h      |  10 ++
 14 files changed, 898 insertions(+)
 create mode 100644 first-interpreter/.gitignore
 create mode 100755 first-interpreter/build.hs
 create mode 100644 first-interpreter/program.code
 create mode 100644 first-interpreter/src/ast.c
 create mode 100644 first-interpreter/src/ast.h
 create mode 100644 first-interpreter/src/execute.c
 create mode 100644 first-interpreter/src/execute.h
 create mode 100644 first-interpreter/src/interpreter.c
 create mode 100644 first-interpreter/src/interpreter.h
 create mode 100644 first-interpreter/src/lexer.c
 create mode 100644 first-interpreter/src/lexer.h
 create mode 100644 first-interpreter/src/main.c
 create mode 100644 first-interpreter/src/parser.c
 create mode 100644 first-interpreter/src/parser.h

diff --git a/first-interpreter/.gitignore b/first-interpreter/.gitignore
new file mode 100644
index 0000000..e35d885
--- /dev/null
+++ b/first-interpreter/.gitignore
@@ -0,0 +1 @@
+_build
diff --git a/first-interpreter/build.hs b/first-interpreter/build.hs
new file mode 100755
--- /dev/null
+++ b/first-interpreter/build.hs
@@ -0,0 +1,48 @@
+#!/usr/bin/env cabal
+{- cabal:
+build-depends:
+  base,
+  bytestring,
+  cereal,
+  hspec,
+  process,
+  shake,
+-}
+-- initial version taken from https://shakebuild.com/manual
+import Development.Shake
+import Development.Shake.FilePath
+import Development.Shake.Util
+
+cc = "gcc"
+flags = "-ggdb"
+clearCcCache = removeFilesAfter "build" ["//*"]
+
+main :: IO ()
+main = shakeArgs shakeOptions{shakeFiles="_build"} $ do
+    want ["_build/interpreter" <.> exe]
+
+    phony "clean" $ do
+        putInfo "Cleaning files in _build"
+        removeFilesAfter "_build" ["//*"]
+        clearCcCache
+        removeFilesAfter "newdist" ["//*"]
+        cmd_ "cabal" "clean"
+
+    phony "run" $ do
+        need ["_build/interpreter" <.> exe]
+        putInfo "Running interpreter"
+        cmd_ "_build/interpreter" ["program.code"]
+
+    -- Link step: src/X.c is compiled to _build/normal/src/X.o.
+    "_build/interpreter" <.> exe %> \out -> do
+        cs <- getDirectoryFiles "src" ["//*.c"]
+        let os = ["_build" </> "normal" </> "src" </> c -<.> "o" | c <- cs]
+        need os
+        cmd_ (cc <> " " <> flags <> " -o") [out] os
+
+    -- Compile step: drop the "_build/normal/" prefix to find the source file.
+    "_build/normal//*.o" %> \out -> do
+        let c = dropDirectory1 $ dropDirectory1 $ out -<.> "c"
+        let m = out -<.> "m"
+        cmd_ (cc <> " " <> flags <> " -c") [c] "-o" [out] "-MMD -MF" [m]
+        neededMakefileDependencies m
diff --git a/first-interpreter/program.code b/first-interpreter/program.code
new file mode 100644
--- /dev/null
+++ b/first-interpreter/program.code
@@ -0,0 +1,9 @@
+sum = 0
+counter = 10
+
+while counter {
+    sum = add(sum, counter)
+    counter = add(counter, negate(1))
+}
+
+print(sum)
diff --git a/first-interpreter/src/ast.c b/first-interpreter/src/ast.c
new file mode 100644
--- /dev/null
+++ b/first-interpreter/src/ast.c
@@ -0,0 +1,70 @@
+#include "ast.h"
+#include <stdio.h>
+
+/* Print `expr` in source form; a call prints as name(arg, arg, ...). */
+void print_expr(Expr expr) {
+    switch (expr.tag) {
+        case LITERAL:
+            printf("%d", expr.data.integer);
+            break;
+        case VARIABLE:
+            printf("%s", expr.data.variable);
+            break;
+        case FUNCTION: {
+            const Expr* args = expr.data.function.args.elements;
+            unsigned count = expr.data.function.args.length;
+            printf("%s(", expr.data.function.name);
+            for (unsigned i = 0; i < count; ++i) {
+                if (i > 0) {
+                    printf(", ");
+                }
+                print_expr(args[i]);
+            }
+            printf(")");
+            break;
+        }
+    }
+}
+
+/* Print one space per nesting level. */
+static void print_indent(unsigned indentation) {
+    for (unsigned i = 0; i < indentation; ++i) {
+        printf(" ");
+    }
+}
+
+/* Print `stmt` at nesting depth `indentation`, without a trailing newline. */
+static void print_stmt(Stmt stmt, unsigned indentation) {
+    print_indent(indentation);
+    switch (stmt.tag) {
+        case SET:
+            printf("%s = ", stmt.data.Set.name);
+            print_expr(stmt.data.Set.expr);
+            break;
+        case WHILE: {
+            const Stmt* body = stmt.data.While.block.elements;
+            printf("while ");
+            print_expr(stmt.data.While.condition);
+            printf(" {\n");
+            for (unsigned i = 0; i < stmt.data.While.block.length; ++i) {
+                print_stmt(body[i], indentation + 1);
+                printf("\n");
+            }
+            // Line the closing brace up with the "while" that opened it.
+            print_indent(indentation);
+            printf("}");
+            break;
+        }
+        case EXPR:
+            print_expr(stmt.data.Expr.expr);
+            break;
+    }
+}
+
+/* Print every top-level statement of the program on its own line. */
+void print_ast(StmtArray stmts) {
+    for (unsigned i = 0; i < stmts.length; ++i) {
+        print_stmt(stmts.stmts[i], 0);
+        printf("\n");
+    }
+}
diff --git a/first-interpreter/src/ast.h b/first-interpreter/src/ast.h
new file mode 100644
--- /dev/null
+++ b/first-interpreter/src/ast.h
@@ -0,0 +1,47 @@
+#ifndef INTERPRETER_AST_H
+#define INTERPRETER_AST_H
+
+/* Untyped array; the owning field documents the element type. */
+typedef struct {
+    void* elements;
+    unsigned length;
+} Array;
+
+/* Expression node; `tag` selects the active member of `data`. */
+typedef struct Expr {
+    enum {
+        LITERAL,
+        VARIABLE,
+        FUNCTION,
+    } tag;
+    union {
+        int integer;
+        char* variable;
+        struct { char* name; Array args; } function; /* args: Expr elements */
+    } data;
+} Expr;
+
+/* Statement node; `tag` selects the active member of `data`. */
+typedef struct {
+    enum {
+        SET,
+        WHILE,
+        EXPR,
+    } tag;
+    union {
+        struct { char* name; Expr expr; } Set;
+        struct { Expr condition; Array block; } While; /* block: Stmt elements */
+        struct { Expr expr; } Expr;
+    } data;
+} Stmt;
+
+typedef struct {
+    Stmt* stmts;
+    unsigned length;
+} StmtArray;
+
+/* Print an expression / a whole program to stdout in source form. */
+void print_expr(Expr);
+void print_ast(StmtArray);
+
+#endif
diff --git a/first-interpreter/src/execute.c b/first-interpreter/src/execute.c
new file mode 100644
--- /dev/null
+++ b/first-interpreter/src/execute.c
@@ -0,0 +1,225 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ast.h"
+#include "execute.h"
+
+/*
+ * Store of variable values. `i` (the next free slot) lives on the heap so
+ * that by-value copies of this struct share a single counter.
+ */
+typedef struct {
+    unsigned* i;
+    Array memory;
+} Memory;
+
+/* One name -> slot binding in a singly linked list. */
+struct Cell {
+    char* name;
+    unsigned index;
+    struct Cell* next;
+};
+
+/* Head of the binding list; the newest binding comes first. */
+typedef struct {
+    struct Cell* next;
+} Environment;
+
+/* Report heap or store exhaustion and terminate. */
+static void out_of_memory(void) {
+    printf("Error: out of memory.\n");
+    exit(1);
+}
+
+/* Allocate an empty store of 1024 int slots. */
+static Memory new_memory(void) {
+    unsigned size = 1024;
+    unsigned* i = malloc(sizeof *i);
+    int* mem = malloc(sizeof *mem * size);
+    if (i == NULL || mem == NULL) {
+        out_of_memory();
+    }
+    *i = 0;
+    return (Memory){
+        .i = i,
+        .memory = {
+            .length = size,
+            .elements = mem,
+        }
+    };
+}
+
+/* Create an environment with no bindings. */
+static Environment new_environment(void) {
+    return (Environment) { .next = NULL };
+}
+
+/* Return the slot bound to `name`, or -1 if `name` is unbound. */
+static int lookup_env(char* name, Environment env) {
+    for (struct Cell* cell = env.next; cell != NULL; cell = cell->next) {
+        if (strcmp(cell->name, name) == 0) {
+            return cell->index;
+        }
+    }
+    return -1;
+}
+
+/*
+ * Store `n` in the next free slot and return `env` extended with a binding
+ * from `name` to that slot. Terminates when no slot or heap memory is left.
+ */
+static Environment insert(char* name, int n, Memory memory, Environment env) {
+    // Slots 0 .. length-1 are all usable.
+    if (*memory.i >= memory.memory.length) {
+        out_of_memory();
+    }
+    struct Cell* cell = malloc(sizeof *cell);
+    if (cell == NULL) {
+        out_of_memory();
+    }
+    ((int*)(memory.memory.elements))[*memory.i] = n;
+    *cell = (struct Cell) {
+        .name = name,
+        .index = *memory.i,
+        .next = env.next,
+    };
+    ++*memory.i;
+    return (Environment) { .next = cell };
+}
+
+/* Return the address of slot `index`, or NULL if it is not in use. */
+static int* lookup_mem(int index, Memory mem) {
+    if (index >= 0 && (unsigned)index < *(mem.i)) {
+        return &((int*)(mem.memory.elements))[index];
+    }
+    return NULL;
+}
+
+/* Return the address of the value bound to `name`, or NULL if unbound. */
+static int* lookup(char* name, Memory memory, Environment env) {
+    int index = lookup_env(name, env);
+    if (index < 0) {
+        return NULL;
+    }
+    return lookup_mem(index, memory);
+}
+
+static int eval_expr(Expr expr, Memory memory, Environment env);
+
+/* Terminate unless the builtin call `call` has exactly `expected` arguments. */
+static void check_arity(Expr call, unsigned expected, const char* usage) {
+    if (call.data.function.args.length != expected) {
+        printf("Error: %s expects %s.\n", call.data.function.name, usage);
+        exit(1);
+    }
+}
+
+/* Evaluate a call to one of the builtins print, add and negate. */
+static int eval_call(Expr call, Memory memory, Environment env) {
+    const char* name = call.data.function.name;
+    const Expr* args = call.data.function.args.elements;
+
+    if (strcmp(name, "print") == 0) {
+        check_arity(call, 1, "a single argument");
+        printf("%d\n", eval_expr(args[0], memory, env));
+        return 0;
+    }
+    if (strcmp(name, "add") == 0) {
+        check_arity(call, 2, "two arguments");
+        int arg1 = eval_expr(args[0], memory, env);
+        int arg2 = eval_expr(args[1], memory, env);
+        return arg1 + arg2;
+    }
+    if (strcmp(name, "negate") == 0) {
+        check_arity(call, 1, "a single argument");
+        return 0 - eval_expr(args[0], memory, env);
+    }
+    // A misspelled builtin is an error, not a silent 0.
+    printf("Error: unknown function '%s'\n", name);
+    exit(1);
+}
+
+/* Evaluate `expr` to an int; terminates with a message on any error. */
+static int eval_expr(Expr expr, Memory memory, Environment env) {
+    switch (expr.tag) {
+        case LITERAL:
+            return expr.data.integer;
+        case VARIABLE: {
+            int* result = lookup(expr.data.variable, memory, env);
+            if (result == NULL) {
+                printf("Error: variable not found '%s'\n", expr.data.variable);
+                exit(1);
+            }
+            return *result;
+        }
+        case FUNCTION:
+            return eval_call(expr, memory, env);
+    }
+    // Only reachable through a corrupted tag.
+    printf("Error: invalid expression.\n");
+    exit(1);
+}
+
+/*
+ * Drop the bindings that precede `outer` in `inner`: free their cells and
+ * hand every slot from `mark` upwards back to the store.
+ */
+static void release_scope(Environment inner, Environment outer, Memory memory, unsigned mark) {
+    struct Cell* cell = inner.next;
+    while (cell != outer.next) {
+        struct Cell* next = cell->next;
+        free(cell);
+        cell = next;
+    }
+    *memory.i = mark;
+}
+
+/*
+ * Execute one statement and return the environment for the statement that
+ * follows: `env`, extended by one binding when a SET introduces a new name.
+ * A name first assigned inside a while body lasts for that iteration only.
+ */
+static Environment interpret_stmt(Stmt stmt, Memory memory, Environment env) {
+    switch (stmt.tag) {
+        case SET: {
+            int result = eval_expr(stmt.data.Set.expr, memory, env);
+            int* slot = lookup(stmt.data.Set.name, memory, env);
+            if (slot == NULL) {
+                return insert(stmt.data.Set.name, result, memory, env);
+            }
+            *slot = result;
+            break;
+        }
+        case WHILE: {
+            const Stmt* body = stmt.data.While.block.elements;
+            while (eval_expr(stmt.data.While.condition, memory, env)) {
+                unsigned mark = *memory.i;
+                Environment new_env = env;
+                for (unsigned i = 0; i < stmt.data.While.block.length; ++i) {
+                    new_env = interpret_stmt(body[i], memory, new_env);
+                }
+                // The iteration's bindings are unreachable from here on;
+                // reclaim them so a long loop cannot fill the store.
+                release_scope(new_env, env, memory, mark);
+            }
+            break;
+        }
+        case EXPR:
+            eval_expr(stmt.data.Expr.expr, memory, env);
+            break;
+    }
+    return env;
+}
+
+/* Run `stmts` in order, starting with no variables defined. */
+void execute(StmtArray stmts) {
+    Memory memory = new_memory();
+    Environment environment = new_environment();
+    for (unsigned pc = 0; pc < stmts.length; ++pc) {
+        environment = interpret_stmt(stmts.stmts[pc], memory, environment);
+    }
+    release_scope(environment, new_environment(), memory, 0);
+    free(memory.memory.elements);
+    free(memory.i);
+}
diff --git a/first-interpreter/src/execute.h b/first-interpreter/src/execute.h
new file mode 100644
--- /dev/null
+++ b/first-interpreter/src/execute.h
@@ -0,0 +1,9 @@
+#ifndef EXECUTE_H
+#define EXECUTE_H
+
+#include "ast.h"
+
+/* Interpret a parsed program; terminates the process on a runtime error. */
+void execute(StmtArray);
+
+#endif
diff --git a/first-interpreter/src/interpreter.c b/first-interpreter/src/interpreter.c
new file mode 100644
--- /dev/null
+++ b/first-interpreter/src/interpreter.c
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "interpreter.h"
+#include "lexer.h"
+#include "parser.h"
+#include "execute.h"
+#include "ast.h"
+
+/*
+ * Lex, parse and execute the `length` bytes at `code`, printing the token
+ * stream and the parsed program before the program's own output.
+ */
+void run(char* code, unsigned length) {
+    TokenArray tokens = scan(code, length);
+
+    puts("Tokens:\n");
+    print_TokenArray(tokens);
+    puts("\n");
+
+    StmtArray program = parse_program(tokens);
+
+    puts("\nProgram:\n");
+    print_ast(program);
+    puts("\n");
+
+    puts("\nRun:\n");
+    execute(program);
+    puts("\n");
+}
diff --git a/first-interpreter/src/interpreter.h b/first-interpreter/src/interpreter.h
new file mode 100644
--- /dev/null
+++ b/first-interpreter/src/interpreter.h
@@ -0,0 +1,7 @@
+#ifndef INTERPRETER_H
+#define INTERPRETER_H
+
+/* Interpret `length` bytes of program text, tracing each stage to stdout. */
+void run(char*, unsigned);
+
+#endif
diff --git a/first-interpreter/src/lexer.c b/first-interpreter/src/lexer.c
new file mode 100644
--- /dev/null
+++ b/first-interpreter/src/lexer.c
@@ -0,0 +1,166 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "lexer.h"
+
+enum {
+    MAX_IDENTIFIER_LENGTH = 127, /* excluding the terminating NUL */
+    MAX_INTEGER_DIGITS = 9,      /* 999999999 still fits a 32-bit int */
+};
+
+/* Append a token that carries no payload. */
+static void push_symbol(Token* tokens, unsigned* count, TokenTag tag) {
+    tokens[*count] = (Token){ .tag = tag, .data.integer = 0, };
+    ++*count;
+}
+
+/*
+ * Read the identifier that starts at txt[*index] into a fresh heap string
+ * and advance *index past it.
+ */
+static Token scan_identifier(const char* txt, unsigned length, unsigned* index) {
+    char* word = malloc(MAX_IDENTIFIER_LENGTH + 1);
+    if (word == NULL) {
+        printf("Error: out of memory.\n");
+        exit(1);
+    }
+    unsigned word_index = 0;
+    while (*index < length && is_alpha(txt[*index])) {
+        if (word_index == MAX_IDENTIFIER_LENGTH) {
+            printf("identifier longer than %d characters\n", MAX_IDENTIFIER_LENGTH);
+            exit(1);
+        }
+        word[word_index] = txt[*index];
+        ++word_index;
+        ++*index;
+    }
+    // Identifiers are later compared with strcmp(), so terminate the string.
+    word[word_index] = '\0';
+    return (Token){ .tag = IDENTIFIER, .data.identifier = word, };
+}
+
+/* Read the decimal literal that starts at txt[*index]; advance *index past it. */
+static Token scan_integer(const char* txt, unsigned length, unsigned* index) {
+    // One byte more than the digit limit keeps the buffer NUL-terminated.
+    char word[MAX_INTEGER_DIGITS + 1] = { '\0' };
+    unsigned word_index = 0;
+    while (*index < length && is_numeric(txt[*index])) {
+        if (word_index == MAX_INTEGER_DIGITS) {
+            printf("integer literal longer than %d digits\n", MAX_INTEGER_DIGITS);
+            exit(1);
+        }
+        word[word_index] = txt[*index];
+        ++word_index;
+        ++*index;
+    }
+    return (Token){ .tag = INTEGER, .data.integer = atoi(word), };
+}
+
+/*
+ * Split `txt` into tokens, stopping after `length` bytes or at the first
+ * NUL. The caller owns the returned array and its identifier strings.
+ * Terminates with a message on a character that cannot start a token.
+ */
+TokenArray scan(char* txt, unsigned length) {
+    unsigned txt_index = 0;
+    // Each token consumes at least one byte, so `length` slots are enough;
+    // the extra one avoids a zero-sized request for empty input.
+    Token* tokens = malloc(((size_t)length + 1) * sizeof *tokens);
+    if (tokens == NULL) {
+        printf("Error: out of memory.\n");
+        exit(1);
+    }
+    unsigned tokens_index = 0;
+    while (txt_index < length && txt[txt_index] != '\0') {
+        char c = txt[txt_index];
+        switch (c) {
+            case ' ':
+            case '\t':
+            case '\r':
+            case '\n':
+                ++txt_index;
+                break;
+            case '=':
+                push_symbol(tokens, &tokens_index, EQUALS);
+                ++txt_index;
+                break;
+            case ',':
+                push_symbol(tokens, &tokens_index, COMMA);
+                ++txt_index;
+                break;
+            case '(':
+                push_symbol(tokens, &tokens_index, OPENPAREN);
+                ++txt_index;
+                break;
+            case ')':
+                push_symbol(tokens, &tokens_index, CLOSEPAREN);
+                ++txt_index;
+                break;
+            case '{':
+                push_symbol(tokens, &tokens_index, OPENCURLY);
+                ++txt_index;
+                break;
+            case '}':
+                push_symbol(tokens, &tokens_index, CLOSECURLY);
+                ++txt_index;
+                break;
+            default:
+                if (is_alpha(c)) {
+                    tokens[tokens_index] = scan_identifier(txt, length, &txt_index);
+                    ++tokens_index;
+                } else if (is_numeric(c)) {
+                    tokens[tokens_index] = scan_integer(txt, length, &txt_index);
+                    ++tokens_index;
+                } else {
+                    // Stop here: the index does not advance on this path,
+                    // so carrying on would loop forever.
+                    printf("unexpected character '%c'\n", c);
+                    exit(1);
+                }
+                break;
+        }
+    }
+    return (TokenArray){ .tokens = tokens, .length = tokens_index };
+}
+
+/* True for the characters an identifier may contain: a-z and '_'. */
+int is_alpha(char c) {
+    return c == '_' || ('a' <= c && c <= 'z');
+}
+
+/* True for the decimal digits 0-9. */
+int is_numeric(char c) {
+    return ('0' <= c && c <= '9');
+}
+
+/* Print the tokens on one line, separated by spaces. */
+void print_TokenArray(TokenArray tokens) {
+    for (unsigned i = 0; i < tokens.length; ++i) {
+        Token token = tokens.tokens[i];
+        switch (token.tag) {
+            case IDENTIFIER:
+                printf("'%s' ", token.data.identifier);
+                break;
+            case INTEGER:
+                printf("%d ", token.data.integer);
+                break;
+            case OPENPAREN:
+                printf("( ");
+                break;
+            case CLOSEPAREN:
+                printf(") ");
+                break;
+            case OPENCURLY:
+                printf("{ ");
+                break;
+            case CLOSECURLY:
+                printf("} ");
+                break;
+            case EQUALS:
+                printf("= ");
+                break;
+            case COMMA:
+                printf(", ");
+                break;
+        }
+    }
+}
diff --git a/first-interpreter/src/lexer.h b/first-interpreter/src/lexer.h
new file mode 100644
--- /dev/null
+++ b/first-interpreter/src/lexer.h
@@ -0,0 +1,37 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+typedef enum {
+    IDENTIFIER,
+    INTEGER,
+    OPENPAREN,
+    CLOSEPAREN,
+    OPENCURLY,
+    CLOSECURLY,
+    EQUALS,
+    COMMA,
+} TokenTag;
+
+/* `data.identifier` is set for IDENTIFIER, `data.integer` for INTEGER. */
+typedef struct {
+    TokenTag tag;
+    union {
+        char* identifier;
+        int integer;
+    } data;
+} Token;
+
+typedef struct {
+    Token* tokens;
+    unsigned length;
+} TokenArray;
+
+/* Tokenize the first `length` bytes of the given text. */
+TokenArray scan(char*, unsigned);
+int is_alpha(char);
+int is_numeric(char);
+
+/* Print the tokens to stdout on a single line. */
+void print_TokenArray(TokenArray);
+
+#endif
diff --git a/first-interpreter/src/main.c b/first-interpreter/src/main.c
new file mode 100644
--- /dev/null
+++ b/first-interpreter/src/main.c
@@ -0,0 +1,54 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "interpreter.h"
+
+/* Load the program file named on the command line and interpret it. */
+int main(int argc, char** argv) {
+    if (argc != 2) {
+        puts("USAGE: interpreter FILE\n");
+        exit(1);
+    }
+
+    FILE* file = fopen(argv[1], "r");
+    if (!file) {
+        printf("Error opening file: %s\n", argv[1]);
+        exit(1);
+    }
+
+    // Seek to the end to learn the size; ftell() reports failure as -1.
+    long file_size = -1;
+    if (fseek(file, 0, SEEK_END) == 0) {
+        file_size = ftell(file);
+    }
+    if (file_size < 0) {
+        printf("Error reading file: %s\n", argv[1]);
+        fclose(file);
+        exit(1);
+    }
+    rewind(file);
+
+    // Heap, not alloca(): a large file must not overflow the stack.
+    char* txt = malloc((size_t)file_size + 1);
+    if (txt == NULL) {
+        printf("Error: out of memory.\n");
+        fclose(file);
+        exit(1);
+    }
+
+    // fread() may deliver fewer bytes than ftell() reported, so pass on the
+    // count that was actually read.
+    size_t bytes_read = fread(txt, 1, (size_t)file_size, file);
+    if (ferror(file)) {
+        printf("Error reading file: %s\n", argv[1]);
+        fclose(file);
+        free(txt);
+        exit(1);
+    }
+    fclose(file);
+    txt[bytes_read] = '\0';
+
+    run(txt, (unsigned)bytes_read);
+
+    free(txt);
+    return 0;
+}
diff --git a/first-interpreter/src/parser.c b/first-interpreter/src/parser.c
new file mode 100644
--- /dev/null
+++ b/first-interpreter/src/parser.c
@@ -0,0 +1,186 @@
+#include "parser.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Allocate `count` zeroed elements of `size` bytes; terminates on failure. */
+static void* alloc_array(unsigned count, size_t size) {
+    // Request at least one element so that NULL always means failure.
+    void* result = calloc(count > 0 ? count : 1, size);
+    if (result == NULL) {
+        printf("Error: out of memory.\n");
+        exit(1);
+    }
+    return result;
+}
+
+/* Nonzero when `string` is the keyword "while". */
+static int is_while(char* string) {
+    return strcmp("while", string) == 0;
+}
+
+/* Consume the next token if it has tag `tag`; return whether it did. */
+static int maybe_parse_token(TokenArray tokens, unsigned* tokens_index, TokenTag tag) {
+    if (*tokens_index < tokens.length && tokens.tokens[*tokens_index].tag == tag) {
+        ++(*tokens_index);
+        return 1;
+    }
+    return 0;
+}
+
+/* Consume a token with tag `tag`, or terminate with a parse error. */
+static void parse_token(TokenArray tokens, unsigned* tokens_index, TokenTag tag) {
+    if (!(maybe_parse_token(tokens, tokens_index, tag))) {
+        printf("Parse error: unexpected token. Expected %d\n", tag);
+        exit(1);
+    }
+}
+
+/* Consume an IDENTIFIER token and return its name, or terminate. */
+static char* parse_identifier(TokenArray tokens, unsigned* tokens_index) {
+    if (*tokens_index >= tokens.length) {
+        printf("Parse error: unexpected end of text.\n");
+        exit(1);
+    }
+    Token token = tokens.tokens[*tokens_index];
+    ++(*tokens_index);
+    if (token.tag != IDENTIFIER) {
+        printf("Parse error: got wrong token: %d, expected IDENTIFIER\n", token.tag);
+        exit(1);
+    }
+    return token.data.identifier;
+}
+
+static Expr parse_expr(TokenArray tokens, unsigned* tokens_index);
+
+/*
+ * Parse the parenthesized, comma-separated argument list of a call to
+ * `name`; the identifier itself has already been consumed.
+ */
+static Expr parse_function(TokenArray tokens, unsigned* tokens_index, char* name) {
+    parse_token(tokens, tokens_index, OPENPAREN);
+    // Each argument uses at least one token, so the remaining count bounds it.
+    Expr* exprs = alloc_array(tokens.length - *tokens_index, sizeof *exprs);
+    unsigned expr_index = 0;
+    while (*tokens_index < tokens.length && tokens.tokens[*tokens_index].tag != CLOSEPAREN) {
+        exprs[expr_index] = parse_expr(tokens, tokens_index);
+        ++expr_index;
+        if (!maybe_parse_token(tokens, tokens_index, COMMA)) {
+            break;
+        }
+    }
+    parse_token(tokens, tokens_index, CLOSEPAREN);
+    return (Expr) {
+        .tag = FUNCTION,
+        .data.function = {
+            .name = name,
+            .args = (Array) {
+                .elements = exprs,
+                .length = expr_index,
+            },
+        },
+    };
+}
+
+/* Parse an integer literal, a variable reference or a function call. */
+static Expr parse_expr(TokenArray tokens, unsigned* tokens_index) {
+    if (*tokens_index >= tokens.length) {
+        printf("Parse error: unexpected end of text.\n");
+        exit(1);
+    }
+    Token token = tokens.tokens[*tokens_index];
+    ++(*tokens_index);
+    switch (token.tag) {
+        case INTEGER:
+            return (Expr) {
+                .tag = LITERAL,
+                .data.integer = token.data.integer,
+            };
+        case IDENTIFIER:
+            // An identifier directly followed by "(" is a call.
+            if (*tokens_index < tokens.length && tokens.tokens[*tokens_index].tag == OPENPAREN) {
+                return parse_function(tokens, tokens_index, token.data.identifier);
+            }
+            return (Expr) {
+                .tag = VARIABLE,
+                .data.variable = token.data.identifier,
+            };
+        default:
+            printf("Parse error: got wrong token: %d, expected IDENTIFIER or INTEGER\n", token.tag);
+            exit(1);
+    }
+}
+
+static StmtArray parse_block(TokenArray tokens, unsigned* tokens_index);
+
+/*
+ * Parse one statement: `while EXPR { ... }`, `NAME = EXPR`, or a bare call
+ * `NAME(...)`.
+ */
+static Stmt parse_stmt(TokenArray tokens, unsigned* tokens_index) {
+    char* identifier = parse_identifier(tokens, tokens_index);
+
+    if (is_while(identifier)) {
+        Expr expr = parse_expr(tokens, tokens_index);
+
+        StmtArray block = parse_block(tokens, tokens_index);
+        return (Stmt) {
+            .tag = WHILE,
+            .data.While = {
+                .condition = expr,
+                .block = {
+                    .elements = block.stmts,
+                    .length = block.length,
+                },
+            },
+        };
+    } else if (maybe_parse_token(tokens, tokens_index, EQUALS)) {
+        Expr expr = parse_expr(tokens, tokens_index);
+
+        return (Stmt) {
+            .tag = SET,
+            .data.Set = {
+                .name = identifier,
+                .expr = expr,
+            },
+        };
+    } else {
+        Expr expr = parse_function(tokens, tokens_index, identifier);
+
+        return (Stmt) {
+            .tag = EXPR,
+            .data.Expr = {
+                .expr = expr,
+            },
+        };
+    }
+}
+
+/* Parse `{ stmt* }` and return the statements between the braces. */
+static StmtArray parse_block(TokenArray tokens, unsigned* tokens_index) {
+    parse_token(tokens, tokens_index, OPENCURLY);
+    Stmt* stmts = alloc_array(tokens.length - *tokens_index, sizeof *stmts);
+    unsigned stmt_index = 0;
+    while (*tokens_index < tokens.length && tokens.tokens[*tokens_index].tag != CLOSECURLY) {
+        stmts[stmt_index] = parse_stmt(tokens, tokens_index);
+        ++stmt_index;
+    }
+    parse_token(tokens, tokens_index, CLOSECURLY);
+    return (StmtArray) {
+        .stmts = stmts,
+        .length = stmt_index,
+    };
+}
+
+/* Parse a whole token stream into the program's top-level statements. */
+StmtArray parse_program(TokenArray tokens) {
+    unsigned tokens_index = 0;
+    unsigned stmt_index = 0;
+    Stmt* stmts = alloc_array(tokens.length, sizeof *stmts);
+
+    while (tokens_index < tokens.length) {
+        stmts[stmt_index] = parse_stmt(tokens, &tokens_index);
+        ++stmt_index;
+    }
+    return (StmtArray){ .stmts = stmts, .length = stmt_index };
+}
diff --git a/first-interpreter/src/parser.h b/first-interpreter/src/parser.h
new file mode 100644
--- /dev/null
+++ b/first-interpreter/src/parser.h
@@ -0,0 +1,10 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "lexer.h"
+#include "ast.h"
+
+/* Build the AST for `tokens`; terminates the process on a syntax error. */
+StmtArray parse_program(TokenArray tokens);
+
+#endif