lex: parse tab and exit, add comments
This commit is contained in:
parent
a44abe5e73
commit
2f32c928d6
1 changed file with 25 additions and 2 deletions
|
|
@@ -6,56 +6,74 @@ int is_identifier_char(char);
|
||||||
int is_numeric(char);
|
int is_numeric(char);
|
||||||
|
|
||||||
TokenArray lex(char* txt, unsigned length) {
|
TokenArray lex(char* txt, unsigned length) {
|
||||||
|
// The index for the input.
|
||||||
unsigned txt_index = 0;
|
unsigned txt_index = 0;
|
||||||
|
|
||||||
|
// We'll put tokens we lexed successfully here.
|
||||||
Token* tokens = (Token*)malloc(length * sizeof(Token));
|
Token* tokens = (Token*)malloc(length * sizeof(Token));
|
||||||
|
// Where to put the next token in tokens.
|
||||||
unsigned tokens_index = 0;
|
unsigned tokens_index = 0;
|
||||||
|
|
||||||
while (txt_index < length && txt[txt_index] != '\0') {
|
while (txt_index < length && txt[txt_index] != '\0') {
|
||||||
switch (txt[txt_index]) {
|
switch (txt[txt_index]) {
|
||||||
|
// Ignore spaces, tabs and newlines
|
||||||
case ' ': {
|
case ' ': {
|
||||||
++txt_index;
|
++txt_index;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case '\t': {
|
||||||
|
++txt_index;
|
||||||
|
break;
|
||||||
|
}
|
||||||
case '\n': {
|
case '\n': {
|
||||||
++txt_index;
|
++txt_index;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// EQUALS
|
||||||
case '=': {
|
case '=': {
|
||||||
tokens[tokens_index] = (Token){ .tag = EQUALS, .data.integer = 0, };
|
tokens[tokens_index] = (Token){ .tag = EQUALS, .data.integer = 0, };
|
||||||
++tokens_index;
|
++tokens_index;
|
||||||
++txt_index;
|
++txt_index;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// COMMA
|
||||||
case ',': {
|
case ',': {
|
||||||
tokens[tokens_index] = (Token){ .tag = COMMA, .data.integer = 0, };
|
tokens[tokens_index] = (Token){ .tag = COMMA, .data.integer = 0, };
|
||||||
++tokens_index;
|
++tokens_index;
|
||||||
++txt_index;
|
++txt_index;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// OPENPAREN
|
||||||
case '(': {
|
case '(': {
|
||||||
tokens[tokens_index] = (Token){ .tag = OPENPAREN, .data.integer = 0, };
|
tokens[tokens_index] = (Token){ .tag = OPENPAREN, .data.integer = 0, };
|
||||||
++tokens_index;
|
++tokens_index;
|
||||||
++txt_index;
|
++txt_index;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// CLOSEPAREN
|
||||||
case ')': {
|
case ')': {
|
||||||
tokens[tokens_index] = (Token){ .tag = CLOSEPAREN, .data.integer = 0, };
|
tokens[tokens_index] = (Token){ .tag = CLOSEPAREN, .data.integer = 0, };
|
||||||
++tokens_index;
|
++tokens_index;
|
||||||
++txt_index;
|
++txt_index;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// OPENCURLY
|
||||||
case '{': {
|
case '{': {
|
||||||
tokens[tokens_index] = (Token){ .tag = OPENCURLY, .data.integer = 0, };
|
tokens[tokens_index] = (Token){ .tag = OPENCURLY, .data.integer = 0, };
|
||||||
++tokens_index;
|
++tokens_index;
|
||||||
++txt_index;
|
++txt_index;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// CLOSECURLY
|
||||||
case '}': {
|
case '}': {
|
||||||
tokens[tokens_index] = (Token){ .tag = CLOSECURLY, .data.integer = 0, };
|
tokens[tokens_index] = (Token){ .tag = CLOSECURLY, .data.integer = 0, };
|
||||||
++tokens_index;
|
++tokens_index;
|
||||||
++txt_index;
|
++txt_index;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// Complex cases and errors.
|
||||||
default: {
|
default: {
|
||||||
|
// Identifiers.
|
||||||
if (is_identifier_char(txt[txt_index])) {
|
if (is_identifier_char(txt[txt_index])) {
|
||||||
char* word = malloc(128);
|
char* word = malloc(128);
|
||||||
unsigned word_index = 0;
|
unsigned word_index = 0;
|
||||||
|
|
@@ -66,7 +84,9 @@ TokenArray lex(char* txt, unsigned length) {
|
||||||
}
|
}
|
||||||
tokens[tokens_index] = (Token){ .tag = IDENTIFIER, .data.identifier = word, };
|
tokens[tokens_index] = (Token){ .tag = IDENTIFIER, .data.identifier = word, };
|
||||||
++tokens_index;
|
++tokens_index;
|
||||||
} else if (is_numeric(txt[txt_index])) {
|
}
|
||||||
|
// Integers.
|
||||||
|
else if (is_numeric(txt[txt_index])) {
|
||||||
|
|
||||||
char word[9] = { '\0' };
|
char word[9] = { '\0' };
|
||||||
unsigned word_index = 0;
|
unsigned word_index = 0;
|
||||||
|
|
@@ -78,8 +98,11 @@ TokenArray lex(char* txt, unsigned length) {
|
||||||
int integer = atoi(word);
|
int integer = atoi(word);
|
||||||
tokens[tokens_index] = (Token){ .tag = INTEGER, .data.integer = integer, };
|
tokens[tokens_index] = (Token){ .tag = INTEGER, .data.integer = integer, };
|
||||||
++tokens_index;
|
++tokens_index;
|
||||||
} else {
|
}
|
||||||
|
// Not a token.
|
||||||
|
else {
|
||||||
fprintf(stderr, "unexpected character '%c'", txt[txt_index]);
|
fprintf(stderr, "unexpected character '%c'", txt[txt_index]);
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue