ayin/src/parser/scanner.rs
2025-12-17 23:20:26 +02:00

258 lines
7.7 KiB
Rust

use super::types::*;
use chumsky::text::Char;
use log;
use lyn::Scanner;
pub fn scan(source: String) -> Tokens {
let mut scanner = Scanner::new(&source);
let mut tokens = Vec::new();
loop {
let start = scanner.cursor();
if let Some(c) = scanner.pop() {
match *c {
',' => tokens.push(LocatedToken {
start,
end: scanner.cursor(),
token: Token::Comma,
}),
':' => tokens.push(LocatedToken {
start,
end: scanner.cursor(),
token: Token::Colon,
}),
';' => tokens.push(LocatedToken {
start,
end: scanner.cursor(),
token: Token::Semicolon,
}),
'{' => tokens.push(LocatedToken {
start,
end: scanner.cursor(),
token: Token::OpenCurly,
}),
'}' => tokens.push(LocatedToken {
start,
end: scanner.cursor(),
token: Token::CloseCurly,
}),
'(' => tokens.push(LocatedToken {
start,
end: scanner.cursor(),
token: Token::OpenParen,
}),
')' => tokens.push(LocatedToken {
start,
end: scanner.cursor(),
token: Token::CloseParen,
}),
'[' => tokens.push(LocatedToken {
start,
end: scanner.cursor(),
token: Token::OpenBracket,
}),
']' => tokens.push(LocatedToken {
start,
end: scanner.cursor(),
token: Token::CloseBracket,
}),
'=' => tokens.push(LocatedToken {
start,
end: scanner.cursor(),
token: Token::Equals,
}),
// labels
'.' => {
let mut str = "".to_string();
loop {
if let Some(ch) = scanner.peek()
&& !ch.is_alphanumeric()
&& *ch != '_'
{
break;
}
if let Some(ch) = scanner.pop() {
str.push(*ch);
} else {
break;
}
}
tokens.push(LocatedToken {
start,
end: scanner.cursor(),
token: match str.as_str() {
_ => Token::Label(str),
},
});
}
// comments
'#' => {
let mut str = "".to_string();
while let Some(ch) = scanner.pop() {
if ch.is_newline() {
break;
}
str.push(*ch);
}
}
// strings
'"' => {
let mut str = "".to_string();
while let Some(ch) = scanner.pop() {
if *ch == '"' {
break;
}
str.push(*ch);
}
tokens.push(LocatedToken {
start,
end: scanner.cursor(),
token: Token::String(str),
});
}
// whitespace
_ if c.is_whitespace() => {}
// numbers
_ if c.is_numeric() => {
let mut str = c.to_string();
loop {
if let Some(ch) = scanner.peek()
&& !ch.is_numeric()
{
break;
}
if let Some(ch) = scanner.pop() {
str.push(*ch);
} else {
break;
}
}
let i = str.parse::<u32>().unwrap();
tokens.push(LocatedToken {
start,
end: scanner.cursor(),
token: Token::Number(i),
});
}
// identifiers and keywords
_ if c.is_alphabetic() || *c == '_' => {
let mut str = c.to_string();
loop {
if let Some(ch) = scanner.peek()
&& !ch.is_alphanumeric()
&& *ch != '_'
{
break;
}
if let Some(ch) = scanner.pop() {
str.push(*ch);
} else {
break;
}
}
tokens.push(LocatedToken {
start,
end: scanner.cursor(),
token: match str.as_str() {
"fn" => Token::Fn,
"let" => Token::Let,
"mut" => Token::Mut,
"return" => Token::Return,
"true" => Token::True,
"false" => Token::False,
_ => Token::Identifier(str),
},
});
}
// error
_ => {
log::error!("Unexpected character: {c}");
}
}
} else {
break;
}
}
tokens.into()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `program`, drops the location information, and returns the
    /// tokens in reverse source order (the order the snapshots are taken in).
    fn reversed_tokens(program: &str) -> Vec<Token> {
        let mut kinds = scan(program.to_string())
            .0
            .into_iter()
            .map(|located| located.token)
            .collect::<Vec<Token>>();
        kinds.reverse();
        kinds
    }

    /// A single `let` binding to a number literal.
    #[test]
    fn let_number() {
        let result = reversed_tokens("let x = 108;");
        insta::assert_debug_snapshot!(result);
    }

    /// A `let` binding to a function literal containing a method-style call.
    #[test]
    fn let_fn() {
        let result = reversed_tokens(
            "
let main = fn (x) {
console.log(x);
};",
        );
        insta::assert_debug_snapshot!(result);
    }

    /// A larger program mixing records, labels, calls, and a stray `'`.
    #[test]
    fn scaffolding() {
        let result = reversed_tokens(
            "
let init = fn() {
return {
player: { position: { x: 10, y: 20 }, },
}
};
let update = fn(state, events) {
return state';
};
let draw = fn(frame, state) {
frame.clear(0,0,0);
};
let migrate = fn(state) {
return { player: { pos: state.player.position } },
};
",
        );
        insta::assert_debug_snapshot!(result);
    }

    // NOTE(review): the disabled test below references `run`, `helpers`, and
    // `Error::DuplicateNames`, none of which are visible in this file — it
    // may have been carried over from another module. TODO confirm and
    // either port it to the scanner or remove it.
    /*
    // Errors
    #[test]
    fn duplicate_toplevel_defs() {
    let program = vec![
    helpers::define_expr("main", "record".into()),
    helpers::define_expr("main", 0.into()),
    ]
    .into();
    let result = run(program);
    assert_eq!(result, Err(Error::DuplicateNames("main".into())));
    }
    */
}