diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 4eb6d10..3c9ce2e 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1,2 +1,4 @@
+pub mod types;
+pub mod scanner;
 pub mod parser;
 pub use parser::*;
diff --git a/src/parser/parser.rs b/src/parser/parser.rs
index e13bfc8..1303941 100644
--- a/src/parser/parser.rs
+++ b/src/parser/parser.rs
@@ -1,178 +1,4 @@
 use crate::ast;
-use chumsky::text::Char;
-use log;
-use lyn::Scanner;
-
-struct LocatedToken {
-    start: usize,
-    end: usize,
-    token: Token,
-}
-
-#[derive(Debug, Clone, PartialEq)]
-enum Token {
-    Let,
-    Fn,
-    Equals,
-    Semicolon,
-    OpenParen,
-    CloseParen,
-    OpenBracket,
-    CloseBracket,
-    OpenCurly,
-    CloseCurly,
-    Dot,
-    Comma,
-    Name(String),
-    Number(u32),
-    String(String),
-    Identifier(String),
-}
-
-fn scan(source: String) -> Vec<LocatedToken> {
-    let mut scanner = Scanner::new(&source);
-    let mut tokens = Vec::new();
-
-    loop {
-        let start = scanner.cursor();
-        if let Some(c) = scanner.pop() {
-            match *c {
-                '.' => tokens.push(LocatedToken {
-                    start,
-                    end: scanner.cursor(),
-                    token: Token::Dot,
-                }),
-                ',' => tokens.push(LocatedToken {
-                    start,
-                    end: scanner.cursor(),
-                    token: Token::Comma,
-                }),
-                ';' => tokens.push(LocatedToken {
-                    start,
-                    end: scanner.cursor(),
-                    token: Token::Semicolon,
-                }),
-                '{' => tokens.push(LocatedToken {
-                    start,
-                    end: scanner.cursor(),
-                    token: Token::OpenCurly,
-                }),
-                '}' => tokens.push(LocatedToken {
-                    start,
-                    end: scanner.cursor(),
-                    token: Token::CloseCurly,
-                }),
-                '(' => tokens.push(LocatedToken {
-                    start,
-                    end: scanner.cursor(),
-                    token: Token::OpenParen,
-                }),
-                ')' => tokens.push(LocatedToken {
-                    start,
-                    end: scanner.cursor(),
-                    token: Token::CloseParen,
-                }),
-                '[' => tokens.push(LocatedToken {
-                    start,
-                    end: scanner.cursor(),
-                    token: Token::OpenBracket,
-                }),
-                ']' => tokens.push(LocatedToken {
-                    start,
-                    end: scanner.cursor(),
-                    token: Token::CloseBracket,
-                }),
-                '=' => tokens.push(LocatedToken {
-                    start,
-                    end: scanner.cursor(),
-                    token: Token::Equals,
-                }),
-                // comments
-                '#' => {
-                    let mut str = "".to_string();
-                    while let Some(ch) = scanner.pop() {
-                        if ch.is_newline() {
-                            break;
-                        }
-                        str.push(*ch);
-                    }
-                }
-                // strings
-                '"' => {
-                    let mut str = "".to_string();
-                    while let Some(ch) = scanner.pop() {
-                        if *ch == '"' {
-                            break;
-                        }
-                        str.push(*ch);
-                    }
-                    tokens.push(LocatedToken {
-                        start,
-                        end: scanner.cursor(),
-                        token: Token::String(str),
-                    });
-                }
-                _ if c.is_whitespace() => {}
-                // numbers
-                _ if c.is_numeric() => {
-                    let mut str = "".to_string();
-                    loop {
-                        if let Some(ch) = scanner.peek()
-                            && !ch.is_numeric()
-                        {
-                            break;
-                        }
-                        if let Some(ch) = scanner.pop() {
-                            str.push(*ch);
-                        } else {
-                            break;
-                        }
-                    }
-                    let i = str.parse::<u32>().unwrap();
-                    tokens.push(LocatedToken {
-                        start,
-                        end: scanner.cursor(),
-                        token: Token::Number(i),
-                    });
-                }
-                // identifiers and keywords
-                _ if c.is_alphabetic() => {
-                    let mut str = "".to_string();
-                    loop {
-                        if let Some(ch) = scanner.peek()
-                            && !ch.is_alphanumeric()
-                        {
-                            break;
-                        }
-                        if let Some(ch) = scanner.pop() {
-                            str.push(*ch);
-                        } else {
-                            break;
-                        }
-                    }
-
-                    tokens.push(LocatedToken {
-                        start,
-                        end: scanner.cursor(),
-                        token: match str.as_str() {
-                            "fn" => Token::Fn,
-                            "let" => Token::Let,
-                            _ => Token::Identifier(str),
-                        },
-                    });
-                }
-                // error
-                _ => {
-                    log::error!("Unexpected character: {c}");
-                }
-            }
-        } else {
-            break;
-        }
-    }
-
-    tokens
-}
 
 fn parse_expr(tokens: &mut Vec) -> Result {
     todo!()
diff --git a/src/parser/scanner.rs b/src/parser/scanner.rs
new file mode 100644
index 0000000..31e3de3
--- /dev/null
+++ b/src/parser/scanner.rs
@@ -0,0 +1,160 @@
+use chumsky::text::Char;
+use log;
+use lyn::Scanner;
+use super::types::*;
+
+/// Splits `source` into a flat list of tokens, recording for each one the
+/// scanner cursor position before its first character (`start`) and after
+/// its last character (`end`).
+///
+/// Whitespace and `#` comments (which run to the end of the line) produce no
+/// tokens. Unexpected characters and number literals that do not fit in a
+/// `u32` are reported through `log::error!` and skipped.
+pub fn scan(source: String) -> Vec<LocatedToken> {
+    let mut scanner = Scanner::new(&source);
+    let mut tokens = Vec::new();
+
+    loop {
+        let start = scanner.cursor();
+        if let Some(c) = scanner.pop() {
+            match *c {
+                '.' => tokens.push(LocatedToken {
+                    start,
+                    end: scanner.cursor(),
+                    token: Token::Dot,
+                }),
+                ',' => tokens.push(LocatedToken {
+                    start,
+                    end: scanner.cursor(),
+                    token: Token::Comma,
+                }),
+                ';' => tokens.push(LocatedToken {
+                    start,
+                    end: scanner.cursor(),
+                    token: Token::Semicolon,
+                }),
+                '{' => tokens.push(LocatedToken {
+                    start,
+                    end: scanner.cursor(),
+                    token: Token::OpenCurly,
+                }),
+                '}' => tokens.push(LocatedToken {
+                    start,
+                    end: scanner.cursor(),
+                    token: Token::CloseCurly,
+                }),
+                '(' => tokens.push(LocatedToken {
+                    start,
+                    end: scanner.cursor(),
+                    token: Token::OpenParen,
+                }),
+                ')' => tokens.push(LocatedToken {
+                    start,
+                    end: scanner.cursor(),
+                    token: Token::CloseParen,
+                }),
+                '[' => tokens.push(LocatedToken {
+                    start,
+                    end: scanner.cursor(),
+                    token: Token::OpenBracket,
+                }),
+                ']' => tokens.push(LocatedToken {
+                    start,
+                    end: scanner.cursor(),
+                    token: Token::CloseBracket,
+                }),
+                '=' => tokens.push(LocatedToken {
+                    start,
+                    end: scanner.cursor(),
+                    token: Token::Equals,
+                }),
+                // comments
+                '#' => {
+                    while let Some(ch) = scanner.pop() {
+                        if ch.is_newline() {
+                            break;
+                        }
+                    }
+                }
+                // strings
+                '"' => {
+                    let mut str = "".to_string();
+                    while let Some(ch) = scanner.pop() {
+                        if *ch == '"' {
+                            break;
+                        }
+                        str.push(*ch);
+                    }
+                    tokens.push(LocatedToken {
+                        start,
+                        end: scanner.cursor(),
+                        token: Token::String(str),
+                    });
+                }
+                _ if c.is_whitespace() => {}
+                // numbers
+                _ if c.is_ascii_digit() => {
+                    // `c` was already consumed by `pop`, so it seeds the literal.
+                    let mut str = String::from(*c);
+                    loop {
+                        if let Some(ch) = scanner.peek()
+                            && !ch.is_ascii_digit()
+                        {
+                            break;
+                        }
+                        if let Some(ch) = scanner.pop() {
+                            str.push(*ch);
+                        } else {
+                            break;
+                        }
+                    }
+                    // Only ASCII digits reach this point, so parsing can only
+                    // fail when the literal overflows `u32`.
+                    match str.parse::<u32>() {
+                        Ok(i) => tokens.push(LocatedToken {
+                            start,
+                            end: scanner.cursor(),
+                            token: Token::Number(i),
+                        }),
+                        Err(err) => log::error!("Invalid number literal {str}: {err}"),
+                    }
+                }
+                // identifiers and keywords
+                _ if c.is_alphabetic() => {
+                    // `c` was already consumed by `pop`, so it seeds the name.
+                    let mut str = String::from(*c);
+                    loop {
+                        if let Some(ch) = scanner.peek()
+                            && !ch.is_alphanumeric()
+                        {
+                            break;
+                        }
+                        if let Some(ch) = scanner.pop() {
+                            str.push(*ch);
+                        } else {
+                            break;
+                        }
+                    }
+
+                    tokens.push(LocatedToken {
+                        start,
+                        end: scanner.cursor(),
+                        token: match str.as_str() {
+                            "fn" => Token::Fn,
+                            "let" => Token::Let,
+                            _ => Token::Identifier(str),
+                        },
+                    });
+                }
+                // error
+                _ => {
+                    log::error!("Unexpected character: {c}");
+                }
+            }
+        } else {
+            break;
+        }
+    }
+
+    tokens
+}
diff --git a/src/parser/types.rs b/src/parser/types.rs
new file mode 100644
index 0000000..db742ad
--- /dev/null
+++ b/src/parser/types.rs
@@ -0,0 +1,33 @@
+//! Token types produced by the scanner.
+
+/// A [`Token`] together with the scanner cursor positions that delimit it.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct LocatedToken {
+    /// Cursor position before the token's first character.
+    pub start: usize,
+    /// Cursor position after the token's last character.
+    pub end: usize,
+    /// The token that was scanned.
+    pub token: Token,
+}
+
+/// A lexical token.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Token {
+    Let,
+    Fn,
+    Equals,
+    Semicolon,
+    OpenParen,
+    CloseParen,
+    OpenBracket,
+    CloseBracket,
+    OpenCurly,
+    CloseCurly,
+    Dot,
+    Comma,
+    Name(String),
+    Number(u32),
+    String(String),
+    Identifier(String),
+}