diff options
| author | Mica White <botahamec@outlook.com> | 2025-12-07 14:25:35 -0500 |
|---|---|---|
| committer | Mica White <botahamec@outlook.com> | 2025-12-07 14:25:35 -0500 |
| commit | cfa44907065eb10e3b990506881b30c5891f0af2 (patch) | |
| tree | 90e2b818376a01294c8cc96184171861035c7319 /src/ast.rs | |
Diffstat (limited to 'src/ast.rs')
| -rw-r--r-- | src/ast.rs | 343 |
1 file changed, 343 insertions, 0 deletions
diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..5584975 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,343 @@ +use std::iter::Peekable; +use std::sync::Arc; + +use rust_decimal::Decimal; + +use crate::tokens::{Lexer, Token}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Program { + pub commands: Arc<[Command]>, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Command { + Statement(Statement), + Expression(Expression), + PanicMode(Arc<[Token]>), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Statement { + pub function_name: Arc<str>, + pub function_token: Token, + pub args: Arc<[Expression]>, + pub newline: Option<Token>, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Expression { + pub prefix: Arc<[Token]>, + pub suffix: ExpressionSuffix, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ExpressionSuffix { + Nothing, + Number { value: Decimal, token: Token }, + String { value: Arc<str>, token: Token }, + Identifier { name: Arc<str>, token: Token }, + List(List), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct List { + pub left_paren: Token, + pub items: Arc<[ListItem]>, + pub right_paren: Option<Token>, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ListItem { + Dot(Token), + Expression(Expression), +} + +impl Command { + pub fn is_empty(&self) -> bool { + if let Self::PanicMode(tokens) = self { + tokens.is_empty() + } else { + false + } + } + + pub fn expression(&self) -> Option<&Expression> { + if let Self::Expression(expr) = self { + Some(expr) + } else { + None + } + } + + pub fn statement(&self) -> Option<&Statement> { + if let Self::Statement(statement) = self { + Some(statement) + } else { + None + } + } +} + +impl Expression { + pub fn is_quoted(&self) -> bool { + self.prefix.len() == 1 && self.prefix[0].is_apostrophe() + } +} + +impl ExpressionSuffix { + pub fn is_empty(&self) -> bool { + matches!(self, Self::Nothing) + } + + pub fn number(&self) -> Option<&Decimal> { + if let 
Self::Number { value, .. } = self { + Some(value) + } else { + None + } + } + + pub fn string(&self) -> Option<&Arc<str>> { + if let Self::String { value, .. } = self { + Some(value) + } else { + None + } + } + + pub fn identifier(&self) -> Option<&Arc<str>> { + if let Self::Identifier { name, .. } = self { + Some(name) + } else { + None + } + } + + pub fn list(&self) -> Option<&List> { + if let Self::List(list) = self { + Some(list) + } else { + None + } + } +} + +impl List { + pub fn contains_dot(&self) -> bool { + self.items.iter().any(|item| item.dot().is_some()) + } +} + +impl ListItem { + pub fn dot(&self) -> Option<&Token> { + if let Self::Dot(token) = self { + Some(token) + } else { + None + } + } + + pub fn expression(&self) -> Option<&Expression> { + if let Self::Expression(expression) = self { + Some(expression) + } else { + None + } + } +} + +fn parse_many<R>( + lexer: &mut Peekable<Lexer>, + predicate: fn(&Token) -> bool, + parser: fn(&mut Peekable<Lexer>) -> Result<R, String>, +) -> Result<Vec<R>, String> { + let mut items = Vec::new(); + while let Some(next) = lexer.peek() { + if !predicate(next) { + break; + } + + items.push(parser(lexer)?) 
+ } + + Ok(items) +} + +fn parse_if<R>( + lexer: &mut Peekable<Lexer>, + predicate: fn(&Token) -> bool, + parser: fn(&mut Peekable<Lexer>) -> Result<R, String>, +) -> Option<Result<R, String>> { + let next = lexer.peek()?; + predicate(next).then(|| parser(lexer)) +} + +fn skip_whitespace_and_comments(lexer: &mut Peekable<Lexer>) { + while lexer + .next_if(|token| token.is_comment() || token.is_whitespace()) + .is_some() + {} +} + +fn skip_spaces_and_comments(lexer: &mut Peekable<Lexer>) { + while lexer + .next_if(|token: &Token| { + (token.is_whitespace() && !token.contains_newline()) || token.is_comment() + }) + .is_some() + {} +} + +fn skip_non_expression_tokens(lexer: &mut Peekable<Lexer>) -> Vec<Token> { + let mut tokens = Vec::new(); + while let Some(token) = lexer.next_if(|token| !begins_expression(token)) { + tokens.push(token); + } + + tokens +} + +fn begins_expression(token: &Token) -> bool { + match &token.ty { + crate::tokens::TokenType::Whitespace(_) => false, + crate::tokens::TokenType::LineComment(_) => false, + crate::tokens::TokenType::BlockComment { .. } => false, + crate::tokens::TokenType::LeftParenthesis => true, + crate::tokens::TokenType::RightParenthesis => false, + crate::tokens::TokenType::Apostrophe => true, + crate::tokens::TokenType::Pound => true, + crate::tokens::TokenType::Dot => false, + crate::tokens::TokenType::Identifier(_) => true, + crate::tokens::TokenType::String { .. 
} => true, + crate::tokens::TokenType::Number(_) => true, + } +} + +fn begins_list_item(token: &Token) -> bool { + begins_expression(token) || token.is_dot() +} + +pub fn parse_program(lexer: &mut Peekable<Lexer>) -> Result<Program, String> { + skip_whitespace_and_comments(lexer); + let mut commands = parse_many(lexer, |_| true, parse_command)?; + if commands.last().map(|c| c.is_empty()).unwrap_or(false) { + commands.pop(); + } + + let commands = commands.into(); + Ok(Program { commands }) +} + +fn parse_command(lexer: &mut Peekable<Lexer>) -> Result<Command, String> { + skip_whitespace_and_comments(lexer); + if let Some(result) = parse_if(lexer, |token| token.is_identifier(), parse_statement) { + result.map(Command::Statement) + } else if let Some(result) = parse_if(lexer, begins_expression, parse_expression) { + result.map(Command::Expression) + } else { + Ok(Command::PanicMode(skip_non_expression_tokens(lexer).into())) + } +} + +fn parse_statement(lexer: &mut Peekable<Lexer>) -> Result<Statement, String> { + skip_whitespace_and_comments(lexer); + let function_token = lexer + .next() + .ok_or_else(|| String::from("expected a function name"))?; + let function_name = function_token + .identifier() + .ok_or_else(|| String::from("expected the function name to be an identifier"))? + .into(); + skip_spaces_and_comments(lexer); + let args = parse_many(lexer, begins_expression, parse_expression)?.into(); + skip_spaces_and_comments(lexer); + let newline = lexer.next_if(|token| token.contains_newline()); + + Ok(Statement { + function_name, + function_token, + args, + newline, + }) +} + +fn parse_expression(lexer: &mut Peekable<Lexer>) -> Result<Expression, String> { + skip_whitespace_and_comments(lexer); + let prefix = parse_many( + lexer, + |token| token.is_pound() || token.is_apostrophe(), + |parser| { + let result = parser.next().ok_or(String::new()); + skip_whitespace_and_comments(parser); + result + }, + )? 
+ .into(); + skip_whitespace_and_comments(lexer); + let suffix = parse_expression_suffix(lexer)?; + skip_spaces_and_comments(lexer); + + Ok(Expression { prefix, suffix }) +} + +fn parse_expression_suffix(lexer: &mut Peekable<Lexer>) -> Result<ExpressionSuffix, String> { + skip_whitespace_and_comments(lexer); + if let Some(list) = parse_if(lexer, |token| token.is_left_parenthesis(), parse_list) { + list.map(ExpressionSuffix::List) + } else if let Some(identifier) = lexer.next_if(|token| token.is_identifier()) { + Ok(ExpressionSuffix::Identifier { + name: identifier + .identifier() + .ok_or_else(|| String::from("we just checked for an identifier"))? + .into(), + token: identifier, + }) + } else if let Some(string) = lexer.next_if(|token| token.is_string()) { + Ok(ExpressionSuffix::String { + value: string + .computed_string() + .ok_or_else(|| String::from("we just checked for an string"))? + .into(), + token: string, + }) + } else if let Some(number) = lexer.next_if(|token| token.is_number()) { + Ok(ExpressionSuffix::Number { + value: *number + .number() + .ok_or_else(|| String::from("we just checked for a number"))?, + token: number, + }) + } else { + Ok(ExpressionSuffix::Nothing) + } +} + +fn parse_list(lexer: &mut Peekable<Lexer>) -> Result<List, String> { + skip_whitespace_and_comments(lexer); + let left_paren = lexer + .next_if(|token| token.is_left_parenthesis()) + .ok_or_else(|| String::from("Unexpected token. 
Expected a left parenthesis"))?; + skip_whitespace_and_comments(lexer); + let items = parse_many(lexer, begins_list_item, parse_list_item)?.into(); + skip_whitespace_and_comments(lexer); + let right_paren = lexer.next_if(|token| token.is_right_parenthesis()); + + Ok(List { + left_paren, + items, + right_paren, + }) +} + +fn parse_list_item(lexer: &mut Peekable<Lexer>) -> Result<ListItem, String> { + skip_whitespace_and_comments(lexer); + if let Some(dot) = lexer.next_if(|token| token.is_dot()) { + skip_whitespace_and_comments(lexer); + Ok(ListItem::Dot(dot)) + } else { + let result = parse_expression(lexer).map(ListItem::Expression); + skip_whitespace_and_comments(lexer); + result + } +} |
