//! DreamStack Lexer — tokenizes source into a stream of tokens.

/// A single token: its kind, the exact source text it was read from, and the
/// 1-based line/column where it begins.
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
    pub kind: TokenKind,
    pub lexeme: String,
    pub line: usize,
    pub col: usize,
}

#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    // Literals
    Int(i64),
    Float(f64),
    StringStart,            // opening " (reserved; current lexer emits fragments directly)
    StringFragment(String), // literal part of string
    StringInterp,           // { inside string
    StringEnd,              // closing "
    True,
    False,
    // Identifiers & keywords
    Ident(String),
    Let,
    View,
    Effect,
    On,
    When,
    Each,
    InKw,
    Match,
    If,
    Then,
    Else,
    Perform,
    Handle,
    With,
    Stream,
    From,
    Spring,
    Column,
    Row,
    Stack,
    Panel,
    List,
    Form,
    Scene,
    Animate,
    For,
    In, // NOTE: kept for API compatibility; the keyword "in" currently lexes to InKw
    Component,
    Route,
    Navigate,
    Constrain,
    Pixel,
    Delta,
    Signals,
    Every,
    Import,
    Export,
    Type,
    Where,
    Layout,
    // Operators
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Eq,      // =
    EqEq,    // ==
    Neq,     // !=
    Lt,      // <
    Gt,      // >
    Lte,     // <=
    Gte,     // >=
    And,     // &&
    Or,      // ||
    Not,     // !
    PlusEq,  // +=
    MinusEq, // -=
    Arrow,   // ->
    Pipe,    // |
    Dot,     // .
    // Delimiters
    LParen,
    RParen,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    Comma,
    Colon,
    Newline,
    // Special
    Comment(String),
    Eof,
    Error(String),
}

/// Hand-written lexer. Holds the whole source as a `Vec<char>` plus a cursor
/// and line/column bookkeeping. `in_string`/`interp_depth` track whether the
/// lexer is inside a string literal and how deeply nested in `{ … }`
/// interpolation braces it is.
pub struct Lexer {
    source: Vec<char>,
    pos: usize,
    line: usize,
    col: usize,
    in_string: bool,
    interp_depth: usize,
}

impl Lexer {
    pub fn new(source: &str) -> Self {
        Self {
            source: source.chars().collect(),
            pos: 0,
            line: 1,
            col: 1,
            in_string: false,
            interp_depth: 0,
        }
    }

    /// Tokenize the whole source. Comments are dropped and runs of
    /// consecutive newlines are collapsed to a single `Newline` token.
    /// The returned vector always ends with `Eof`.
    pub fn tokenize(&mut self) -> Vec<Token> {
        let mut tokens = Vec::new();
        loop {
            let tok = self.next_token();
            let is_eof = tok.kind == TokenKind::Eof;
            // Skip comments and consecutive newlines
            match &tok.kind {
                TokenKind::Comment(_) => continue,
                TokenKind::Newline => {
                    if tokens.last().is_some_and(|t: &Token| t.kind == TokenKind::Newline) {
                        continue;
                    }
                }
                _ => {}
            }
            tokens.push(tok);
            if is_eof {
                break;
            }
        }
        tokens
    }

    /// Current character, or '\0' at end of input.
    fn peek(&self) -> char {
        self.source.get(self.pos).copied().unwrap_or('\0')
    }

    /// One character of lookahead, or '\0' past end of input.
    fn peek_next(&self) -> char {
        self.source.get(self.pos + 1).copied().unwrap_or('\0')
    }

    /// Consume and return the current character, updating line/col.
    fn advance(&mut self) -> char {
        let c = self.peek();
        self.pos += 1;
        if c == '\n' {
            self.line += 1;
            self.col = 1;
        } else {
            self.col += 1;
        }
        c
    }

    /// Build a token at the lexer's *current* position (used for Eof).
    fn make_token(&self, kind: TokenKind, lexeme: &str) -> Token {
        Token {
            kind,
            lexeme: lexeme.to_string(),
            line: self.line,
            col: self.col,
        }
    }

    /// Skip spaces, tabs and carriage returns. Newlines are significant and
    /// are emitted as tokens, so they are NOT skipped here.
    fn skip_whitespace(&mut self) {
        while self.pos < self.source.len() {
            match self.peek() {
                ' ' | '\t' | '\r' => {
                    self.advance();
                }
                _ => break,
            }
        }
    }

    fn next_token(&mut self) -> Token {
        // Inside a string and not inside an interpolation expression:
        // keep lexing string content (fragments / interp markers / end quote).
        if self.in_string && self.interp_depth == 0 {
            return self.lex_string_continuation();
        }
        self.skip_whitespace();
        if self.pos >= self.source.len() {
            return self.make_token(TokenKind::Eof, "");
        }
        let line = self.line;
        let col = self.col;
        let c = self.peek();
        // Two-character operators are matched before their one-character
        // prefixes via match guards; order of the '-' arms matters
        // (`--` comment, then `->`, then `-=`, then `-`).
        let tok = match c {
            '\n' => {
                self.advance();
                Token { kind: TokenKind::Newline, lexeme: "\n".into(), line, col }
            }
            '-' if self.peek_next() == '-' => self.lex_comment(),
            '-' if self.peek_next() == '>' => {
                self.advance();
                self.advance();
                Token { kind: TokenKind::Arrow, lexeme: "->".into(), line, col }
            }
            '-' if self.peek_next() == '=' => {
                self.advance();
                self.advance();
                Token { kind: TokenKind::MinusEq, lexeme: "-=".into(), line, col }
            }
            '+' if self.peek_next() == '=' => {
                self.advance();
                self.advance();
                Token { kind: TokenKind::PlusEq, lexeme: "+=".into(), line, col }
            }
            '=' if self.peek_next() == '=' => {
                self.advance();
                self.advance();
                Token { kind: TokenKind::EqEq, lexeme: "==".into(), line, col }
            }
            '!' if self.peek_next() == '=' => {
                self.advance();
                self.advance();
                Token { kind: TokenKind::Neq, lexeme: "!=".into(), line, col }
            }
            '<' if self.peek_next() == '=' => {
                self.advance();
                self.advance();
                Token { kind: TokenKind::Lte, lexeme: "<=".into(), line, col }
            }
            '>' if self.peek_next() == '=' => {
                self.advance();
                self.advance();
                Token { kind: TokenKind::Gte, lexeme: ">=".into(), line, col }
            }
            '&' if self.peek_next() == '&' => {
                self.advance();
                self.advance();
                Token { kind: TokenKind::And, lexeme: "&&".into(), line, col }
            }
            '|' if self.peek_next() == '|' => {
                self.advance();
                self.advance();
                Token { kind: TokenKind::Or, lexeme: "||".into(), line, col }
            }
            '+' => {
                self.advance();
                Token { kind: TokenKind::Plus, lexeme: "+".into(), line, col }
            }
            '-' => {
                self.advance();
                Token { kind: TokenKind::Minus, lexeme: "-".into(), line, col }
            }
            '*' => {
                self.advance();
                Token { kind: TokenKind::Star, lexeme: "*".into(), line, col }
            }
            '/' if self.peek_next() == '/' => self.lex_comment(),
            '/' => {
                self.advance();
                Token { kind: TokenKind::Slash, lexeme: "/".into(), line, col }
            }
            '%' => {
                self.advance();
                Token { kind: TokenKind::Percent, lexeme: "%".into(), line, col }
            }
            '=' => {
                self.advance();
                Token { kind: TokenKind::Eq, lexeme: "=".into(), line, col }
            }
            '<' => {
                self.advance();
                Token { kind: TokenKind::Lt, lexeme: "<".into(), line, col }
            }
            '>' => {
                self.advance();
                Token { kind: TokenKind::Gt, lexeme: ">".into(), line, col }
            }
            '!' => {
                self.advance();
                Token { kind: TokenKind::Not, lexeme: "!".into(), line, col }
            }
            '|' => {
                self.advance();
                Token { kind: TokenKind::Pipe, lexeme: "|".into(), line, col }
            }
            '.' => {
                self.advance();
                Token { kind: TokenKind::Dot, lexeme: ".".into(), line, col }
            }
            '(' => {
                self.advance();
                Token { kind: TokenKind::LParen, lexeme: "(".into(), line, col }
            }
            ')' => {
                self.advance();
                Token { kind: TokenKind::RParen, lexeme: ")".into(), line, col }
            }
            '[' => {
                self.advance();
                Token { kind: TokenKind::LBracket, lexeme: "[".into(), line, col }
            }
            ']' => {
                self.advance();
                Token { kind: TokenKind::RBracket, lexeme: "]".into(), line, col }
            }
            '{' => {
                self.advance();
                // A brace while in string-interpolation mode nests deeper.
                if self.in_string {
                    self.interp_depth += 1;
                }
                Token { kind: TokenKind::LBrace, lexeme: "{".into(), line, col }
            }
            '}' => {
                self.advance();
                // Closing a brace may drop us back into string-body mode.
                if self.interp_depth > 0 {
                    self.interp_depth -= 1;
                }
                Token { kind: TokenKind::RBrace, lexeme: "}".into(), line, col }
            }
            ',' => {
                self.advance();
                Token { kind: TokenKind::Comma, lexeme: ",".into(), line, col }
            }
            ':' => {
                self.advance();
                Token { kind: TokenKind::Colon, lexeme: ":".into(), line, col }
            }
            '"' => self.lex_string_start(),
            c if c.is_ascii_digit() => self.lex_number(),
            c if c.is_ascii_alphabetic() || c == '_' => self.lex_ident_or_keyword(),
            _ => {
                self.advance();
                Token {
                    kind: TokenKind::Error(format!("unexpected character: {c}")),
                    lexeme: c.to_string(),
                    line,
                    col,
                }
            }
        };
        tok
    }

    /// Lex a line comment introduced by `--` or `//` (both two-char markers).
    /// The lexeme echoes the marker actually seen; the token payload is the
    /// trimmed comment text.
    fn lex_comment(&mut self) -> Token {
        let line = self.line;
        let col = self.col;
        let p1 = self.advance(); // first marker char ('-' or '/')
        let p2 = self.advance(); // second marker char
        let mut text = String::new();
        while self.pos < self.source.len() && self.peek() != '\n' {
            text.push(self.advance());
        }
        Token {
            kind: TokenKind::Comment(text.trim().to_string()),
            lexeme: format!("{p1}{p2}{text}"),
            line,
            col,
        }
    }

    /// Lex an integer or float literal. A '.' followed by a letter is treated
    /// as a method/field access (e.g. `1.abs`), not a decimal point.
    fn lex_number(&mut self) -> Token {
        let line = self.line;
        let col = self.col;
        let mut num = String::new();
        let mut is_float = false;
        while self.pos < self.source.len() && (self.peek().is_ascii_digit() || self.peek() == '.') {
            if self.peek() == '.' {
                if is_float {
                    break; // second '.' ends the literal (e.g. `1.2.3`)
                }
                // Check it's not a member access (e.g. `foo.bar`)
                if self.peek_next().is_ascii_alphabetic() {
                    break;
                }
                is_float = true;
            }
            num.push(self.advance());
        }
        if is_float {
            let val: f64 = num.parse().unwrap_or(0.0);
            Token { kind: TokenKind::Float(val), lexeme: num, line, col }
        } else {
            let val: i64 = num.parse().unwrap_or(0);
            Token { kind: TokenKind::Int(val), lexeme: num, line, col }
        }
    }

    /// Lex an identifier, mapping reserved words to their keyword tokens.
    fn lex_ident_or_keyword(&mut self) -> Token {
        let line = self.line;
        let col = self.col;
        let mut ident = String::new();
        while self.pos < self.source.len()
            && (self.peek().is_ascii_alphanumeric() || self.peek() == '_')
        {
            ident.push(self.advance());
        }
        let kind = match ident.as_str() {
            "let" => TokenKind::Let,
            "view" => TokenKind::View,
            "effect" => TokenKind::Effect,
            "on" => TokenKind::On,
            "when" => TokenKind::When,
            "each" => TokenKind::Each,
            // NOTE: "in" always lexes to InKw; the separate TokenKind::In
            // variant exists but was shadowed by this arm (the old duplicate
            // `"in" => TokenKind::In` arm was unreachable and has been removed).
            "in" => TokenKind::InKw,
            "match" => TokenKind::Match,
            "if" => TokenKind::If,
            "then" => TokenKind::Then,
            "else" => TokenKind::Else,
            "perform" => TokenKind::Perform,
            "handle" => TokenKind::Handle,
            "with" => TokenKind::With,
            "stream" => TokenKind::Stream,
            "from" => TokenKind::From,
            "spring" => TokenKind::Spring,
            "constrain" => TokenKind::Constrain,
            "pixel" => TokenKind::Pixel,
            "delta" => TokenKind::Delta,
            "signals" => TokenKind::Signals,
            "column" => TokenKind::Column,
            "row" => TokenKind::Row,
            "stack" => TokenKind::Stack,
            "panel" => TokenKind::Panel,
            "list" => TokenKind::List,
            "form" => TokenKind::Form,
            "scene" => TokenKind::Scene,
            "animate" => TokenKind::Animate,
            "true" => TokenKind::True,
            "false" => TokenKind::False,
            "for" => TokenKind::For,
            "component" => TokenKind::Component,
            "route" => TokenKind::Route,
            "navigate" => TokenKind::Navigate,
            "every" => TokenKind::Every,
            "import" => TokenKind::Import,
            "export" => TokenKind::Export,
            "type" => TokenKind::Type,
            "where" => TokenKind::Where,
            "layout" => TokenKind::Layout,
            _ => TokenKind::Ident(ident.clone()),
        };
        Token { kind, lexeme: ident, line, col }
    }

    /// Consume the opening quote, enter string mode, and lex the first piece
    /// of the string body.
    fn lex_string_start(&mut self) -> Token {
        let line = self.line;
        let col = self.col;
        self.advance(); // consume opening "
        self.in_string = true;
        self.lex_string_body(line, col)
    }

    /// Resume lexing a string body after an interpolation expression closed.
    fn lex_string_continuation(&mut self) -> Token {
        let line = self.line;
        let col = self.col;
        self.lex_string_body(line, col)
    }

    /// Lex one piece of a string: either a literal fragment, a `StringInterp`
    /// marker, a `StringEnd` (empty tail), or an `Error` for an unterminated
    /// string. A non-empty fragment that runs up to the closing quote is
    /// returned as a single `StringFragment` (no separate `StringEnd`).
    fn lex_string_body(&mut self, line: usize, col: usize) -> Token {
        let mut text = String::new();
        while self.pos < self.source.len() {
            match self.peek() {
                '"' => {
                    // End of string
                    self.advance();
                    self.in_string = false;
                    if text.is_empty() {
                        return Token { kind: TokenKind::StringEnd, lexeme: "\"".into(), line, col };
                    }
                    // Fragment running straight into the closing quote is
                    // emitted as one token; the quote is folded into the lexeme.
                    return Token {
                        kind: TokenKind::StringFragment(text.clone()),
                        lexeme: format!("{text}\""),
                        line,
                        col,
                    };
                }
                '{' => {
                    if text.is_empty() {
                        // No text before { — emit StringInterp directly.
                        self.advance();
                        self.interp_depth += 1;
                        return Token { kind: TokenKind::StringInterp, lexeme: "{".into(), line, col };
                    } else {
                        // Text before { — return the fragment first. The '{'
                        // is NOT consumed: the next call sees it with empty
                        // text and emits StringInterp.
                        return Token {
                            kind: TokenKind::StringFragment(text.clone()),
                            lexeme: text,
                            line,
                            col,
                        };
                    }
                }
                '\\' => {
                    self.advance();
                    match self.peek() {
                        'n' => {
                            self.advance();
                            text.push('\n');
                        }
                        't' => {
                            self.advance();
                            text.push('\t');
                        }
                        '\\' => {
                            self.advance();
                            text.push('\\');
                        }
                        '"' => {
                            self.advance();
                            text.push('"');
                        }
                        '{' => {
                            self.advance();
                            text.push('{');
                        }
                        // Unknown escape: keep the backslash literally; the
                        // following char is lexed normally on the next pass.
                        _ => {
                            text.push('\\');
                        }
                    }
                }
                c => {
                    self.advance();
                    text.push(c);
                }
            }
        }
        // Unterminated string. BUGFIX: leave string mode, otherwise
        // next_token() re-enters lex_string_continuation() forever and
        // tokenize() never reaches Eof.
        self.in_string = false;
        Token { kind: TokenKind::Error("unterminated string".into()), lexeme: text, line, col }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_basic_tokens() {
        let mut lexer = Lexer::new("let count = 0");
        let tokens = lexer.tokenize();
        assert!(matches!(tokens[0].kind, TokenKind::Let));
        assert!(matches!(&tokens[1].kind, TokenKind::Ident(s) if s == "count"));
        assert!(matches!(tokens[2].kind, TokenKind::Eq));
        assert!(matches!(tokens[3].kind, TokenKind::Int(0)));
    }

    #[test]
    fn test_view_declaration() {
        let mut lexer = Lexer::new("view counter =\n  column [\n    text label\n  ]");
        let tokens = lexer.tokenize();
        assert!(matches!(tokens[0].kind, TokenKind::View));
        assert!(matches!(&tokens[1].kind, TokenKind::Ident(s) if s == "counter"));
        assert!(matches!(tokens[2].kind, TokenKind::Eq));
        assert!(matches!(tokens[3].kind, TokenKind::Newline));
        assert!(matches!(tokens[4].kind, TokenKind::Column));
    }

    #[test]
    fn test_operators() {
        let mut lexer = Lexer::new("count > 10 && x <= 5");
        let tokens = lexer.tokenize();
        assert!(matches!(tokens[1].kind, TokenKind::Gt));
        assert!(matches!(tokens[3].kind, TokenKind::And));
        assert!(matches!(tokens[5].kind, TokenKind::Lte));
    }

    #[test]
    fn test_arrow() {
        let mut lexer = Lexer::new("when x > 0 ->");
        let tokens = lexer.tokenize();
        assert!(matches!(tokens[4].kind, TokenKind::Arrow));
    }

    #[test]
    fn test_string_simple() {
        let mut lexer = Lexer::new(r#""hello world""#);
        let tokens = lexer.tokenize();
        assert!(matches!(&tokens[0].kind, TokenKind::StringFragment(s) if s == "hello world"));
    }

    #[test]
    fn test_comment() {
        let mut lexer = Lexer::new("let x = 5 -- this is a comment\nlet y = 10");
        let tokens = lexer.tokenize();
        // Comments are skipped
        assert!(matches!(tokens[0].kind, TokenKind::Let));
        assert!(matches!(tokens[3].kind, TokenKind::Int(5)));
        assert!(matches!(tokens[4].kind, TokenKind::Newline));
        assert!(matches!(tokens[5].kind, TokenKind::Let));
    }

    #[test]
    fn test_slash_comment() {
        let mut lexer = Lexer::new("// this is a comment\nlet y = 10");
        let tokens = lexer.tokenize();
        // // comments are also skipped
        assert!(matches!(tokens[0].kind, TokenKind::Newline));
        assert!(matches!(tokens[1].kind, TokenKind::Let));
    }

    #[test]
    fn test_string_interpolation_tokens() {
        let mut lexer = Lexer::new(r#""Hello {name}!""#);
        let tokens = lexer.tokenize();
        // Expected: StringFragment("Hello ") → StringInterp → Ident("name") → RBrace → StringFragment("!")
        assert!(matches!(&tokens[0].kind, TokenKind::StringFragment(s) if s == "Hello "));
        assert!(matches!(tokens[1].kind, TokenKind::StringInterp));
        assert!(matches!(&tokens[2].kind, TokenKind::Ident(s) if s == "name"));
        assert!(matches!(tokens[3].kind, TokenKind::RBrace));
        assert!(matches!(&tokens[4].kind, TokenKind::StringFragment(s) if s == "!"));
    }

    #[test]
    fn test_string_interpolation_at_start() {
        let mut lexer = Lexer::new(r#""{count} items""#);
        let tokens = lexer.tokenize();
        // Expected: StringInterp → Ident("count") → RBrace → StringFragment(" items")
        assert!(matches!(tokens[0].kind, TokenKind::StringInterp));
        assert!(matches!(&tokens[1].kind, TokenKind::Ident(s) if s == "count"));
        assert!(matches!(tokens[2].kind, TokenKind::RBrace));
        assert!(matches!(&tokens[3].kind, TokenKind::StringFragment(s) if s == " items"));
    }

    #[test]
    fn test_string_interpolation_multiple() {
        let mut lexer = Lexer::new(r#""{a} and {b}""#);
        let tokens = lexer.tokenize();
        // StringInterp → Ident(a) → RBrace → StringFragment(" and ") → StringInterp → Ident(b) → RBrace → StringEnd
        assert!(matches!(tokens[0].kind, TokenKind::StringInterp));
        assert!(matches!(&tokens[1].kind, TokenKind::Ident(s) if s == "a"));
        assert!(matches!(tokens[2].kind, TokenKind::RBrace));
        assert!(matches!(&tokens[3].kind, TokenKind::StringFragment(s) if s == " and "));
        assert!(matches!(tokens[4].kind, TokenKind::StringInterp));
        assert!(matches!(&tokens[5].kind, TokenKind::Ident(s) if s == "b"));
        assert!(matches!(tokens[6].kind, TokenKind::RBrace));
        assert!(matches!(tokens[7].kind, TokenKind::StringEnd));
    }

    #[test]
    fn test_string_no_interpolation() {
        let mut lexer = Lexer::new(r#""plain string""#);
        let tokens = lexer.tokenize();
        assert!(matches!(&tokens[0].kind, TokenKind::StringFragment(s) if s == "plain string"));
    }

    #[test]
    fn test_string_escaped_brace() {
        let mut lexer = Lexer::new(r#""literal \{brace}""#);
        let tokens = lexer.tokenize();
        // \{ should be a literal { in the string, not interpolation
        assert!(matches!(&tokens[0].kind, TokenKind::StringFragment(s) if s.contains("{")));
    }
}