From 6e8c97e00284849626c4d085064b197b963405f4 Mon Sep 17 00:00:00 2001 From: walonCode Date: Fri, 26 Jun 2026 07:18:08 +0000 Subject: [PATCH 1/5] fix(parser): replace {:?} debug output with human-readable token names, add switch default arm --- src/ast/ast.rs | 1 + src/parser/parser.rs | 111 +++++++++++++++++++++++++++++++++---------- src/token/token.rs | 2 + 3 files changed, 89 insertions(+), 25 deletions(-) diff --git a/src/ast/ast.rs b/src/ast/ast.rs index c4a7fa0..44946f4 100644 --- a/src/ast/ast.rs +++ b/src/ast/ast.rs @@ -287,6 +287,7 @@ pub enum Expression { Switch { subject: Box, arms: Vec, + default:Option>, line: usize, column: usize, end_line: usize, diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 5c72c1c..56d1fff 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -130,8 +130,9 @@ impl Parser { got: self.cur_token.clone(), }, message: format!( - "expected identifier in array destructuring pattern, got {:?}", - self.cur_token.token_type + "expected indentifier in array destructuring pattern, got {:?}", + // token_type_name(&TokenType::Ident), + token_type_name(&self.cur_token.token_type) ), line: self.cur_token.line, column: self.cur_token.column, @@ -163,7 +164,7 @@ impl Parser { }, message: format!( "expected identifier in hash destructuring pattern, got {:?}", - self.cur_token.token_type + token_type_name(&self.cur_token.token_type) ), line: self.cur_token.line, column: self.cur_token.column, @@ -184,7 +185,7 @@ impl Parser { }, message: format!( "expected identifier for destructuring alias, got {:?}", - self.cur_token.token_type + token_type_name(&self.cur_token.token_type) ), line: self.cur_token.line, column: self.cur_token.column, @@ -566,9 +567,37 @@ impl Parser { } let mut arms = Vec::new(); + let mut default: Option> = None; while !self.peak_token_is(&TokenType::RBrace) && !self.peak_token_is(&TokenType::EOF) { - self.next_token(); // cur = pattern expression start + self.next_token(); // cur = pattern or 'default' + 
+ if self.cur_token_is(&TokenType::Default) { + if !self.expect_peak(TokenType::FatArrow) { + return None; + } + let body = if self.peak_token_is(&TokenType::LBrace) { + self.next_token(); + self.parse_block_statement()? + } else { + self.next_token(); + let body_line = self.cur_token.line; + let body_col = self.cur_token.column; + let expr = self.parse_expression(Precedences::Lowest)?; + Statement::Expression { + expr, + line: body_line, + column: body_col, + end_line: self.cur_token.line, + end_column: self.cur_token.column + 1, + } + }; + if self.peak_token_is(&TokenType::Comma) { + self.next_token(); + } + default = Some(Box::new(body)); + continue; + } let pattern = self.parse_expression(Precedences::Lowest)?; @@ -584,7 +613,6 @@ impl Parser { let body_line = self.cur_token.line; let body_col = self.cur_token.column; let expr = self.parse_expression(Precedences::Lowest)?; - // cur_token is last token of expr Statement::Expression { expr, line: body_line, @@ -594,25 +622,25 @@ impl Parser { } }; + if self.peak_token_is(&TokenType::Comma) { + self.next_token(); + } + arms.push(SwitchArm { pattern, body: Box::new(body), }); - - if self.peak_token_is(&TokenType::Comma) { - self.next_token(); - } } if !self.expect_peak(TokenType::RBrace) { return None; } - // cur_token is '}' let end_line = self.cur_token.line; let end_column = self.cur_token.column + 1; Some(Expression::Switch { subject: Box::new(subject), + default, arms, line, column, @@ -635,7 +663,7 @@ impl Parser { }, message: format!( "expected loop variable name, got {:?}", - self.cur_token.token_type + token_type_name(&self.cur_token.token_type) ), line: self.cur_token.line, column: self.cur_token.column, @@ -658,7 +686,7 @@ impl Parser { }, message: format!( "expected second loop variable name, got {:?}", - self.cur_token.token_type + token_type_name(&self.cur_token.token_type) ), line: self.cur_token.line, column: self.cur_token.column, @@ -806,7 +834,7 @@ impl Parser { }, message: format!( "expected 
parameter name, got {:?}", - self.cur_token.token_type + token_type_name(&self.cur_token.token_type) ), line: self.cur_token.line, column: self.cur_token.column, @@ -1239,7 +1267,7 @@ impl Parser { }, message: format!( "'pub' can only precede 'let' or 'const', got {:?}", - self.cur_token.token_type + token_type_name(&self.cur_token.token_type) ), line: self.cur_token.line, column: self.cur_token.column, @@ -1272,7 +1300,7 @@ impl Parser { expected: TokenType::Ident(String::new()), got: self.cur_token.clone(), }, - message: format!("expected enum name, got {:?}", self.cur_token.token_type), + message: format!("expected enum name, got {:?}", token_type_name(&self.cur_token.token_type)), line: self.cur_token.line, column: self.cur_token.column, }); @@ -1297,7 +1325,7 @@ impl Parser { }, message: format!( "expected enum variant name, got {:?}", - self.cur_token.token_type + token_type_name(&self.cur_token.token_type) ), line: self.cur_token.line, column: self.cur_token.column, @@ -1403,7 +1431,7 @@ impl Parser { }, message: format!( "expected struct field name, got {:?}", - self.cur_token.token_type + token_type_name(&self.cur_token.token_type) ), line: self.cur_token.line, column: self.cur_token.column, @@ -1511,7 +1539,7 @@ impl Parser { }, message: format!( "expected identifier in array destructuring pattern, got {:?}", - self.cur_token.token_type + token_type_name(&self.cur_token.token_type) ), line: self.cur_token.line, column: self.cur_token.column, @@ -1543,7 +1571,7 @@ impl Parser { }, message: format!( "expected identifier in hash destructuring pattern, got {:?}", - self.cur_token.token_type + token_type_name(&self.cur_token.token_type) ), line: self.cur_token.line, column: self.cur_token.column, @@ -1564,7 +1592,7 @@ impl Parser { }, message: format!( "expected identifier for destructuring alias, got {:?}", - self.cur_token.token_type + token_type_name(&self.cur_token.token_type) ), line: self.cur_token.line, column: self.cur_token.column, @@ -1703,7 +1731,7 
@@ impl Parser { }, message: format!( "expected struct field name, got {:?}", - self.cur_token.token_type + token_type_name(&self.cur_token.token_type) ), line: self.cur_token.line, column: self.cur_token.column, @@ -1810,9 +1838,9 @@ impl Parser { got: self.peak_token.clone(), }, message: format!( - "expected {:?}, got {:?}", - expected.clone(), - self.peak_token.token_type + "expected {}, got {}", + token_type_name(&expected), + token_type_name(&self.cur_token.token_type) ), line: self.peak_token.line, column: self.peak_token.column, @@ -1893,3 +1921,36 @@ impl Parser { p } } + + +fn token_type_name(tt: &TokenType) -> String { + match tt { + TokenType::Ident(s) if s.is_empty() => "identifier".to_string(), + TokenType::Ident(s) => format!("'{}'", s), + TokenType::Int(n) => format!("{}", n), + TokenType::Float(f) => format!("{}", f), + TokenType::InterpolatedString(_) => "string".to_string(), + TokenType::Char(c) => format!("'{}'", c), + TokenType::Let => "'let'".to_string(), + TokenType::Const => "'const'".to_string(), + TokenType::Function => "'fn'".to_string(), + TokenType::If => "'if'".to_string(), + TokenType::Else => "'else'".to_string(), + TokenType::Return => "'return'".to_string(), + TokenType::Import => "'import'".to_string(), + TokenType::LBrace => "'{'".to_string(), + TokenType::RBrace => "'}'".to_string(), + TokenType::LParan => "'('".to_string(), + TokenType::RParen => "')'".to_string(), + TokenType::LBracket => "'['".to_string(), + TokenType::RBracket => "']'".to_string(), + TokenType::Semicolon => "';'".to_string(), + TokenType::Comma => "','".to_string(), + TokenType::Colon => "':'".to_string(), + TokenType::Assign => "'='".to_string(), + TokenType::FatArrow => "'=>'".to_string(), + TokenType::EOF => "end of file".to_string(), + TokenType::ILLEGAL => "illegal token".to_string(), + _ => format!("{:?}", tt), // fallback for operators + } +} diff --git a/src/token/token.rs b/src/token/token.rs index e7df2e4..cceeccb 100644 --- a/src/token/token.rs +++ 
b/src/token/token.rs @@ -29,6 +29,7 @@ pub enum TokenType { Pub, Typeof, Null, + Default, NullCoalesce, @@ -132,6 +133,7 @@ pub fn lookup_ident(ident: &str) -> TokenType { "pub" => TokenType::Pub, "typeof" => TokenType::Typeof, "null" => TokenType::Null, + "default" => TokenType::Default, _ => TokenType::Ident(ident.to_string()), } } From f9bcc067a61efb3dbaf6609bdc867125519342fc Mon Sep 17 00:00:00 2001 From: walonCode Date: Fri, 26 Jun 2026 07:18:16 +0000 Subject: [PATCH 2/5] fix(evaluator): overflow protection on ++/--, guard negative array index, error on undeclared assign --- src/evaluator/evaluator.rs | 71 +++++++++++++++++++++++++++++--------- src/object/object.rs | 2 +- 2 files changed, 56 insertions(+), 17 deletions(-) diff --git a/src/evaluator/evaluator.rs b/src/evaluator/evaluator.rs index 807ff47..69e8acf 100644 --- a/src/evaluator/evaluator.rs +++ b/src/evaluator/evaluator.rs @@ -1,4 +1,4 @@ -use std::{cell::RefCell, collections::HashMap, rc::Rc}; +use std::{cell::RefCell, collections::{HashMap, HashSet}, rc::Rc}; use crate::{ ast::ast::{Expression, LetPattern, Program, Statement, StringSegment}, @@ -324,7 +324,7 @@ impl Evaluator { fn eval_expression(&mut self, expr: &Expression, env: &Env) -> Object { match expr { - Expression::Int { value, .. } => Object::Integer(*value as i64), + Expression::Int { value, .. } => Object::Integer(*value), Expression::Float { value, .. } => Object::Float(*value), Expression::InterpolatedString { parts, .. } => { let mut result = String::new(); @@ -738,7 +738,7 @@ impl Evaluator { Object::Null } - Expression::Switch { subject, arms, .. } => { + Expression::Switch { subject, arms, default, .. } => { let subject_val = self.eval_expression(subject, env); if matches!(subject_val, Object::Error { .. 
}) { return subject_val; @@ -754,6 +754,10 @@ impl Evaluator { } } + if let Some(arm) = default { + return self.eval_statement(arm, env) + } + Object::Null } @@ -772,9 +776,17 @@ impl Evaluator { let updated = match ¤t { Object::Integer(v) => { if matches!(operator.token_type, TokenType::Inc) { - Object::Integer(v + 1) + v.checked_add(1).map(Object::Integer).unwrap_or_else(||Object::Error { + message: format!("integer overflow: {} + 1", v), + line: *line, + column: *column + }) } else { - Object::Integer(v - 1) + v.checked_sub(1).map(Object::Integer).unwrap_or_else(||Object::Error{ + message:format!("integer overflow: {} - 1", v), + line: *line, + column: *column + }) } } Object::Float(v) => { @@ -796,10 +808,16 @@ impl Evaluator { } }; match target.as_ref() { - Expression::Ident { value: name, .. } - if !env.borrow_mut().update(name, updated.clone()) => - { - env.borrow_mut().set(name.clone(), updated.clone()); + Expression::Ident { value: name, .. } => { + if env.borrow().get(name).is_none() { + return Object::Error { + message: format!("cannot update undeclared variable '{}'", name), + line: *line, + column: *column + } + } + + env.borrow_mut().set(name.clone(), updated.clone()) } Expression::Member { object: obj_expr, @@ -905,7 +923,16 @@ impl Evaluator { }; } }; + + let valid_filed:HashSet<&str> = instance_fields.keys().map(|s| s.as_str()).collect(); for (k, v_expr) in fields { + if !valid_filed.contains(k.as_str()) { + return Object::Error{ + message: format!("unknown field '{}' on struct '{}'", k , name), + line: *line, + column: *column + } + } let val = self.eval_expression(v_expr, env); if matches!(val, Object::Error { .. 
}) { return val; @@ -1073,8 +1100,8 @@ impl Evaluator { fn eval_integer_infix( &self, op: &TokenType, - l: i64, - r: i64, + l: isize, + r: isize, line: usize, column: usize, ) -> Object { @@ -1125,14 +1152,14 @@ impl Evaluator { } TokenType::Square => { let result = (l as f64).powf(r as f64); - if result > i64::MAX as f64 || result < i64::MIN as f64 { + if result > isize::MAX as f64 || result < isize::MIN as f64 { return Object::Error { message: format!("integer overflow: {} ** {}", l, r), line, column, }; } - Object::Integer(result as i64) + Object::Integer(result as isize) } TokenType::Floor => { if r == 0 { @@ -1142,7 +1169,7 @@ impl Evaluator { column, }; } - Object::Integer(((l as f64) / (r as f64)).floor() as i64) + Object::Integer(((l as f64) / (r as f64)).floor() as isize) } TokenType::LT => Object::Bool(l < r), TokenType::GT => Object::Bool(l > r), @@ -1268,7 +1295,12 @@ impl Evaluator { return final_val; } if !env.borrow_mut().update(name, final_val.clone()) { - env.borrow_mut().set(name.clone(), final_val.clone()); + // env.borrow_mut().set(name.clone(), final_val.clone()); + return Object::Error { + message: format!("cannot assign to an undeclared variable '{}'; use 'let {} = ....' 
to declared it first", + name, name), + line, column + } } final_val } @@ -1400,7 +1432,14 @@ impl Evaluator { match &mut container { Object::Array(elements) => { if let Object::Integer(i) = &idx { - let i = *i as usize; + let i_raw = *i; + if i_raw < 0 { + return Object::Error { + message: format!("index {} out of range (len {})",i_raw, elements.len()), + line, column } + } + + let i = i_raw as usize; if i >= elements.len() { return Object::Error { message: format!("index out of range: {}", i), diff --git a/src/object/object.rs b/src/object/object.rs index faa417a..a0895ee 100644 --- a/src/object/object.rs +++ b/src/object/object.rs @@ -18,7 +18,7 @@ pub trait Evaluable { #[derive(Clone)] pub enum Object { - Integer(i64), + Integer(isize), Float(f64), StringType(String), Char(char), From 2f93c1d91a31a4c3f9f0cbc8303483b0f1b362a7 Mon Sep 17 00:00:00 2001 From: walonCode Date: Fri, 26 Jun 2026 07:18:29 +0000 Subject: [PATCH 3/5] fix(stdlib): restore time.sleep guard, add fmt.format type validation, fix edge cases across modules --- src/std_lib/array.rs | 14 +++++++------- src/std_lib/fmt.rs | 43 +++++++++++++++++++++++++++++++++++++++--- src/std_lib/hash.rs | 2 +- src/std_lib/http.rs | 2 +- src/std_lib/json.rs | 2 +- src/std_lib/rand.rs | 2 +- src/std_lib/strings.rs | 26 ++++++++++++++++++++++--- src/std_lib/time.rs | 39 ++++++++++++++++++++++---------------- 8 files changed, 97 insertions(+), 33 deletions(-) diff --git a/src/std_lib/array.rs b/src/std_lib/array.rs index 9bdd41a..f827071 100644 --- a/src/std_lib/array.rs +++ b/src/std_lib/array.rs @@ -110,8 +110,8 @@ fn len(args: Vec, info: CallInfo) -> Object { }; } match &args[0] { - Object::Array(elems) => Object::Integer(elems.len() as i64), - Object::StringType(s) => Object::Integer(s.len() as i64), + Object::Array(elems) => Object::Integer(elems.len() as isize), + Object::StringType(s) => Object::Integer(s.len() as isize), _ => Object::Error { message: format!( "arrays.len expects ARRAY or STRING, got {}", @@ 
-228,7 +228,7 @@ fn index_of(args: Vec, info: CallInfo) -> Object { Object::Array(elems) => { for (i, e) in elems.iter().enumerate() { if obj_eq(e, &args[1]) { - return Object::Integer(i as i64); + return Object::Integer(i as isize); } } Object::Integer(-1) @@ -292,8 +292,8 @@ fn slice(args: Vec, info: CallInfo) -> Object { } }; let len = elems.len() as i64; - let start = start.clamp(0, len) as usize; - let end = end.clamp(0, len) as usize; + let start = start.clamp(0, len as isize) as usize; + let end = end.clamp(0, len as isize) as usize; if start >= end { return Object::Array(vec![]); } @@ -401,7 +401,7 @@ fn sum(args: Vec, info: CallInfo) -> Object { for e in &elems { match e { Object::Integer(n) => { - total_int += n; + total_int += *n as i64; total_float += *n as f64; } Object::Float(n) => { @@ -423,7 +423,7 @@ fn sum(args: Vec, info: CallInfo) -> Object { if has_float { Object::Float(total_float) } else { - Object::Integer(total_int) + Object::Integer(total_int as isize) } } diff --git a/src/std_lib/fmt.rs b/src/std_lib/fmt.rs index 0cf4e09..4e17b32 100644 --- a/src/std_lib/fmt.rs +++ b/src/std_lib/fmt.rs @@ -38,9 +38,9 @@ fn to_int(args: Vec, info: CallInfo) -> Object { } match &args[0] { Object::Integer(n) => Object::Integer(*n), - Object::Float(f) => Object::Integer(*f as i64), + Object::Float(f) => Object::Integer(*f as isize), Object::Bool(b) => Object::Integer(if *b { 1 } else { 0 }), - Object::StringType(s) => match s.trim().parse::() { + Object::StringType(s) => match s.trim().parse::() { Ok(n) => Object::Integer(n), Err(_) => Object::Error { message: format!("fmt.to_int: cannot convert \"{}\" to integer", s), @@ -184,12 +184,19 @@ fn format_fn(args: Vec, info: CallInfo) -> Object { let mut i = 0; while i < chars.len() { if chars[i] == '%' && i + 1 < chars.len() { + if chars[i+ 1] == '\0'{ + return Object::Error{ + message: "unterminated format specifier %".to_string(), + line:info.line, + column:info.column + } + } match chars[i + 1] { '%' => { 
result.push('%'); i += 2; } - 's' | 'd' | 'f' => { + 's' => { if arg_idx >= args.len() { return Object::Error { message: "fmt.format: not enough arguments for format string" @@ -201,7 +208,30 @@ fn format_fn(args: Vec, info: CallInfo) -> Object { result.push_str(&format!("{}", args[arg_idx])); arg_idx += 1; i += 2; + }, + + 'f' => { + match &args[arg_idx] { + Object::Float(n) => { result.push_str(&n.to_string()); arg_idx += 1;}, + _ => return Object::Error { + message: format!("format: %d expect float, got {}",args[arg_idx].type_name() ), + line: info.line, + column: info.column + } + } } + + 'd' => { + match &args[arg_idx] { + Object::Integer(n) => { result.push_str(&n.to_string()); arg_idx += 1;}, + _ => return Object::Error { + message: format!("format: %d expect integer, got {}",args[arg_idx].type_name() ), + line: info.line, + column: info.column + } + } + } + _ => { result.push(chars[i]); i += 1; @@ -212,6 +242,13 @@ fn format_fn(args: Vec, info: CallInfo) -> Object { i += 1; } } + if arg_idx < args.len(){ + return Object::Error{ + message:format!("fmt.format: {} unused argument(s)", args.len() - arg_idx), + line: info.line, + column: info.column + } + } Object::StringType(result) } diff --git a/src/std_lib/hash.rs b/src/std_lib/hash.rs index ee15853..96e622a 100644 --- a/src/std_lib/hash.rs +++ b/src/std_lib/hash.rs @@ -158,7 +158,7 @@ fn len(args: Vec, info: CallInfo) -> Object { }; } match &args[0] { - Object::Hash(pairs) => Object::Integer(pairs.len() as i64), + Object::Hash(pairs) => Object::Integer(pairs.len() as isize), _ => Object::Error { message: format!("hash.len expects HASH, got {}", args[0].type_name()), line: info.line, diff --git a/src/std_lib/http.rs b/src/std_lib/http.rs index e04add1..9f1cb0b 100644 --- a/src/std_lib/http.rs +++ b/src/std_lib/http.rs @@ -9,7 +9,7 @@ fn make_response(status: u16, body: String) -> Object { Object::Hash(vec![ ( Object::StringType("status".to_string()), - Object::Integer(status as i64), + Object::Integer(status 
as isize), ), ( Object::StringType("body".to_string()), diff --git a/src/std_lib/json.rs b/src/std_lib/json.rs index 6c9cb57..2cdbb7c 100644 --- a/src/std_lib/json.rs +++ b/src/std_lib/json.rs @@ -38,7 +38,7 @@ fn json_to_object(val: Value) -> Object { Value::Bool(b) => Object::Bool(b), Value::Number(n) => { if let Some(i) = n.as_i64() { - Object::Integer(i) + Object::Integer(i as isize) } else { Object::Float(n.as_f64().unwrap_or(0.0)) } diff --git a/src/std_lib/rand.rs b/src/std_lib/rand.rs index 049377a..f0c2f03 100644 --- a/src/std_lib/rand.rs +++ b/src/std_lib/rand.rs @@ -45,7 +45,7 @@ fn int(args: Vec, info: CallInfo) -> Object { column: info.column, }; } - Object::Integer(rand::rng().random_range(min..=max)) + Object::Integer(rand::rng().random_range(min as i64..=max as i64) as isize) } fn float(args: Vec, info: CallInfo) -> Object { diff --git a/src/std_lib/strings.rs b/src/std_lib/strings.rs index f65c8c6..a381452 100644 --- a/src/std_lib/strings.rs +++ b/src/std_lib/strings.rs @@ -379,7 +379,7 @@ fn index(args: Vec, info: CallInfo) -> Object { } }; match s.find(substr.as_str()) { - Some(i) => Object::Integer(i as i64), + Some(i) => Object::Integer(i as isize), None => Object::Integer(-1), } } @@ -418,7 +418,7 @@ fn count(args: Vec, info: CallInfo) -> Object { }; } }; - Object::Integer(s.matches(substr.as_str()).count() as i64) + Object::Integer(s.matches(substr.as_str()).count() as isize) } fn repeat(args: Vec, info: CallInfo) -> Object { @@ -556,7 +556,7 @@ fn parse_int(args: Vec, info: CallInfo) -> Object { }; } match &args[0] { - Object::StringType(s) => match s.trim().parse::() { + Object::StringType(s) => match s.trim().parse::() { Ok(n) => Object::Integer(n), Err(_) => Object::Error { message: format!("strings.parse_int: cannot parse \"{}\" as integer", s), @@ -603,6 +603,25 @@ fn parse_float(args: Vec, info: CallInfo) -> Object { } } +fn len(args: Vec, info:CallInfo) -> Object { + if args.len() != 1 { + return Object::Error { + message: 
"strings.len() takes 1 argument".to_string(), + line: info.line, + column: info.column + } + } + + match &args[0] { + Object::StringType(s) => Object::Integer(s.chars().count() as isize), + _ => Object::Error { + message: "strings.len expects a string".to_string(), + line: info.line, + column: info.column + } + } +} + pub fn module() -> Object { let mut members: HashMap = HashMap::new(); members.insert("to_upper".to_string(), Object::Builtin(to_upper)); @@ -628,6 +647,7 @@ pub fn module() -> Object { members.insert("is_empty".to_string(), Object::Builtin(is_empty)); members.insert("pad_left".to_string(), Object::Builtin(pad_left)); members.insert("pad_right".to_string(), Object::Builtin(pad_right)); + members.insert("len".to_string(), Object::Builtin(len)); Object::Module { name: "strings".to_string(), pub_gated: false, diff --git a/src/std_lib/time.rs b/src/std_lib/time.rs index d6674bc..345816d 100644 --- a/src/std_lib/time.rs +++ b/src/std_lib/time.rs @@ -12,7 +12,7 @@ fn now(args: Vec, info: CallInfo) -> Object { column: info.column, }; } - Object::Integer(Utc::now().timestamp_millis()) + Object::Integer(Utc::now().timestamp_millis() as isize) } fn unix(args: Vec, info: CallInfo) -> Object { @@ -23,7 +23,7 @@ fn unix(args: Vec, info: CallInfo) -> Object { column: info.column, }; } - Object::Integer(Utc::now().timestamp()) + Object::Integer(Utc::now().timestamp() as isize) } fn sleep(args: Vec, info: CallInfo) -> Object { @@ -36,6 +36,13 @@ fn sleep(args: Vec, info: CallInfo) -> Object { } match &args[0] { Object::Integer(ms) => { + if *ms < 0 { + return Object::Error{ + message: format!("time.sleep: duration cannot be negative, got {}", ms), + line: info.line, + column:info.column + } + } std::thread::sleep(std::time::Duration::from_millis(*ms as u64)); Object::Null } @@ -56,7 +63,7 @@ fn since(args: Vec, info: CallInfo) -> Object { }; } match &args[0] { - Object::Integer(start_ms) => Object::Integer(Utc::now().timestamp_millis() - start_ms), + 
Object::Integer(start_ms) => Object::Integer((Utc::now().timestamp_millis() - *start_ms as i64) as isize), _ => Object::Error { message: format!("time.since expects INTEGER, got {}", args[0].type_name()), line: info.line, @@ -100,7 +107,7 @@ fn format(args: Vec, info: CallInfo) -> Object { } }; let dt: DateTime = Local - .timestamp_millis_opt(ms) + .timestamp_millis_opt(ms as i64) .single() .unwrap_or_else(Local::now); Object::StringType(dt.format(&layout).to_string()) @@ -117,10 +124,10 @@ fn year(args: Vec, info: CallInfo) -> Object { match &args[0] { Object::Integer(ms) => { let dt = Utc - .timestamp_millis_opt(*ms) + .timestamp_millis_opt(*ms as i64) .single() .unwrap_or_else(Utc::now); - Object::Integer(dt.year() as i64) + Object::Integer(dt.year() as isize) } _ => Object::Error { message: format!("time.year expects INTEGER, got {}", args[0].type_name()), @@ -141,10 +148,10 @@ fn month(args: Vec, info: CallInfo) -> Object { match &args[0] { Object::Integer(ms) => { let dt = Utc - .timestamp_millis_opt(*ms) + .timestamp_millis_opt(*ms as i64) .single() .unwrap_or_else(Utc::now); - Object::Integer(dt.month() as i64) + Object::Integer(dt.month() as isize) } _ => Object::Error { message: format!("time.month expects INTEGER, got {}", args[0].type_name()), @@ -165,10 +172,10 @@ fn day(args: Vec, info: CallInfo) -> Object { match &args[0] { Object::Integer(ms) => { let dt = Utc - .timestamp_millis_opt(*ms) + .timestamp_millis_opt(*ms as i64) .single() .unwrap_or_else(Utc::now); - Object::Integer(dt.day() as i64) + Object::Integer(dt.day() as isize) } _ => Object::Error { message: format!("time.day expects INTEGER, got {}", args[0].type_name()), @@ -189,10 +196,10 @@ fn hour(args: Vec, info: CallInfo) -> Object { match &args[0] { Object::Integer(ms) => { let dt = Utc - .timestamp_millis_opt(*ms) + .timestamp_millis_opt(*ms as i64) .single() .unwrap_or_else(Utc::now); - Object::Integer(dt.hour() as i64) + Object::Integer(dt.hour() as isize) } _ => Object::Error { 
message: format!("time.hour expects INTEGER, got {}", args[0].type_name()), @@ -213,10 +220,10 @@ fn minute(args: Vec, info: CallInfo) -> Object { match &args[0] { Object::Integer(ms) => { let dt = Utc - .timestamp_millis_opt(*ms) + .timestamp_millis_opt(*ms as i64) .single() .unwrap_or_else(Utc::now); - Object::Integer(dt.minute() as i64) + Object::Integer(dt.minute() as isize) } _ => Object::Error { message: format!("time.minute expects INTEGER, got {}", args[0].type_name()), @@ -237,10 +244,10 @@ fn second(args: Vec, info: CallInfo) -> Object { match &args[0] { Object::Integer(ms) => { let dt = Utc - .timestamp_millis_opt(*ms) + .timestamp_millis_opt(*ms as i64) .single() .unwrap_or_else(Utc::now); - Object::Integer(dt.second() as i64) + Object::Integer(dt.second() as isize) } _ => Object::Error { message: format!("time.second expects INTEGER, got {}", args[0].type_name()), From b3a3f8eb12a4e834bf680d38ed64025709d170fa Mon Sep 17 00:00:00 2001 From: walonCode Date: Fri, 26 Jun 2026 07:18:37 +0000 Subject: [PATCH 4/5] feat(fmt): add formatter foundation, extend lint rules and commands --- INTERNALS.md | 646 +++++++++++++++++++++++++++++++++++ crates/fmt/src/commands.rs | 54 +++ crates/fmt/src/formatter.rs | 14 + crates/fmt/src/lint_rules.rs | 36 +- crates/fmt/src/main.rs | 13 +- 5 files changed, 761 insertions(+), 2 deletions(-) create mode 100644 INTERNALS.md create mode 100644 crates/fmt/src/formatter.rs diff --git a/INTERNALS.md b/INTERNALS.md new file mode 100644 index 0000000..c642608 --- /dev/null +++ b/INTERNALS.md @@ -0,0 +1,646 @@ +# code-lang Internals — How the Language Works + +> This document explains the full execution pipeline from source text to a result. +> It is written for someone reading or modifying the interpreter source, not for +> end users of the language. + +--- + +## Overview + +code-lang is a **tree-walking interpreter** written in Rust. 
There is no bytecode compiler and no virtual machine — the AST (Abstract Syntax Tree) is evaluated directly. The pipeline is linear: + +``` +Source text (.cl file or REPL input) + │ + ▼ + [ Lexer ] src/lexer/lexer.rs + │ produces Token stream + ▼ + [ Parser ] src/parser/parser.rs + │ produces Program (AST) + ▼ + [ Evaluator ] src/evaluator/evaluator.rs + │ walks AST, returns Object + ▼ + [ Result ] printed to stdout or returned to REPL +``` + +Each stage is independent — the lexer knows nothing about the parser, the parser knows nothing about the evaluator. The stages communicate through two data structures: `Token` (lexer → parser) and `Program` / `Expression` / `Statement` (parser → evaluator). + +--- + +## Entry Points + +**File:** `src/main.rs` + +The binary has two modes, selected by whether a file argument is provided: + +``` +code-lang → run_repl() +code-lang script.cl → execute(file_contents) +``` + +Both modes live in `src/repl/repl.rs`. + +`execute()` is a single-shot pipeline: lex → parse → eval → print result → exit. +`run_repl()` runs the same pipeline in a loop, reusing the same `Evaluator` and `Environment` across inputs so variables declared in one line are visible in the next. + +The only file extension accepted in script mode is `.cl` — any other extension is rejected before reading the file. + +--- + +## Stage 1 — Lexer + +**File:** `src/lexer/lexer.rs` +**Key type:** `Lexer` +**Output:** a stream of `Token` values, one per call to `next_token()` + +### What it does + +The lexer converts raw source text into a flat sequence of tokens. It works character by character, maintaining: + +- `position` — index of the character currently being examined +- `read_position` — index of the next character (one ahead) +- `ch` — the current character +- `line` / `column` — current source position (updated on every `read_char()`) + +Every token carries its `line` and `column` so error messages can point to the right place. + +### How `next_token()` works + +1. 
Skip whitespace (spaces, tabs, newlines, carriage returns) +2. Look at `self.ch` and match it: + - Single-character tokens: `(`, `)`, `{`, `}`, `[`, `]`, `;`, `,`, `:` + - Two-character tokens: peek at `self.peak_char()` to decide — `==`, `!=`, `=>`, `++`, `--`, `+=`, `-=`, `*=`, `/=`, `%=`, `<=`, `>=`, `//`, `&&`, `||`, `??` + - Comments: `#` skips to end of line; `/*` skips to `*/` — neither produces a token, `next_token()` calls itself recursively + - String literal `"..."` — calls `read_string()` which handles interpolation + - Char literal `'x'` — calls `read_char_type()` + - Identifier or keyword — calls `read_identifier()` then `lookup_ident()` to check if it's a keyword + - Number — calls `read_number()` which handles both integers and floats (a `.` mid-number switches to float mode) +3. Advance `position` and return the token + +### Keyword map + +`lookup_ident()` in `src/token/token.rs` maps identifier strings to keyword token types: + +``` +"fn" → Function "let" → Let +"const" → Const "if" → If +"else" → Else "elseif" → ElseIf +"while" → While "for" → For +"return" → Return "break" → Break +"continue" → Continue "in" → In +"import" → Import "struct" → Struct +"switch" → Switch "enum" → Enum +"pub" → Pub "typeof" → Typeof +"null" → Null "true" → True +"false" → False "default" → Default +anything else → Ident(string) +``` + +### String interpolation + +`read_string()` handles `"..."` literals with `${}` embedded expressions. It scans character by character building a `Vec<StringPart>`: + +- Ordinary characters accumulate into `StringPart::Literal(string)` +- `${` triggers a nested scan: reads until the matching `}` (tracking brace depth), storing the raw source text of the expression as `StringPart::Expr(source)` + +The parser later re-lexes and re-parses each `StringPart::Expr` source string to produce the final `Expression::InterpolatedString`. + +### Token types + +All token types are in `src/token/token.rs` as the `TokenType` enum. 
Notable ones: + +| Token | Represents | +|---|---| +| `Int(isize)` | Integer literal, value embedded in token | +| `Float(f64)` | Float literal | +| `Char(char)` | Character literal `'x'` | +| `InterpolatedString(Vec<StringPart>)` | String with optional `${}` segments | +| `Ident(String)` | Any name that is not a keyword | +| `FatArrow` | `=>` used in switch arms | +| `Floor` | `//` integer division | +| `Square` | `**` power | +| `NullCoalesce` | `??` | +| `Inc` / `Dec` | `++` / `--` | +| `EOF` | End of input | +| `ILLEGAL` | Character the lexer cannot classify | + +--- + +## Stage 2 — Parser + +**File:** `src/parser/parser.rs` +**Key type:** `Parser` +**Output:** `Program` — a list of `Statement` nodes + +### How the parser works + +code-lang uses a **Pratt parser** (top-down operator precedence parsing). This is the same technique used by V8's JavaScript parser and `rustc`'s expression parser. + +The parser holds: +- `cur_token` — the token currently being examined +- `peak_token` — the next token (one ahead) +- `errors: Vec<ParseError>` — collected errors (parser never panics) + +`next_token()` advances both: `cur_token ← peak_token`, `peak_token ← lexer.next_token()`. + +### Top level: `parse_program()` + +Calls `parse_statement()` in a loop until `EOF`. Each statement is appended to `Program::statements`. + +### Statement parsing + +`parse_statement()` dispatches on `cur_token.token_type`: + +| Token | Calls | +|---|---| +| `Let` | `parse_let_statement()` | +| `Const` | `parse_const_statement()` | +| `Return` | `parse_return_statement()` | +| `Import` | `parse_import_statement()` | +| `Break` | `parse_break_statement()` | +| `Continue` | `parse_continue_statement()` | +| `Struct` | `parse_struct_statement()` | +| `Enum` | `parse_enum_statement()` | +| `Pub` | `parse_pub_statement()` | +| anything else | `parse_expression_statement()` | + +### Expression parsing: Pratt (TDOP) + +`parse_expression(precedence)` is the core. It: + +1. 
Calls the **prefix function** for `cur_token` to get a left-hand expression +2. Loops: while `peek_token` has higher precedence than `precedence`, calls the **infix function** for `peek_token`, passing in the current left-hand expression + +Prefix functions handle things that start an expression: +- Literal tokens (`Int`, `Float`, `Char`, `Bool`, `Null`, `InterpolatedString`) → literal `Expression` nodes +- `Ident` → `Expression::Ident` +- `Bang` / `Minus` → `Expression::Prefix` +- `LParan` → grouped expression (recurse, expect `)`) +- `LBracket` → array literal +- `LBrace` → hash literal +- `Function` → `parse_function_literal()` +- `If` → `parse_if_expression()` +- `While` → `parse_while_expression()` +- `For` → `parse_for_expression()` (detects `for-in` if `In` token follows the variable) +- `Switch` → `parse_switch_expression()` +- `Typeof` → `parse_typeof_expression()` + +Infix functions handle binary operators — `+`, `-`, `*`, `/`, `==`, `!=`, `<`, `>`, `<=`, `>=`, `&&`, `||`, `??`, `**`, `//`, `%`: +- All emit `Expression::Infix` +- `LParan` as infix → `Expression::Call` (function call) +- `LBracket` as infix → `Expression::Index` (array/hash index) +- `Dot` as infix → `Expression::Member` (property access) +- `Inc` / `Dec` as infix → `Expression::Update` (postfix) + +### Precedence levels + +``` +Lowest = 0 (default) +Assign = 1 = += -= *= /= %= +NullCoal = 2 ?? +Or = 3 || +And = 4 && +Equals = 5 == != +LessGreater = 6 < > <= >= +Sum = 7 + - +Product = 8 * / % // ** +Prefix = 9 -x !x (not an infix level) +Postfix = 10 x++ x-- x() x[] x.y +``` + +### Error handling + +The parser never panics. When an unexpected token is encountered, it pushes a `ParseError { message, line, column }` and tries to continue (usually returning `None` from the current parse function, which causes the caller to also return `None`). All errors are collected in `parser.errors` and checked after `parse_program()` returns. If any errors exist, evaluation is skipped. 
+ +### Switch arm parsing + +`parse_switch_expression()` loops until `}`. At the top of each iteration, after `next_token()`, it checks if `cur_token` is `Default`. If so: +- Expect `=>` +- Parse the body (block or expression) +- Consume optional comma +- Store body in `default: Option>` +- `continue` to next iteration + +Normal arms: parse the pattern expression, expect `=>`, parse the body, consume optional comma, push `SwitchArm { pattern, body }`. + +--- + +## Stage 3 — AST + +**File:** `src/ast/ast.rs` + +The AST is composed of three main types: + +### `Program` + +The root. Contains `statements: Vec`. + +### `Statement` + +An enum covering all statement forms: + +| Variant | Represents | +|---|---| +| `Let { pattern, value, line, column, .. }` | `let x = expr` | +| `Const { pattern, value, .. }` | `const x = expr` | +| `Return { value, .. }` | `return expr` | +| `Expression { expr, .. }` | A statement that is just an expression | +| `Block { statements, .. }` | `{ stmt; stmt; }` | +| `Import { path, .. }` | `import "module"` | +| `Break` / `Continue` | Loop control | +| `Struct { name, field, .. }` | Struct definition | +| `Enum { name, variant, .. }` | Enum definition | +| `Pub { statement, .. }` | Wraps a `Let` or `Const` to mark it exported | + +`LetPattern` covers the left-hand side of `let`/`const`: +- `Ident(String)` — `let x = ...` +- `Array(Vec)` — `let [a, b] = ...` +- `Hash(Vec<(String, String)>)` — `let { x, y: alias } = ...` + +### `Expression` + +An enum covering all expression forms. Every variant carries `line` and `column` for error reporting. 
+ +| Variant | Represents | +|---|---| +| `Ident` | A variable name | +| `Int(isize)` | Integer literal | +| `Float(f64)` | Float literal | +| `Char(char)` | Character literal | +| `Boolean(bool)` | `true` / `false` | +| `Null` | `null` | +| `InterpolatedString { parts }` | `"hello ${name}"` | +| `Prefix { op, right }` | `-x`, `!flag` | +| `Infix { left, op, right }` | `a + b`, `x == y` | +| `NullCoalesce { left, right }` | `a ?? b` | +| `Update { operator, target, prefix }` | `x++`, `--y` | +| `Typeof { value }` | `typeof expr` | +| `Call { function, argument }` | `fn(args)` | +| `Index { left, index }` | `arr[i]`, `hash[key]` | +| `Member { object, property }` | `obj.field` | +| `Array { element }` | `[1, 2, 3]` | +| `HashLiteral { pair }` | `{ "a": 1 }` | +| `Function { parameter, body }` | `fn(x, y) { ... }` | +| `If { condition, consequence, alternative, if_else }` | Full if/elseif/else | +| `While { condition, body }` | `while (cond) { }` | +| `For { init, condition, post, body }` | C-style for loop | +| `ForIn { key, value, iterable, body }` | `for (k, v in hash)` | +| `Switch { subject, arms, default }` | `switch (x) { ... }` | +| `StructLiteral { name, fields }` | `Point { x: 1, y: 2 }` | + +### Supporting types + +- `SwitchArm { pattern: Expression, body: Box }` — one arm of a switch +- `Param { name: String, default: Option> }` — one function parameter +- `ElseIF { condition: Expression, consequences: Statement }` — one elseif branch +- `StringSegment` — either `Literal(String)` or `Expr(Box)`, used inside `InterpolatedString` + +--- + +## Stage 4 — Evaluator + +**File:** `src/evaluator/evaluator.rs` +**Key type:** `Evaluator` + +### Evaluator struct + +```rust +pub struct Evaluator { + pub loop_depth: usize, // how many loops are currently active + pub call_depth: usize, // how deep in function calls we are + pub module_cache: HashMap, // stdlib modules, loaded once +} +``` + +`module_cache` is populated in `preload_stdlib()` called from `new()`. 
All 12 stdlib modules are loaded into the cache at startup, not on first import. + +`register_globals()` injects global builtins into the root environment. Currently only `is_error` lives here. + +### Constant: `MAX_CALL_DEPTH = 500` + +Function calls beyond this depth return an error instead of causing a Rust stack overflow. + +### `eval()` — top level + +``` +eval(program, env): + for each statement in program.statements: + result = eval_statement(statement, env) + if result is Return(v) → return v (unwrap return value) + if result is Error → return immediately (propagate errors) + return last result +``` + +### `eval_statement()` — statement dispatch + +Matches on the `Statement` variant and calls the appropriate handler: + +| Statement | Handler | +|---|---| +| `Block` | Creates an enclosed environment; evaluates each statement; stops on Return/Error/Break/Continue | +| `Let` | Evaluates value, binds to name in env via `env.set()` | +| `Const` | Same as Let but via `env.set_const()` — blocks future reassignment | +| `Return` | Evaluates value, wraps in `Object::Return` | +| `Break` | Returns `Object::Break` | +| `Continue` | Returns `Object::Continue` | +| `Import` | Checks module cache, reads file from disk, lexes/parses/evals it | +| `Struct` | Evaluates default field expressions, stores as `Object::StructType` | +| `Enum` | Stores as `Object::EnumType` | +| `Pub` | Evals inner statement, then calls `env.mark_pub(name)` | +| `Expression` | Calls `eval_expression()`, returns result directly | + +### `eval_expression()` — expression dispatch + +The large match at the heart of the evaluator. Key arms: + +**Literals** — `Int`, `Float`, `Char`, `Boolean`, `Null` wrap directly into the corresponding `Object` variant. + +**Ident** — calls `env.get(name)`. Returns `Object::Error` if not found ("identifier not found"). + +**InterpolatedString** — evaluates each `StringSegment::Expr` part and formats it to a string, concatenates with `StringSegment::Literal` parts. 
+ +**Prefix** — evaluates right operand, calls `eval_prefix()`: +- `!` → negates booleans, converts Null to true, everything else to false +- `-` → negates integers and floats, errors on other types + +**Infix** — evaluates both operands, dispatches on type pair: +- Integer + Integer → `eval_integer_infix()` with checked arithmetic +- Float + Float → `eval_float_infix()` with NaN/Infinity guard +- Integer + Float (or vice versa) → both converted to float, then float infix +- String + String → concatenation (for `+` and `+=`) + +**If** — evaluates condition, checks truthiness (`is_truthy()`), evaluates the matching branch. Truthiness: everything is truthy except `null` and `false`. + +**While** — loop with `loop_depth` tracking. `Object::Break` unwinds and returns Null. `Object::Continue` skips to next iteration. Errors propagate out. + +**For** — evaluates init statement, then loops: check condition, eval body, eval post statement. + +**ForIn** — iterates `Object::Array` (single variable = element, two variables = index + element) or `Object::Hash` (key + value pairs). + +**Function** — captures the current environment by reference (`Rc>`), stores `parameters` and `body`, returns `Object::Function`. + +**Call** — evaluates function expression and all arguments, calls `apply_function()`. + +**Index** — evaluates left and index expressions: +- Array: bounds-checks (including negative index guard), returns element or error +- Hash: searches `Vec<(Object, Object)>` linearly for a matching key + +**Member** — evaluates object, then: +- `Object::Module` → looks up member in module's HashMap, checks `pub_gated` +- `Object::StructInstance` → looks up field +- `Object::EnumType` → looks up variant by name, returns `Object::EnumVariant` +- Other types → error + +**Switch** — evaluates subject, iterates arms calling `objects_equal()` for each pattern. If a match is found, evaluates that arm's body. If no arm matches and `default` is `Some`, evaluates the default body. 
Otherwise returns Null. + +**Assignment (`eval_assignment`)** — called when an `Infix` with `=`/`+=`/etc. is found. Handles three target forms: +- `Ident` → calls `env.update(name, val)`. Errors if name not found (no silent creation). +- `Index` → evaluates the container, updates in place (arrays: bounds check; hashes: find-and-replace or insert) +- `Member` → updates a struct instance field + +### `apply_function()` — function calls + +Called for both user functions and builtin functions: + +**User function (`Object::Function`):** +1. Check `call_depth >= MAX_CALL_DEPTH` → error +2. Check arg count vs required params (accounting for `self` in methods and default params) +3. Create new `Environment::new_enclosed(func_env)` — uses the closure's captured env as outer, not the call site's env +4. Bind each param to its arg (or default value if arg omitted) +5. `call_depth += 1` → eval body → `call_depth -= 1` +6. Unwrap `Object::Return` from body result + +**Builtin (`Object::Builtin`):** +- Calls the function pointer directly with args and `CallInfo { line, column }` + +**BuiltinHigherOrder (`Object::BuiltinHigherOrder`):** +- Calls the function pointer with args, `CallInfo`, and `&mut self` (so the builtin can call back into the evaluator to invoke user functions) + +### Integer arithmetic safety + +`eval_integer_infix()` uses checked arithmetic for `+`, `-`, `*`, `**`: +``` +l.checked_add(r).map(Object::Integer).unwrap_or_else(|| Object::Error { "integer overflow" }) +``` + +`/` and `%` check for zero before dividing. + +`**` converts to f64, checks if result fits in `isize`, then converts back. + +`//` (floor division) also checks for zero. + +### Float safety + +`eval_float_infix()` uses a `float_guard()` helper that checks for `NaN` or `±Infinity` after every float operation and returns an error instead of propagating the IEEE 754 special value. + +--- + +## Stage 5 — Object System + +**File:** `src/object/object.rs` + +Every value in code-lang is an `Object`. 
The enum has 20 variants: + +| Variant | Rust type | Displayed as | +|---|---|---| +| `Integer(isize)` | platform-sized int | `42` | +| `Float(f64)` | 64-bit float | `3.14` | +| `StringType(String)` | heap string | `"hello"` | +| `Char(char)` | Unicode scalar | `'a'` | +| `Bool(bool)` | boolean | `true` / `false` | +| `Null` | unit | `null` | +| `Array(Vec<Object>)` | ordered list | `[1, 2, 3]` | +| `Hash(Vec<(Object, Object)>)` | key-value pairs | `{a: 1}` | +| `Function { parameters, body, env }` | closure | `fn(x)` | +| `Builtin(fn(...) -> Object)` | Rust function pointer | `[Builtin]` | +| `BuiltinHigherOrder(fn(..., &mut dyn Evaluable) -> Object)` | Rust fn with evaluator access | `[Builtin]` | +| `StructType { name, default }` | struct definition | `struct Point` | +| `StructInstance { type_name, fields }` | struct value | `Point { x: 1, y: 2 }` | +| `EnumType { name, variants }` | enum definition | `Direction(North \| South)` | +| `EnumVariant { enum_name, variant }` | enum value | `Direction.North` | +| `Module { name, pub_gated, members }` | imported module | `[Module: fmt]` | +| `Return(Box<Object>)` | control flow signal | (internal) | +| `Break` | control flow signal | (internal) | +| `Continue` | control flow signal | (internal) | +| `Error { message, line, column }` | runtime error | `error: ...` | + +`Return`, `Break`, `Continue`, and `Error` are not user-visible values — they are signals that propagate up through the evaluation stack and are consumed by the statement handlers that understand them (loops consume Break/Continue, `eval()` unwraps Return, error display consumes Error). + +### `Hash` is a `Vec`, not a `HashMap` + +`Object::Hash` stores pairs as `Vec<(Object, Object)>` rather than a `HashMap`. This is intentional — keys can be any `Object` (including non-hashable ones like `Array`), and the hash is small enough that linear scan is fine. Lookup is O(n) via `objects_equal()`. 
+ +### The `Evaluable` trait + +```rust +pub trait Evaluable { + fn call_function(&mut self, func: Object, args: Vec<Object>, info: CallInfo) -> Object; +} +``` + +`Evaluator` implements this trait. It is the interface through which `BuiltinHigherOrder` functions (like `arrays.map`) call back into the evaluator to invoke user-provided function values. Without this trait, stdlib functions would need a reference to `Evaluator`, which would create a circular dependency. + +### `Environment` + +Environments form a linked scope chain: + +``` +Root env (globals: is_error, imported modules) + └─ Block env (for each { ... } block) + └─ Function env (enclosed over the closure's defining scope) +``` + +Each `Environment` has: +- `store: HashMap` — bindings in this scope +- `consts: HashMap` — names that cannot be reassigned +- `pubs: HashSet` — names visible to importers (used by `pub let`) +- `outer: Option<Rc<RefCell<Environment>>>` — parent scope + +`get(name)` walks the chain upward until it finds the name or reaches the root. +`update(name, val)` also walks upward, updating the first scope where the name exists. Returns `false` if not found (used by assignment to detect undeclared variables). +`set(name, val)` always writes to the current (innermost) scope. + +Environments are reference-counted (`Rc<RefCell<Environment>>`). Functions capture a reference to their defining environment, enabling closures. 
+ +--- + +## Stage 6 — Standard Library + +**Directory:** `src/std_lib/` + +All 12 stdlib modules are written in Rust and registered in `Evaluator::preload_stdlib()`: + +| Module | File | Contents | +|---|---|---| +| `arrays` | `array.rs` | push, pop, len, map, filter, reduce, find, any, all, sort, zip, flatten, unique, slice, contains, index_of, reverse, concat, chunk, dedupe, first, last | +| `strings` | `strings.rs` | to_upper, to_lower, split, join, contains, replace, trim, trim_left, trim_right, reverse, starts_with, ends_with, index_of, count, repeat, chars, from_chars, parse_int, parse_float, len, lines, is_empty, pad_left, pad_right | +| `math` | `math.rs` | PI, E, sqrt, abs, pow, floor, ceil, round, log, log2, sin, cos, tan, min, max, clamp, sign, gcd, lcm | +| `fmt` | `fmt.rs` | print, eprint, input, to_str, to_int, to_float, format, clear | +| `hash` | `hash.rs` | keys, values, entries, has_key, merge, delete, len, get | +| `fs` | `fs.rs` | read_file, write_file, append_file, read_lines, exists, list_dir, mkdir, copy, rename, remove | +| `path` | `path.rs` | join, basename, dirname, stem, extension, absolute, is_absolute | +| `os` | `os_mod.rs` | args, platform, arch, get_env, set_env, get_wd, hostname, exit | +| `time` | `time.rs` | now, unix, sleep, since, format, year, month, day, hour, minute, second | +| `json` | `json.rs` | parse, stringify | +| `rand` | `rand.rs` | int, float, choice, shuffle | +| `http` | `http.rs` | get, post, post_json | + +Each module's `module()` function returns an `Object::Module` with a `HashMap` of its members. Functions are stored as `Object::Builtin` (or `Object::BuiltinHigherOrder` for map/filter/reduce/find/any/all). + +### Import resolution + +When `import "arrays"` is evaluated: + +1. Check `module_cache` — if found, return it directly (no re-evaluation) +2. If not in cache (user-defined modules): read the `.cl` file from disk, lex/parse/eval it, store resulting environment as `Object::Module` +3. 
Bind the module to the import name in the current environment + +Stdlib modules are always found in step 1 because `preload_stdlib()` populates the cache at startup. + +--- + +## Error Display + +**File:** `src/repl/repl.rs` — `show_error()` + +When an `Object::Error` is returned (either from the evaluator or as a parse error): + +``` +error: cannot add INTEGER and STRING + --> 3:14 + | + 3 | let x = 1 + "hello"; + | ^ +hint: use fmt.to_str() to convert a number to string +``` + +The format: +1. `error: ` on its own line +2. ` --> line:column` with a gutter showing the line number width +3. The source line +4. A caret `^` pointing to the error column +5. An optional `hint:` line from `get_hint()` — a lookup table matching common error message substrings to fix suggestions + +In REPL mode: errors print to stderr, the REPL continues. +In script mode: errors print to stderr, process exits with code 1. + +--- + +## code-lang-fmt + +**Directory:** `crates/fmt/` + +A separate binary (`code-lang-fmt`) that shares the same lexer and parser via the `code-lang` library crate. It does not use the evaluator at all. + +Three subcommands: + +**`check`** — parse each file, report parse errors, exit 1 if any found. Does not evaluate. + +**`lint`** — parse each file, run lint rules over the AST, report findings. With `--fix`, applies auto-fixable rules. + +Lint rules are in `crates/fmt/src/lint_rules.rs`. Each rule implements the `Visitor` trait from `crates/fmt/src/visitor.rs`, which provides default `visit_*` methods that recurse into child nodes. Rules override only the methods they care about. + +Current rules: `UnusedImport`, `ShadowedBinding`, `DeadCode`, `EmptyBlock`, `UnusedVariable`, `UndefinedVariable`, `ConstReassignment`. + +**`format`** — not yet implemented (planned, see better-tools.md). 
+ +--- + +## Data Flow: A Single Expression + +Tracing `let x = 1 + 2;` from text to result: + +``` +Source: "let x = 1 + 2;" + +Lexer produces: + Token(Let, 1:1) + Token(Ident("x"), 1:5) + Token(Assign, 1:7) + Token(Int(1), 1:9) + Token(Plus, 1:11) + Token(Int(2), 1:13) + Token(Semicolon, 1:14) + Token(EOF, 1:15) + +Parser produces: + Statement::Let { + pattern: LetPattern::Ident("x"), + value: Expression::Infix { + left: Expression::Int { value: 1 }, + op: Token(Plus), + right: Expression::Int { value: 2 }, + }, + line: 1, column: 1, + } + +Evaluator: + eval_statement(Let { pattern: Ident("x"), value: Infix { ... } }) + eval_expression(Infix { left: Int(1), op: Plus, right: Int(2) }) + eval_expression(Int { value: 1 }) → Object::Integer(1) + eval_expression(Int { value: 2 }) → Object::Integer(2) + eval_integer_infix(Plus, 1, 2) + 1_isize.checked_add(2) → Some(3) + → Object::Integer(3) + env.set("x", Object::Integer(3)) + → Object::Null + +REPL: Null is suppressed (not printed) +``` + +--- + +## Key Design Decisions + +**Tree-walking, not bytecode.** Simpler to build and modify. The AST is evaluated directly, which is slower but means each AST node type has one clear evaluation rule. Good for a language at this stage. + +**Errors as values, not exceptions.** `Object::Error` propagates up through the call stack rather than unwinding via panic or Rust's `?` operator. This means every function that can fail returns `Object` and every caller must check for `Object::Error { .. }`. The advantage is that errors can be caught with `is_error()` and stored in variables. + +**`Rc<RefCell<Environment>>` for scopes.** Reference counting is used instead of arena allocation or a borrow-checker-friendly scope system because closures need to capture environments that outlive their defining scope. The `RefCell` allows mutation (variable assignment) through shared references. + +**`Hash` as `Vec`, not `HashMap`.** Keys can be any `Object`. 
Since `Object` is not `Hash + Eq`, it cannot be used as a `HashMap` key without manual implementation. The linear scan cost is acceptable for the typical small hash sizes in a scripting context. + +**Stdlib preloaded, not lazy.** All 12 modules are loaded at startup into `module_cache`. This means every program pays the initialization cost (~trivial in practice since stdlib functions are just Rust function pointers), but imports never do disk I/O or parsing. diff --git a/crates/fmt/src/commands.rs b/crates/fmt/src/commands.rs index b2e3d6a..23baf6f 100644 --- a/crates/fmt/src/commands.rs +++ b/crates/fmt/src/commands.rs @@ -6,6 +6,7 @@ use code_lang::{ parser::parser::Parser, }; +use crate::formatter::Formatter; use crate::lint_rules::{ ConstReassignment, DeadCode, EmptyBlock, LintFix, LintRule, LintSeverity, ShadowedBinding, UndefinedVariable, UnusedImport, UnusedVariable, @@ -217,3 +218,56 @@ fn apply_fixes(path: &PathBuf, src: String, fixes: Vec<&LintFix>) { eprintln!("{}: failed to write fixes: {}", path.display(), e); } } + + +pub fn format_file(files: &[PathBuf], stdout:bool) -> Result<()> { + for file in files { + let ext_ok = file + .extension() + .and_then(|e| e.to_str()) + .map(|e| e.eq_ignore_ascii_case("cl")) + .unwrap_or(false); + + if !ext_ok { + bail!("expect a .cl file, got: {}", file.display()) + } + + let src = match fs::read_to_string(file) { + Ok(s) => s, + Err(e) => { + eprintln!("{}: cannot read file: {}", file.display(), e); + continue; + } + }; + + let lines: Vec<&str> = src.lines().collect(); + let lexer = Lexer::new(src.clone()); + let mut parser = Parser::new(lexer); + let program = parser.parse_program(); + + //formatter + if !parser.errors.is_empty() { + for err in &parser.errors { + print_caret( + &lines, + file, + err.line, + err.column, + &LintSeverity::Error, + None, + &err.message, + ); + } + } + + let formatted = Formatter::format(&program); + if stdout { + print!("{:?}", formatted) + } else if formatted != src { + fs::write(file, 
formatted)?; + println!("{}: formatted", file.display()); + } + } + + Ok(()) +} diff --git a/crates/fmt/src/formatter.rs b/crates/fmt/src/formatter.rs new file mode 100644 index 0000000..f30b8cb --- /dev/null +++ b/crates/fmt/src/formatter.rs @@ -0,0 +1,14 @@ +use code_lang::ast::ast::{Expression, Program, Statement}; + +pub struct Formatter { + indent_level: usize, + output: String +} + +const IDENT:&str = " "; // 4 spaces + +impl Formatter { + pub fn format(program: &Program){ } + pub fn fmt_statement(&mut self, stmt:&Statement){} + pub fn fmt_expression(&mut self, expr:&Expression){} +} \ No newline at end of file diff --git a/crates/fmt/src/lint_rules.rs b/crates/fmt/src/lint_rules.rs index 81c5b96..9fab276 100644 --- a/crates/fmt/src/lint_rules.rs +++ b/crates/fmt/src/lint_rules.rs @@ -4,7 +4,7 @@ use code_lang::{ analysis::scope::ScopeTree, ast::{ ast::{Expression, LetPattern, Statement}, - walk::{Visitor, walk_statement}, + walk::{Visitor, walk_expression, walk_statement}, }, token::token::TokenType, }; @@ -170,6 +170,10 @@ impl Visitor for ShadowedBinding { fn visit_const(&mut self, pattern: &LetPattern, _value: &Expression, line: usize, col: usize) { self.register_pattern(pattern, line, col); } + + fn visit_expression(&mut self, expr: &Expression) { + + } } impl LintRule for ShadowedBinding { @@ -215,6 +219,21 @@ impl Visitor for UnusedVariable { } } + fn visit_const( + &mut self, + pattern: &LetPattern, + _value: &Expression, + line: usize, + col: usize, + ) + { + match pattern { + LetPattern::Array(names) => for name in names { self.declared.insert(name.to_string(), (line, col, false));}, + LetPattern::Hash(pairs) => for (_, alias) in pairs { self.declared.insert(alias.to_string(), (line, col, false));}, + LetPattern::Ident(n) => { self.declared.insert(n.to_string(), (line,col, false));} + } + } + fn visit_ident(&mut self, value: &str, _line: usize, _col: usize) { self.used.insert(value.to_string()); } @@ -390,6 +409,8 @@ impl Visitor for DeadCode { 
self.returned = saved; } + + walk_statement(self, stmt); } } @@ -506,6 +527,8 @@ impl Visitor for EmptyBlock { fix: None, }); } + + walk_expression(self, expr); } } @@ -557,6 +580,17 @@ impl Visitor for UndefinedVariable { }); } } + + fn visit_member( + &mut self, + object: &Expression, + _property: &Expression, + _line: usize, + _col: usize, + ) + { + self.visit_expression(object); + } } impl LintRule for UndefinedVariable { diff --git a/crates/fmt/src/main.rs b/crates/fmt/src/main.rs index 22c6dff..3e55302 100644 --- a/crates/fmt/src/main.rs +++ b/crates/fmt/src/main.rs @@ -1,12 +1,13 @@ mod commands; mod lint_rules; mod util; +mod formatter; use std::path::PathBuf; use anyhow::Result; use clap::{Parser, Subcommand}; -use crate::commands::{check_file, lint_file}; +use crate::commands::{check_file, format_file, lint_file}; #[derive(Parser)] #[command(name = "code-lang-fmt")] @@ -28,6 +29,13 @@ enum Commands { #[arg(long)] fix: bool, }, + + Format { + files: Vec, + + #[arg(long)] + stdout:bool + } } fn main() -> Result<()> { @@ -39,6 +47,9 @@ fn main() -> Result<()> { Some(Commands::Lint { files, fix }) => { lint_file(&files, fix)?; } + Some(Commands::Format { files, stdout }) => { + format_file(&files, stdout)?; + }, None => {} } Ok(()) From dc3ee830c1b0f23bc2d576637b3e093df40b13d4 Mon Sep 17 00:00:00 2001 From: walonCode Date: Fri, 26 Jun 2026 07:19:44 +0000 Subject: [PATCH 5/5] =?UTF-8?q?docs(stdlib):=20specific=20return=20shapes?= =?UTF-8?q?=20=E2=80=94=20[string],=20[[any,any]],=20{=20status:=20integer?= =?UTF-8?q?,=20ok:=20bool,=20body:=20string=20},=20etc.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/app/docs/stdlib/page.tsx | 260 +++++++++++++++++----------------- 1 file changed, 133 insertions(+), 127 deletions(-) diff --git a/docs/app/docs/stdlib/page.tsx b/docs/app/docs/stdlib/page.tsx index c863ecf..ce66158 100644 --- a/docs/app/docs/stdlib/page.tsx +++ b/docs/app/docs/stdlib/page.tsx @@ -3,197 
+3,201 @@ import Pre from "../../components/Pre"; export const metadata: Metadata = { title: "Standard library" }; -const MODULES = [ +const MODULES: { + name: string; + desc: string; + fns: [string, string, string][]; +}[] = [ { name: "fmt", desc: "Output, input, and type conversion.", fns: [ - ["print(...args)", "Print args space-separated to stdout with newline."], - ["eprint(...args)", "Same as print but to stderr."], - ["input(prompt)", "Print prompt and read a line from stdin. Returns STRING."], - ["typeof(x)", "Return the type name of x as a STRING."], - ["to_int(x)", "Convert STRING, FLOAT, or BOOL to INTEGER."], - ["to_float(x)", "Convert STRING or INTEGER to FLOAT."], - ["to_str(x)", "Convert any value to its STRING representation."], - ["clear()", "Clear the terminal screen."], - ["format(template, ...args)", "Printf-style formatting: %s string, %d integer, %f float, %% literal percent."], + ["print(...args)", "Print args space-separated to stdout with newline.", "null"], + ["eprint(...args)", "Same as print but to stderr.", "null"], + ["input(prompt)", "Print prompt and read a line from stdin.", "string"], + ["typeof(x)", "Return the type name of x.", "string"], + ["to_int(x)", "Convert STRING, FLOAT, or BOOL to INTEGER.", "integer"], + ["to_float(x)", "Convert STRING or INTEGER to FLOAT.", "float"], + ["to_str(x)", "Convert any value to its string representation.", "string"], + ["clear()", "Clear the terminal screen.", "null"], + ["format(template, ...args)", "Printf-style formatting: %s string, %d integer, %f float, %% literal percent.", "string"], ], }, { name: "math", desc: "Mathematical functions and constants.", fns: [ - ["PI", "3.141592… (constant)"], - ["E", "2.718281… (constant)"], - ["sqrt(n)", "Square root."], - ["abs(n)", "Absolute value. Returns same type as input."], - ["pow(base, exp)", "base raised to exp. 
Returns FLOAT."], - ["floor(n) / ceil(n) / round(n) / trunc(n)", "Rounding variants."], - ["log(n)", "Natural logarithm (ln)."], - ["log10(n)", "Base-10 logarithm."], - ["log2(n)", "Base-2 logarithm."], - ["exp(n)", "e raised to n."], - ["sin(n) / cos(n) / tan(n)", "Trigonometric functions (radians)."], - ["min(a, b, …) / max(a, b, …)", "Minimum / maximum of one or more numbers."], - ["clamp(x, lo, hi)", "Clamp x to the range [lo, hi]."], - ["sign(n)", "Returns -1, 0, or 1 based on the sign of n."], - ["gcd(a, b)", "Greatest common divisor (integers)."], - ["lcm(a, b)", "Least common multiple (integers)."], + ["PI", "3.141592… (constant)", "float"], + ["E", "2.718281… (constant)", "float"], + ["sqrt(n)", "Square root.", "float"], + ["abs(n)", "Absolute value — preserves input type.", "integer | float"], + ["pow(base, exp)", "base raised to exp.", "float"], + ["floor(n) / ceil(n) / round(n) / trunc(n)", "Rounding variants.", "integer"], + ["log(n)", "Natural logarithm (ln).", "float"], + ["log10(n)", "Base-10 logarithm.", "float"], + ["log2(n)", "Base-2 logarithm.", "float"], + ["exp(n)", "e raised to n.", "float"], + ["sin(n) / cos(n) / tan(n)", "Trigonometric functions (radians).", "float"], + ["min(a, b, …) / max(a, b, …)", "Minimum / maximum of one or more numbers.", "integer | float"], + ["clamp(x, lo, hi)", "Clamp x to the range [lo, hi].", "integer | float"], + ["sign(n)", "Returns -1, 0, or 1 based on the sign of n.", "integer"], + ["gcd(a, b)", "Greatest common divisor (integers).", "integer"], + ["lcm(a, b)", "Least common multiple (integers).", "integer"], ], }, { name: "strings", desc: "String manipulation.", fns: [ - ["to_upper(s) / to_lower(s)", "Case conversion."], - ["trim(s) / trim_left(s) / trim_right(s)", "Remove whitespace."], - ["split(s, sep)", "Split string into ARRAY of strings."], - ["join(arr, sep)", "Join array into a string."], - ["contains(s, sub)", "BOOL — whether s contains sub."], - ["starts_with(s, prefix) / ends_with(s, suffix)", 
"BOOL prefix/suffix check."], - ["replace(s, old, new)", "Replace all occurrences of old with new."], - ["index(s, sub)", "First index of sub in s, or -1."], - ["count(s, sub)", "Number of non-overlapping occurrences of sub."], - ["repeat(s, n)", "Repeat s n times."], - ["reverse(s)", "Reverse the string."], - ["to_chars(s)", "ARRAY of CHAR values."], - ["from_chars(arr)", "Build a STRING from an array of CHARs."], - ["parse_int(s) / parse_float(s)", "Parse string to number."], - ["lines(s)", "Split by newline into ARRAY of strings."], - ["is_empty(s)", "BOOL — true if string has zero characters."], - ["pad_left(s, n, ch)", "Left-pad s to width n using char ch."], - ["pad_right(s, n, ch)", "Right-pad s to width n using char ch."], + ["to_upper(s) / to_lower(s)", "Case conversion.", "string"], + ["trim(s) / trim_left(s) / trim_right(s)", "Remove whitespace.", "string"], + ["split(s, sep)", "Split string on sep.", "[string]"], + ["join(arr, sep)", "Join array into a string.", "string"], + ["contains(s, sub)", "Whether s contains sub.", "bool"], + ["starts_with(s, prefix) / ends_with(s, suffix)", "Prefix or suffix check.", "bool"], + ["replace(s, old, new)", "Replace all occurrences of old with new.", "string"], + ["index(s, sub)", "First index of sub in s, or -1.", "integer"], + ["count(s, sub)", "Number of non-overlapping occurrences of sub.", "integer"], + ["repeat(s, n)", "Repeat s n times.", "string"], + ["reverse(s)", "Reverse the string.", "string"], + ["to_chars(s)", "Split s into individual characters.", "[char]"], + ["from_chars(arr)", "Build a string from an array of chars.", "string"], + ["parse_int(s) / parse_float(s)", "Parse string to number.", "integer | float"], + ["lines(s)", "Split by newline.", "[string]"], + ["is_empty(s)", "True if string has zero characters.", "bool"], + ["pad_left(s, n, ch)", "Left-pad s to width n using char ch.", "string"], + ["pad_right(s, n, ch)", "Right-pad s to width n using char ch.", "string"], ], }, { name: "arrays", - 
desc: "Array operations. All functions return new arrays — no mutation.", + desc: "Array operations. All functions return new values — no mutation.", fns: [ - ["len(arr)", "Length of array (also works on STRING)."], - ["first(arr) / last(arr)", "First / last element, or null if empty."], - ["rest(arr)", "Array without the first element."], - ["pop(arr)", "Array without the last element."], - ["push(arr, x) / prepend(arr, x)", "Return new array with x appended / prepended."], - ["concat(a, b)", "Concatenate two arrays."], - ["reverse(arr)", "Return reversed array."], - ["slice(arr, start, end)", "Subarray from start (inclusive) to end (exclusive)."], - ["contains(arr, x)", "BOOL — whether arr contains x."], - ["index_of(arr, x)", "First index of x, or -1."], - ["join(arr, sep)", "Join elements into a STRING."], - ["sum(arr)", "Sum of numeric elements."], - ["min(arr) / max(arr)", "Min / max of numeric array."], - ["sort(arr)", "Sorted copy (numbers and strings)."], - ["unique(arr)", "Remove duplicates, preserve order."], - ["flatten(arr)", "Flatten one level of nesting."], - ["zip(a, b)", "Array of [a[i], b[i]] pairs, stops at shorter."], - ["map(arr, fn)", "Return new array of fn(element) results."], - ["filter(arr, fn)", "Return elements where fn(element) is truthy."], - ["reduce(arr, fn, init)", "Accumulate fn(acc, element) left-to-right, starting from init."], - ["find(arr, fn)", "First element where fn(element) is truthy, or null."], - ["any(arr, fn)", "BOOL — true if any element passes fn."], - ["all(arr, fn)", "BOOL — true if all elements pass fn."], + ["len(arr)", "Number of elements.", "integer"], + ["first(arr) / last(arr)", "First or last element, or null if empty.", "any | null"], + ["rest(arr)", "Array without the first element.", "[any]"], + ["pop(arr)", "Array without the last element.", "[any]"], + ["push(arr, x) / prepend(arr, x)", "New array with x appended or prepended.", "[any]"], + ["concat(a, b)", "Concatenate two arrays.", "[any]"], + 
["reverse(arr)", "Reversed copy.", "[any]"], + ["slice(arr, start, end)", "Subarray from start (inclusive) to end (exclusive).", "[any]"], + ["contains(arr, x)", "Whether arr contains x.", "bool"], + ["index_of(arr, x)", "First index of x, or -1.", "integer"], + ["join(arr, sep)", "Join elements into a string.", "string"], + ["sum(arr)", "Sum of numeric elements.", "integer | float"], + ["min(arr) / max(arr)", "Min / max of a numeric array.", "integer | float"], + ["sort(arr)", "Sorted copy (numbers and strings).", "[any]"], + ["unique(arr)", "Remove duplicates, preserve order.", "[any]"], + ["flatten(arr)", "Flatten one level of nesting.", "[any]"], + ["zip(a, b)", "Pair elements: a[i] with b[i], stops at shorter.", "[[any, any]]"], + ["map(arr, fn)", "New array of fn(element) results.", "[any]"], + ["filter(arr, fn)", "Elements where fn(element) is truthy.", "[any]"], + ["reduce(arr, fn, init)", "Accumulate fn(acc, element) left-to-right from init.", "any"], + ["find(arr, fn)", "First element where fn(element) is truthy, or null.", "any | null"], + ["any(arr, fn)", "True if at least one element passes fn.", "bool"], + ["all(arr, fn)", "True if every element passes fn.", "bool"], ], }, { name: "hash", desc: "Hash (dictionary) operations.", fns: [ - ["keys(h)", "ARRAY of keys."], - ["values(h)", "ARRAY of values."], - ["entries(h)", "ARRAY of [key, value] pairs."], - ["has_key(h, k)", "BOOL — whether key k exists."], - ["get(h, k, default)", "Value for key k, or default if key is absent."], - ["len(h)", "Number of key-value pairs."], - ["merge(h1, h2)", "New hash with both; h2 overwrites h1 on conflicts."], - ["delete(h, k)", "New hash without key k."], + ["keys(h)", "All keys.", "[any]"], + ["values(h)", "All values.", "[any]"], + ["entries(h)", "All key-value pairs.", "[[any, any]]"], + ["has_key(h, k)", "Whether key k exists.", "bool"], + ["get(h, k, default)", "Value for key k, or default if absent.", "any"], + ["len(h)", "Number of key-value pairs.", 
"integer"], + ["merge(h1, h2)", "New hash with both; h2 wins on conflicts.", "{ ...h1, ...h2 }"], + ["delete(h, k)", "New hash without key k.", "{ ...h minus k }"], ], }, { name: "fs", desc: "File system I/O.", fns: [ - ["read_file(path)", "Read file contents as STRING."], - ["write_file(path, content)", "Write STRING to file (overwrite). Returns BOOL."], - ["append_file(path, content)", "Append STRING to file (creates if missing). Returns BOOL."], - ["read_lines(path)", "Read file into ARRAY of strings (one per line)."], - ["exists(path)", "BOOL — path exists."], - ["is_file(path) / is_dir(path)", "BOOL — type check."], - ["list_dir(path)", "ARRAY of filenames in directory."], - ["mkdir(path) / mkdir_all(path)", "Create directory / all intermediate directories."], - ["remove(path)", "Delete a file."], - ["remove_dir(path)", "Delete a directory and all its contents."], - ["copy(src, dst) / rename(src, dst)", "Copy or rename a file."], + ["read_file(path)", "Read file contents.", "string | error"], + ["write_file(path, content)", "Write string to file (overwrite).", "bool"], + ["append_file(path, content)", "Append string to file (creates if missing).", "bool"], + ["read_lines(path)", "Read file into one string per line.", "[string] | error"], + ["exists(path)", "Whether path exists.", "bool"], + ["is_file(path) / is_dir(path)", "Type check.", "bool"], + ["list_dir(path)", "Filenames inside a directory.", "[string] | error"], + ["mkdir(path) / mkdir_all(path)", "Create directory / all intermediate dirs.", "bool"], + ["remove(path)", "Delete a file.", "bool"], + ["remove_dir(path)", "Delete a directory and all its contents.", "bool"], + ["copy(src, dst) / rename(src, dst)", "Copy or rename a file.", "bool"], ], }, { name: "path", desc: "Path string manipulation — no filesystem access except absolute().", fns: [ - ["join(a, b, …)", "Join path segments with the OS separator."], - ["basename(p)", "Filename with extension."], - ["dirname(p)", "Parent directory."], - 
["stem(p)", "Filename without extension."], - ["extension(p)", "Extension without the leading dot."], - ["absolute(p)", "Canonicalized absolute path (hits filesystem)."], - ["is_absolute(p)", "BOOL — whether path is absolute."], + ["join(a, b, …)", "Join path segments with the OS separator.", "string"], + ["basename(p)", "Filename with extension.", "string"], + ["dirname(p)", "Parent directory.", "string"], + ["stem(p)", "Filename without extension.", "string"], + ["extension(p)", "Extension without the leading dot.", "string"], + ["absolute(p)", "Canonicalized absolute path.", "string"], + ["is_absolute(p)", "Whether path is absolute.", "bool"], ], }, { name: "os", desc: "Operating system interface.", fns: [ - ["args", "ARRAY of command-line arguments (value, not function)."], - ["platform", "OS name string, e.g. \"linux\", \"macos\", \"windows\" (value)."], - ["arch", "CPU architecture string (value)."], - ["get_env(key)", "Read an environment variable. Returns STRING (empty if unset)."], - ["set_env(key, val)", "Set an environment variable."], - ["get_wd()", "Current working directory as STRING."], - ["hostname()", "Machine hostname as STRING."], - ["exit(code?)", "Exit the process with optional integer code (default 0)."], + ["args", "Command-line arguments passed to the script (value, not function).", "[string]"], + ["platform", "OS name — e.g. \"linux\", \"macos\", \"windows\" (value).", "string"], + ["arch", "CPU architecture string (value).", "string"], + ["get_env(key)", "Read an environment variable. Empty string if unset.", "string"], + ["set_env(key, val)", "Set an environment variable.", "null"], + ["get_wd()", "Current working directory.", "string"], + ["hostname()", "Machine hostname.", "string"], + ["exit(code?)", "Exit the process. Optional integer exit code (default 0).", "—"], ], }, { name: "time", - desc: "Date and time. Timestamps are unix milliseconds (INTEGER).", + desc: "Date and time. 
All timestamps are unix milliseconds.", fns: [ - ["now()", "Current time as unix milliseconds."], - ["unix()", "Current time as unix seconds."], - ["sleep(ms)", "Pause execution for ms milliseconds."], - ["since(start_ms)", "Milliseconds elapsed since start_ms."], - ["format(ms, layout)", "Format timestamp using a strftime-style layout string."], - ["year(ms) / month(ms) / day(ms)", "Date components (UTC). Month is 1–12."], - ["hour(ms) / minute(ms) / second(ms)", "Time components (UTC)."], - ["RFC3339", "\"%Y-%m-%dT%H:%M:%S%z\" layout constant."], - ["Kitchen", "\"%I:%M %p\" layout constant."], + ["now()", "Current time as unix milliseconds.", "integer"], + ["unix()", "Current time as unix seconds.", "integer"], + ["sleep(ms)", "Pause execution for ms milliseconds.", "null"], + ["since(start_ms)", "Milliseconds elapsed since start_ms.", "integer"], + ["format(ms, layout)", "Format a timestamp with a strftime-style layout string.", "string"], + ["year(ms) / month(ms) / day(ms)", "Date components (UTC). month() is 1–12.", "integer"], + ["hour(ms) / minute(ms) / second(ms)", "Time components (UTC).", "integer"], + ["RFC3339", "\"%Y-%m-%dT%H:%M:%S%z\" layout constant (value).", "string"], + ["Kitchen", "\"%I:%M %p\" layout constant (value).", "string"], ], }, { name: "json", desc: "JSON serialisation.", fns: [ - ["parse(s)", "Parse a JSON string into code-lang values. JSON objects become HASH."], - ["stringify(x)", "Serialize a value to a JSON STRING."], + ["parse(s)", "Parse a JSON string. 
JSON objects become hash, arrays become array.", "hash | [any] | string | integer | float | bool | null"], + ["stringify(x)", "Serialize a value to a JSON string.", "string"], ], }, { name: "rand", desc: "Random number generation.", fns: [ - ["int(min, max)", "Random INTEGER in [min, max] inclusive."], - ["float()", "Random FLOAT in [0.0, 1.0)."], - ["choice(arr)", "Random element from array."], - ["shuffle(arr)", "Return a new shuffled copy of the array (Fisher-Yates)."], + ["int(min, max)", "Random integer in [min, max] inclusive.", "integer"], + ["float()", "Random float in [0.0, 1.0).", "float"], + ["choice(arr)", "Random element from array, or null if empty.", "any | null"], + ["shuffle(arr)", "Shuffled copy (Fisher-Yates).", "[any]"], ], }, { name: "http", - desc: "Blocking HTTP client. All functions return a HASH with status (INTEGER), body (STRING), and ok (BOOL).", + desc: "Blocking HTTP client.", fns: [ - ["get(url)", "HTTP GET."], - ["get(url, headers)", "HTTP GET with custom headers (HASH of STRING→STRING)."], - ["post(url, body)", "HTTP POST with a plain string body."], - ["post(url, body, headers)", "HTTP POST with custom headers."], - ["post_json(url, obj)", "HTTP POST with JSON body (sets Content-Type automatically)."], + ["get(url)", "HTTP GET.", "{ status: integer, ok: bool, body: string }"], + ["get(url, headers)", "HTTP GET with custom headers.", "{ status: integer, ok: bool, body: string }"], + ["post(url, body)", "HTTP POST with a plain string body.", "{ status: integer, ok: bool, body: string }"], + ["post(url, body, headers)", "HTTP POST with custom headers.", "{ status: integer, ok: bool, body: string }"], + ["post_json(url, obj)", "HTTP POST with JSON body — sets Content-Type automatically.", "{ status: integer, ok: bool, body: string }"], ], }, ]; @@ -208,8 +212,8 @@ export default function StdlibReference() {
{`import "math";
 import "strings";
 
-math.sqrt(9);               # 3.0
-strings.to_upper("hello");  # HELLO`}
+math.sqrt(9); # 3.0 → float +strings.split("a,b,c", ","); # ["a", "b", "c"] → [string]`} {MODULES.map((mod) => (
@@ -222,13 +226,15 @@ strings.to_upper("hello"); # HELLO`} Function / value Description + Returns - {mod.fns.map(([sig, desc]) => ( + {mod.fns.map(([sig, desc, ret]) => ( {sig} {desc} + {ret} ))}