diff --git a/src/items/builder.rs b/src/items/builder.rs index 3465178..681a99c 100644 --- a/src/items/builder.rs +++ b/src/items/builder.rs @@ -517,17 +517,52 @@ impl TryFrom> for DateTimeBuilder { fn try_from(items: Vec) -> Result { let mut builder = DateTimeBuilder::new(); + // GNU date silently ignores unrecognized alphabetic tokens that directly + // follow a pure number (e.g. `8j` or `8 j` → 08:00:00). A Noise token + // is only valid in that position; anywhere else it is an error. + let mut prev_was_pure = false; for item in items { builder = match item { - Item::DateTime(dt) => builder.set_date(dt.date)?.set_time(dt.time)?, - Item::Date(d) => builder.set_date(d)?, - Item::Time(t) => builder.set_time(t)?, - Item::Weekday(weekday) => builder.set_weekday(weekday)?, - Item::Offset(offset) => builder.set_offset(offset)?, - Item::Relative(rel) => builder.push_relative(rel)?, - Item::TimeZone(tz) => builder.set_timezone(tz)?, - Item::Pure(pure) => builder.set_pure(pure)?, + Item::Noise => { + if !prev_was_pure { + return Err("unrecognized token"); + } + prev_was_pure = false; + builder + } + Item::Pure(pure) => { + prev_was_pure = true; + builder.set_pure(pure)? + } + Item::DateTime(dt) => { + prev_was_pure = false; + builder.set_date(dt.date)?.set_time(dt.time)? + } + Item::Date(d) => { + prev_was_pure = false; + builder.set_date(d)? + } + Item::Time(t) => { + prev_was_pure = false; + builder.set_time(t)? + } + Item::Weekday(weekday) => { + prev_was_pure = false; + builder.set_weekday(weekday)? + } + Item::Offset(offset) => { + prev_was_pure = false; + builder.set_offset(offset)? + } + Item::Relative(rel) => { + prev_was_pure = false; + builder.push_relative(rel)? + } + Item::TimeZone(tz) => { + prev_was_pure = false; + builder.set_timezone(tz)? 
+ } } } diff --git a/src/items/mod.rs b/src/items/mod.rs index a1529bb..a0f4e60 100644 --- a/src/items/mod.rs +++ b/src/items/mod.rs @@ -54,7 +54,8 @@ use primitive::space; use winnow::{ combinator::{alt, eof, preceded, repeat_till, terminated, trace}, error::{AddContext, ContextError, ErrMode, StrContext, StrContextValue}, - stream::Stream, + stream::{AsChar, Stream}, + token::take_while, ModalResult, Parser, }; @@ -71,6 +72,9 @@ enum Item { Offset(offset::Offset), TimeZone(jiff::tz::TimeZone), Pure(String), + /// An unrecognized alphabetic token silently ignored for GNU `date` compatibility. + /// GNU `date` ignores trailing word-tokens it doesn't recognize (e.g. `8j` → 08:00:00). + Noise, } /// Parse a date and time string and resolve it against the given base date and @@ -261,11 +265,26 @@ fn parse_item(input: &mut &str) -> ModalResult { weekday::parse.map(Item::Weekday), offset::parse.map(Item::Offset), pure::parse.map(Item::Pure), + noise_token, )), ) .parse_next(input) } +/// Consume an unrecognized alphabetic word and silently discard it. +/// +/// GNU `date` ignores trailing word-tokens it does not recognize (issue #279). +/// For example, `8j` is accepted and the `j` is silently dropped, yielding +/// 08:00:00, just as GNU `date -d '8j'` does. +/// +/// This parser is the last alternative in `parse_item`, so it only fires after +/// every other item parser has already failed. +fn noise_token(input: &mut &str) -> ModalResult { + primitive::s(take_while(1.., AsChar::is_alpha)) + .map(|_| Item::Noise) + .parse_next(input) +} + /// Create an error with context for unexpected input. fn expect_error(input: &mut &str, reason: &'static str) -> ErrMode { ErrMode::Cut(ContextError::new()).add_context( @@ -724,6 +743,37 @@ mod tests { assert_eq!(result.second(), 0); } + /// GNU `date` silently ignores unrecognized alphabetic tokens that trail a + /// pure number (issue #279). E.g. `8j` and `8 j` both produce 08:00:00. 
+ #[test] + fn noise_after_pure_number() { + let now = Zoned::now().with_time_zone(TimeZone::UTC); + + // Adjacent suffix: "8j" → hour 8 + let result = at_date(parse(&mut "8j").unwrap(), now.clone()); + assert_eq!(result.hour(), 8); + assert_eq!(result.minute(), 0); + assert_eq!(result.second(), 0); + + // Space-separated suffix: "8 j" → hour 8 + let result = at_date(parse(&mut "8 j").unwrap(), now.clone()); + assert_eq!(result.hour(), 8); + assert_eq!(result.minute(), 0); + assert_eq!(result.second(), 0); + + // Noise following a four-digit pure number (HHMM): "1230foo" → 12:30 + let result = at_date(parse(&mut "1230foo").unwrap(), now.clone()); + assert_eq!(result.hour(), 12); + assert_eq!(result.minute(), 30); + + // Noise must NOT be accepted when it precedes a real item (leading garbage). + assert!(parse(&mut "bogus +1 day").is_err()); + // Noise must NOT be accepted after a non-pure item (e.g. after a date). + assert!(parse(&mut "2025-01-01 abcdef").is_err()); + // A standalone unrecognized word is still an error. 
+ assert!(parse(&mut "notadate").is_err()); + } + #[test] fn pure() { let now = Zoned::now().with_time_zone(TimeZone::UTC); diff --git a/src/items/offset.rs b/src/items/offset.rs index 9dc42f1..ddc132f 100644 --- a/src/items/offset.rs +++ b/src/items/offset.rs @@ -281,6 +281,7 @@ fn timezone_name_to_offset(input: &str) -> ModalResult { "w" => Ok("-10"), "v" => Ok("-9"), "utc" => Ok("+0"), + "ut" => Ok("+0"), // Universal Time = UTC "u" => Ok("-8"), "t" => Ok("-7"), "sst" => Ok("-11"), @@ -423,6 +424,7 @@ mod tests { fn timezone_name_without_offset() { for (input, expected) in [ ("utc", off(false, 0, 0)), // UTC + ("ut", off(false, 0, 0)), // Universal Time = UTC (issue #280) ("gmt", off(false, 0, 0)), // UTC ("z", off(false, 0, 0)), // UTC ("west", off(false, 1, 0)), // positive offset diff --git a/tests/date.rs b/tests/date.rs index c57f782..555cacf 100644 --- a/tests/date.rs +++ b/tests/date.rs @@ -285,3 +285,30 @@ fn test_multiple_month_skip(#[case] base: &str, #[case] input: &str, #[case] exp fn test_embedded_timezone(#[case] input: &str, #[case] expected: &str) { check_absolute(input, expected); } + +// Issue #280: bare timezone abbreviation 'ut'/'UT' (Universal Time) should be +// accepted as UTC. GNU date accepts these; previously parse_datetime rejected them +// because 'ut' was absent from the named-timezone table. +#[test] +fn test_bare_ut_timezone_is_accepted() { + use parse_datetime::parse_datetime; + for input in ["ut", "UT", "Ut", "uT"] { + let result = parse_datetime(input); + assert!( + result.is_ok(), + "expected bare timezone '{}' to be accepted, got: {:?}", + input, + result, + ); + let offset_secs = result + .unwrap() + .as_zoned() + .map(|z| z.offset().seconds()) + .unwrap_or(0); + assert_eq!( + offset_secs, 0, + "expected 'ut' to resolve to UTC offset 0, got {} seconds", + offset_secs + ); + } +}