From 94b5cb12f051bb21e23ef222418d3f2fcb378e4c Mon Sep 17 00:00:00 2001 From: 0xSoftBoi Date: Sun, 5 Apr 2026 21:08:04 -0400 Subject: [PATCH 1/3] fix(offset): add "ut" (Universal Time) to named-timezone table GNU date accepts bare "UT" and "ut" as a synonym for UTC (+0). parse_datetime rejected them because the abbreviation was absent from the named-timezone lookup table in timezone_name_to_offset(). Add "ut" => Ok("+0") immediately after the existing "utc" entry and add a regression test that verifies all four case variants are accepted and resolve to a UTC-offset-0 instant. Fixes #280 Co-Authored-By: Claude Opus 4.6 --- src/items/offset.rs | 2 ++ tests/date.rs | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/src/items/offset.rs b/src/items/offset.rs index 9dc42f1..9a61161 100644 --- a/src/items/offset.rs +++ b/src/items/offset.rs @@ -281,6 +281,7 @@ fn timezone_name_to_offset(input: &str) -> ModalResult { "w" => Ok("-10"), "v" => Ok("-9"), "utc" => Ok("+0"), + "ut" => Ok("+0"), // Universal Time = UTC "u" => Ok("-8"), "t" => Ok("-7"), "sst" => Ok("-11"), @@ -423,6 +424,7 @@ mod tests { fn timezone_name_without_offset() { for (input, expected) in [ ("utc", off(false, 0, 0)), // UTC + ("ut", off(false, 0, 0)), // Universal Time = UTC (issue #280) ("gmt", off(false, 0, 0)), // UTC ("z", off(false, 0, 0)), // UTC ("west", off(false, 1, 0)), // positive offset diff --git a/tests/date.rs b/tests/date.rs index c57f782..1ef547b 100644 --- a/tests/date.rs +++ b/tests/date.rs @@ -285,3 +285,26 @@ fn test_multiple_month_skip(#[case] base: &str, #[case] input: &str, #[case] exp fn test_embedded_timezone(#[case] input: &str, #[case] expected: &str) { check_absolute(input, expected); } + + +// Issue #280: bare timezone abbreviation 'ut'/'UT' (Universal Time) should be +// accepted as UTC. GNU date accepts these; previously parse_datetime rejected them +// because 'ut' was absent from the named-timezone table. 
+#[test] +fn test_bare_ut_timezone_is_accepted() { + use parse_datetime::parse_datetime; + for input in ["ut", "UT", "Ut", "uT"] { + let result = parse_datetime(input); + assert!( + result.is_ok(), + "expected bare timezone '{}' to be accepted, got: {:?}", + input, result, + ); + let offset_secs = result.unwrap() + .as_zoned() + .map(|z| z.offset().seconds()) + .unwrap_or(0); + assert_eq!(offset_secs, 0, + "expected 'ut' to resolve to UTC offset 0, got {} seconds", offset_secs); + } +} From 104b2588302b5f639372b6d3fe9af3e011c26da8 Mon Sep 17 00:00:00 2001 From: 0xSoftBoi Date: Sun, 5 Apr 2026 21:14:03 -0400 Subject: [PATCH 2/3] fix: silently ignore unrecognized trailing alphabetic tokens after pure numbers GNU date accepts inputs like '8j' and '8 j', treating the number as an hour and silently discarding the unrecognized trailing word-token. This commit matches that behaviour. Implementation: - Add Item::Noise variant for unrecognized alphabetic tokens - Add noise_token() as the last alternative in parse_item(), so it only fires after every other parser has failed - In DateTimeBuilder::try_from, accept Noise only when it directly follows a Pure number item (prev_was_pure guard); reject it anywhere else so that leading garbage (e.g. 'bogus +1 day') and post-date garbage (e.g. '2025-01-01 abcdef') still produce errors - Add noise_after_pure_number regression test covering both '8j' and '8 j' Fixes #279 --- src/items/builder.rs | 51 ++++++++++++++++++++++++++++++++++++------- src/items/mod.rs | 52 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 94 insertions(+), 9 deletions(-) diff --git a/src/items/builder.rs b/src/items/builder.rs index 3465178..681a99c 100644 --- a/src/items/builder.rs +++ b/src/items/builder.rs @@ -517,17 +517,52 @@ impl TryFrom> for DateTimeBuilder { fn try_from(items: Vec) -> Result { let mut builder = DateTimeBuilder::new(); + // GNU date silently ignores unrecognized alphabetic tokens that directly + // follow a pure number (e.g. 
`8j` or `8 j` → 08:00:00). A Noise token + // is only valid in that position; anywhere else it is an error. + let mut prev_was_pure = false; for item in items { builder = match item { - Item::DateTime(dt) => builder.set_date(dt.date)?.set_time(dt.time)?, - Item::Date(d) => builder.set_date(d)?, - Item::Time(t) => builder.set_time(t)?, - Item::Weekday(weekday) => builder.set_weekday(weekday)?, - Item::Offset(offset) => builder.set_offset(offset)?, - Item::Relative(rel) => builder.push_relative(rel)?, - Item::TimeZone(tz) => builder.set_timezone(tz)?, - Item::Pure(pure) => builder.set_pure(pure)?, + Item::Noise => { + if !prev_was_pure { + return Err("unrecognized token"); + } + prev_was_pure = false; + builder + } + Item::Pure(pure) => { + prev_was_pure = true; + builder.set_pure(pure)? + } + Item::DateTime(dt) => { + prev_was_pure = false; + builder.set_date(dt.date)?.set_time(dt.time)? + } + Item::Date(d) => { + prev_was_pure = false; + builder.set_date(d)? + } + Item::Time(t) => { + prev_was_pure = false; + builder.set_time(t)? + } + Item::Weekday(weekday) => { + prev_was_pure = false; + builder.set_weekday(weekday)? + } + Item::Offset(offset) => { + prev_was_pure = false; + builder.set_offset(offset)? + } + Item::Relative(rel) => { + prev_was_pure = false; + builder.push_relative(rel)? + } + Item::TimeZone(tz) => { + prev_was_pure = false; + builder.set_timezone(tz)? + } } } diff --git a/src/items/mod.rs b/src/items/mod.rs index a1529bb..a0f4e60 100644 --- a/src/items/mod.rs +++ b/src/items/mod.rs @@ -54,7 +54,8 @@ use primitive::space; use winnow::{ combinator::{alt, eof, preceded, repeat_till, terminated, trace}, error::{AddContext, ContextError, ErrMode, StrContext, StrContextValue}, - stream::Stream, + stream::{AsChar, Stream}, + token::take_while, ModalResult, Parser, }; @@ -71,6 +72,9 @@ enum Item { Offset(offset::Offset), TimeZone(jiff::tz::TimeZone), Pure(String), + /// An unrecognized alphabetic token silently ignored for GNU `date` compatibility. 
+ /// GNU `date` ignores trailing word-tokens it doesn't recognize (e.g. `8j` → 08:00:00). + Noise, } /// Parse a date and time string and resolve it against the given base date and @@ -261,11 +265,26 @@ fn parse_item(input: &mut &str) -> ModalResult { weekday::parse.map(Item::Weekday), offset::parse.map(Item::Offset), pure::parse.map(Item::Pure), + noise_token, )), ) .parse_next(input) } +/// Consume an unrecognized alphabetic word and silently discard it. +/// +/// GNU `date` ignores trailing word-tokens it does not recognize (issue #279). +/// For example, `8j` is accepted and the `j` is silently dropped, yielding +/// 08:00:00, just as GNU `date -d '8j'` does. +/// +/// This parser is the last alternative in `parse_item`, so it only fires after +/// every other item parser has already failed. +fn noise_token(input: &mut &str) -> ModalResult { + primitive::s(take_while(1.., AsChar::is_alpha)) + .map(|_| Item::Noise) + .parse_next(input) +} + /// Create an error with context for unexpected input. fn expect_error(input: &mut &str, reason: &'static str) -> ErrMode { ErrMode::Cut(ContextError::new()).add_context( @@ -724,6 +743,37 @@ mod tests { assert_eq!(result.second(), 0); } + /// GNU `date` silently ignores unrecognized alphabetic tokens that trail a + /// pure number (issue #279). E.g. `8j` and `8 j` both produce 08:00:00. 
+ #[test] + fn noise_after_pure_number() { + let now = Zoned::now().with_time_zone(TimeZone::UTC); + + // Adjacent suffix: "8j" → hour 8 + let result = at_date(parse(&mut "8j").unwrap(), now.clone()); + assert_eq!(result.hour(), 8); + assert_eq!(result.minute(), 0); + assert_eq!(result.second(), 0); + + // Space-separated suffix: "8 j" → hour 8 + let result = at_date(parse(&mut "8 j").unwrap(), now.clone()); + assert_eq!(result.hour(), 8); + assert_eq!(result.minute(), 0); + assert_eq!(result.second(), 0); + + // Noise following a four-digit pure number: "1230foo" → 12:30 + let result = at_date(parse(&mut "1230foo").unwrap(), now.clone()); + assert_eq!(result.hour(), 12); + assert_eq!(result.minute(), 30); + + // Noise must NOT be accepted when it precedes a real item (leading garbage). + assert!(parse(&mut "bogus +1 day").is_err()); + // Noise must NOT be accepted after a non-pure item (e.g. after a date). + assert!(parse(&mut "2025-01-01 abcdef").is_err()); + // A standalone unrecognized word is still an error. 
+ assert!(parse(&mut "notadate").is_err()); + } + #[test] fn pure() { let now = Zoned::now().with_time_zone(TimeZone::UTC); From 118bb5ae582ccd4cf21932e647645b889ed04cc3 Mon Sep 17 00:00:00 2001 From: 0xSoftBoi Date: Mon, 6 Apr 2026 05:21:51 -0400 Subject: [PATCH 3/3] style: run rustfmt on stacked UT/noise parser fixes --- src/items/offset.rs | 2 +- tests/date.rs | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/items/offset.rs b/src/items/offset.rs index 9a61161..ddc132f 100644 --- a/src/items/offset.rs +++ b/src/items/offset.rs @@ -281,7 +281,7 @@ fn timezone_name_to_offset(input: &str) -> ModalResult { "w" => Ok("-10"), "v" => Ok("-9"), "utc" => Ok("+0"), - "ut" => Ok("+0"), // Universal Time = UTC + "ut" => Ok("+0"), // Universal Time = UTC "u" => Ok("-8"), "t" => Ok("-7"), "sst" => Ok("-11"), diff --git a/tests/date.rs b/tests/date.rs index 1ef547b..555cacf 100644 --- a/tests/date.rs +++ b/tests/date.rs @@ -286,7 +286,6 @@ fn test_embedded_timezone(#[case] input: &str, #[case] expected: &str) { check_absolute(input, expected); } - // Issue #280: bare timezone abbreviation 'ut'/'UT' (Universal Time) should be // accepted as UTC. GNU date accepts these; previously parse_datetime rejected them // because 'ut' was absent from the named-timezone table. @@ -298,13 +297,18 @@ fn test_bare_ut_timezone_is_accepted() { assert!( result.is_ok(), "expected bare timezone '{}' to be accepted, got: {:?}", - input, result, + input, + result, ); - let offset_secs = result.unwrap() + let offset_secs = result + .unwrap() .as_zoned() .map(|z| z.offset().seconds()) .unwrap_or(0); - assert_eq!(offset_secs, 0, - "expected 'ut' to resolve to UTC offset 0, got {} seconds", offset_secs); + assert_eq!( + offset_secs, 0, + "expected 'ut' to resolve to UTC offset 0, got {} seconds", + offset_secs + ); } }