From c853f892abb4d81db785b0bd8f3c7a8d7c3fc455 Mon Sep 17 00:00:00 2001 From: Danny Gershman Date: Sun, 14 Jun 2026 21:28:17 -0400 Subject: [PATCH 1/3] Stateless needsRefine: derive CHANGES_REQUESTED state from PR alone (CROW-508) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the edge-triggered "needs refine" machinery shipped in #507 with a stateless rule that derives the answer from the PR snapshot on every poll. No `previousPRStatus` persistence, no `emittedTransitionMeta` map, no `headShaAtEmit` snapshots, no stalled-re-fire pass, no quiet-window bookkeeping. The rule: emit `.changesRequested` when the PR is OPEN and in CHANGES_REQUESTED, the latest CHANGES_REQUESTED review timestamp is newer than the latest non-merge/non-rebase commit timestamp, the managed terminal is idle, the PR has been observed at least once (first- observation skip), and 7 minutes have elapsed since the last dispatch for this PR. Closes #508 πŸ¦β€β¬› Generated with Claude Code, orchestrated by Crow Co-Authored-By: Claude Crow-Session: 3AC2E8F0-213B-4D08-9B53-9510D856BE6C --- .../Sources/CrowCore/Models/PRStatus.swift | 85 ++- .../CrowCore/Models/PRStatusTransition.swift | 188 +------ .../PRStatusNeedsRefineTests.swift | 152 ++++++ .../PRStatusTransitionTests.swift | 158 +----- .../Sources/CrowPersistence/JSONStore.swift | 44 +- .../CrowPersistenceTests/JSONStoreTests.swift | 74 +-- .../Sources/CrowProvider/BackendTypes.swift | 24 +- .../Backends/GitHubCodeBackend.swift | 66 ++- .../CrowProviderTests/BackendsTests.swift | 82 ++- Sources/Crow/App/AppDelegate.swift | 6 - Sources/Crow/App/IssueTracker.swift | 425 ++++----------- Tests/CrowTests/IssueTrackerDedupTests.swift | 42 +- .../IssueTrackerNeedsRefineTests.swift | 212 ++++++++ .../IssueTrackerStalledRefireTests.swift | 492 ------------------ 14 files changed, 753 insertions(+), 1297 deletions(-) create mode 100644 Packages/CrowCore/Tests/CrowCoreTests/PRStatusNeedsRefineTests.swift create mode 100644 Tests/CrowTests/IssueTrackerNeedsRefineTests.swift delete mode 100644 Tests/CrowTests/IssueTrackerStalledRefireTests.swift diff --git a/Packages/CrowCore/Sources/CrowCore/Models/PRStatus.swift b/Packages/CrowCore/Sources/CrowCore/Models/PRStatus.swift index bd0fe007..579c5a5e 100644 --- a/Packages/CrowCore/Sources/CrowCore/Models/PRStatus.swift +++ b/Packages/CrowCore/Sources/CrowCore/Models/PRStatus.swift @@ -6,31 +6,26 @@ public struct PRStatus: Codable, Sendable, Equatable { public var reviewStatus: ReviewStatus public var mergeable: MergeStatus public var failedCheckNames: [String] - /// Head commit SHA. Used to dedupe per-commit transition events - /// (e.g. don't re-fire "checks failing" when the same commit is re-run). + /// Head commit SHA. Used to dedupe per-commit `.checksFailing` events + /// (don't re-fire when the same commit is re-run). public var headSha: String? - /// Node ID of the most recent CHANGES_REQUESTED review on this PR, when - /// the provider surfaces stable review identifiers. Round-2 dedup uses - /// this so a fresh "Request changes" submission re-arms auto-respond - /// even when the bucket stays in `.changesRequested`. `nil` when no - /// CHANGES_REQUESTED review is currently visible, or on providers that - /// don't expose review IDs in the monitored-PR query (e.g. GitLab). - public var latestReviewID: String? /// Whether the underlying PR is currently OPEN (as opposed to MERGED or /// closed-unmerged). Distinct from `mergeable == .merged`: a CLOSED PR - /// has `mergeable == .unknown` but is still not actionable. - /// - /// Required by the stalled-`.changesRequested` re-fire pass (CROW-505): - /// the standing-state predicate reads `previousPRStatus[sid]` directly - /// rather than firing on transition edges, so without this flag a PR - /// that merged or closed while in `CHANGES_REQUESTED` (its - /// `reviewDecision` doesn't reset on close) would keep re-prompting the - /// agent every quiet window to "address review feedback" on a dead PR. - /// Defaults to `true` for backward decode β€” pre-CROW-505 stores have no - /// `isOpen` field, and the very next poll rewrites the persisted status - /// from the live viewer payload before re-fire is consulted, so the - /// transient default never affects an actual re-fire decision. + /// has `mergeable == .unknown` but is still not actionable. Gates + /// `needsRefine` so a dead PR can never re-prompt. public var isOpen: Bool + /// Max `submittedAt` across CHANGES_REQUESTED reviews currently visible + /// on the PR. `nil` when no CHANGES_REQUESTED review is present (or the + /// provider doesn't surface review timestamps, e.g. GitLab today). + /// The stateless "needs refine" rule compares this against + /// `lastSubstantiveCommitAt` to decide whether the agent owes a response. + public var lastChangesRequestedAt: Date? + /// Max `committedDate` across the PR's commits that are NOT rebases or + /// merges (parent count < 2 AND message does not start with a merge + /// prefix). `nil` when no commit timestamp data is available. Used by + /// the stateless rule to know whether the author has substantively + /// responded since the latest CHANGES_REQUESTED review. + public var lastSubstantiveCommitAt: Date? public init( checksPass: CheckStatus = .unknown, @@ -38,16 +33,18 @@ public struct PRStatus: Codable, Sendable, Equatable { mergeable: MergeStatus = .unknown, failedCheckNames: [String] = [], headSha: String? = nil, - latestReviewID: String? = nil, - isOpen: Bool = true + isOpen: Bool = true, + lastChangesRequestedAt: Date? = nil, + lastSubstantiveCommitAt: Date? = nil ) { self.checksPass = checksPass self.reviewStatus = reviewStatus self.mergeable = mergeable self.failedCheckNames = failedCheckNames self.headSha = headSha - self.latestReviewID = latestReviewID self.isOpen = isOpen + self.lastChangesRequestedAt = lastChangesRequestedAt + self.lastSubstantiveCommitAt = lastSubstantiveCommitAt } public init(from decoder: Decoder) throws { @@ -57,12 +54,14 @@ public struct PRStatus: Codable, Sendable, Equatable { mergeable = try c.decodeIfPresent(MergeStatus.self, forKey: .mergeable) ?? .unknown failedCheckNames = try c.decodeIfPresent([String].self, forKey: .failedCheckNames) ?? [] headSha = try c.decodeIfPresent(String.self, forKey: .headSha) - latestReviewID = try c.decodeIfPresent(String.self, forKey: .latestReviewID) isOpen = try c.decodeIfPresent(Bool.self, forKey: .isOpen) ?? true + lastChangesRequestedAt = try c.decodeIfPresent(Date.self, forKey: .lastChangesRequestedAt) + lastSubstantiveCommitAt = try c.decodeIfPresent(Date.self, forKey: .lastSubstantiveCommitAt) } private enum CodingKeys: String, CodingKey { - case checksPass, reviewStatus, mergeable, failedCheckNames, headSha, latestReviewID, isOpen + case checksPass, reviewStatus, mergeable, failedCheckNames, headSha, isOpen + case lastChangesRequestedAt, lastSubstantiveCommitAt } public enum CheckStatus: String, Codable, Sendable { @@ -112,4 +111,38 @@ public struct PRStatus: Codable, Sendable, Equatable { public var hasBlockers: Bool { !isMerged && (checksPass == .failing || reviewStatus == .changesRequested || mergeable == .conflicting) } + + /// The stateless "needs refine" rule (CROW-508). Returns `true` when the + /// PR is sitting in CHANGES_REQUESTED, is still open, and the agent has + /// not made a substantive commit since the latest CHANGES_REQUESTED + /// review. `terminalIdle` gates the outer IssueTracker dispatch β€” keeping + /// it as a parameter here makes the rule fully derivable from the PR plus + /// terminal state, with no per-session bookkeeping. + /// + /// The rule is intentionally tolerant of nil timestamps: + /// - `lastChangesRequestedAt == nil`: GitHub said `reviewDecision == + /// CHANGES_REQUESTED` but didn't surface a timestamped CR review (rare; + /// `latestReviews` paginates and can omit one). We refuse to fire β€” a + /// missing timestamp can't anchor "since when," and a false fire would + /// re-prompt the agent for no reviewer action. + /// - `lastSubstantiveCommitAt == nil`: the PR has no qualifying commits + /// yet (or commit data wasn't fetched). Treat as "no response since + /// review" β†’ still needs refine. + /// + /// Anti-loop is automatic: as soon as the agent makes a non-merge, + /// non-rebase commit, `lastSubstantiveCommitAt` advances past + /// `lastChangesRequestedAt` and this returns false on the next poll. + /// A `Merge branch 'main'` commit does NOT advance the timestamp (it's + /// filtered out upstream when computing `lastSubstantiveCommitAt`), so + /// the GitHub "Update branch" button can't trick the rule into a false + /// negative. + public static func needsRefine(status: PRStatus, terminalIdle: Bool) -> Bool { + guard terminalIdle else { return false } + guard status.reviewStatus == .changesRequested, status.isOpen else { return false } + guard let lastReview = status.lastChangesRequestedAt else { return false } + if let lastCommit = status.lastSubstantiveCommitAt { + return lastCommit < lastReview + } + return true + } } diff --git a/Packages/CrowCore/Sources/CrowCore/Models/PRStatusTransition.swift b/Packages/CrowCore/Sources/CrowCore/Models/PRStatusTransition.swift index 121a4333..f4abf70a 100644 --- a/Packages/CrowCore/Sources/CrowCore/Models/PRStatusTransition.swift +++ b/Packages/CrowCore/Sources/CrowCore/Models/PRStatusTransition.swift @@ -2,15 +2,15 @@ import Foundation /// A detected change in a session's `PRStatus` that warrants user attention. /// -/// Computed by `IssueTracker` once per polling cycle by comparing the new -/// `PRStatus` against the previous one. Pure value type β€” no UI/AppState deps β€” -/// so the comparison logic stays unit-testable. +/// `.changesRequested` is emitted by the stateless `PRStatus.needsRefine` +/// path in `IssueTracker` (CROW-508). `.checksFailing` is still edge-detected +/// from the previous PR status β€” checks that just flipped from passing/pending +/// to failing on a new commit. public struct PRStatusTransition: Sendable, Equatable { public enum Kind: String, Sendable, Equatable { - /// A reviewer transitioned the PR into "changes requested", or a fresh - /// formal "Request changes" review was submitted while the PR was - /// already in `.changesRequested` (round-2; identified by review-id - /// rotation). See `transitions(from:to:…)` for the detection rules. + /// A reviewer has CHANGES_REQUESTED and the agent hasn't responded + /// with a substantive commit since. Emitted on every poll the rule + /// holds, gated by the per-PR cooldown in `IssueTracker`. case changesRequested /// At least one CI/CD check newly transitioned to failing on the /// current head commit. @@ -21,29 +21,12 @@ public struct PRStatusTransition: Sendable, Equatable { public let sessionID: UUID public let prURL: String public let prNumber: Int? - /// Head commit SHA at the moment of the transition. Part of the dedupe key - /// for `.checksFailing` so re-runs on the same commit don't re-fire. - /// Carried on `.changesRequested` for downstream telemetry/UI but is not - /// part of that dedupe key (see `dedupeKey`). + /// Head commit SHA at the moment of the transition. Carried through to + /// downstream consumers (logging, AutoRespondCoordinator) but is no + /// longer part of any dedupe key (CROW-508 removed the meta map). public let headSha: String? - /// Node ID of the most recent CHANGES_REQUESTED review when the transition - /// fired. Sole round-2 discriminator in the `.changesRequested` dedupe - /// key β€” a fresh formal review rotates this and re-arms auto-respond, - /// while the author's own response push (which doesn't dismiss the - /// reviewer's CHANGES_REQUESTED decision) leaves it alone and stays - /// deduped. `nil` for providers that don't expose review IDs. - public let latestReviewID: String? /// Names of failing checks (empty for `.changesRequested`). public let failedCheckNames: [String] - /// True when this transition is a synthetic re-emission produced by the - /// stalled-`.changesRequested` re-fire pass (CROW-505), not a fresh - /// edge-detected transition. Consumers route on this to suppress noisy - /// downstream effects on re-fires (e.g. AppDelegate skips - /// `notifyPRTransition` so the user doesn't get a fresh macOS - /// notification every quiet window for the same review). The - /// `AutoRespondCoordinator` dispatch still runs β€” re-prompting the - /// stalled agent is the whole point. - public let isReFire: Bool public init( kind: Kind, @@ -51,101 +34,26 @@ public struct PRStatusTransition: Sendable, Equatable { prURL: String, prNumber: Int? = nil, headSha: String? = nil, - latestReviewID: String? = nil, - failedCheckNames: [String] = [], - isReFire: Bool = false + failedCheckNames: [String] = [] ) { self.kind = kind self.sessionID = sessionID self.prURL = prURL self.prNumber = prNumber self.headSha = headSha - self.latestReviewID = latestReviewID self.failedCheckNames = failedCheckNames - self.isReFire = isReFire - } - - /// Stable key used to suppress duplicate fires across polling cycles. - /// - /// `.changesRequested` keys on `(session, kind, latestReviewID)`. The - /// review ID rotates only when a reviewer submits a new formal review β€” - /// not when the author pushes a fix β€” so the auto-respond prompt that - /// instructs the agent to "commit the fix, push, re-request review" - /// can't trigger itself on the next poll (CROW-456 review feedback). - /// - /// `.checksFailing` keys on `(session, kind, headSha)` so a new commit - /// that also fails CI can re-fire. - public var dedupeKey: String { - switch kind { - case .changesRequested: - return "\(sessionID.uuidString)|changesRequested|\(latestReviewID ?? "")" - case .checksFailing: - return "\(sessionID.uuidString)|checksFailing|\(headSha ?? "")" - } - } -} - -/// Per-emitted-transition metadata persisted alongside the dedup key. -/// -/// `emittedAt` lets the stalled-re-fire pass (CROW-505) enforce a quiet -/// window before re-prompting; `headShaAtEmit` is the head SHA observed at -/// dispatch, which preserves the anti-loop guarantee β€” a real agent push -/// advances the head SHA, so re-fire only triggers when the author has not -/// pushed since the original prompt fired. -/// -/// `reFireCount` bounds the number of times the same emission may re-fire -/// before requiring a real edge event (reviewer rotates `latestReviewID`, -/// author pushes, bucket exits) to reset. Without this cap the re-fire would -/// loop forever when the agent legitimately *replies* on the PR instead of -/// pushing β€” a behavior the auto-respond prompt itself encourages ("If a -/// comment is unclear or you disagree, leave a reply…"). The reply leaves -/// the SHA unchanged but is a valid terminal state, not a stall. -public struct EmittedTransitionMeta: Codable, Sendable, Equatable { - public var emittedAt: Date - public var headShaAtEmit: String? - public var reFireCount: Int - - public init(emittedAt: Date, headShaAtEmit: String?, reFireCount: Int = 0) { - self.emittedAt = emittedAt - self.headShaAtEmit = headShaAtEmit - self.reFireCount = reFireCount - } - - public init(from decoder: Decoder) throws { - let c = try decoder.container(keyedBy: CodingKeys.self) - emittedAt = try c.decode(Date.self, forKey: .emittedAt) - headShaAtEmit = try c.decodeIfPresent(String.self, forKey: .headShaAtEmit) - // Backward decode for pre-cap stores: a missing field means the entry - // has never been re-fired yet, so 0 is the safe default. - reFireCount = try c.decodeIfPresent(Int.self, forKey: .reFireCount) ?? 0 - } - - private enum CodingKeys: String, CodingKey { - case emittedAt, headShaAtEmit, reFireCount } } extension PRStatus { - /// Compute the transitions implied by moving from `old` to `new`. - /// - /// On the first observation (`old == nil`), emits a transition only when - /// the new state is itself a "needs attention" condition β€” `.changesRequested` - /// or failing checks. A PR first observed as `.open` with passing/pending - /// checks emits nothing (the startup-flood safety the bare guard used to - /// provide). Persisted `emittedTransitionKeys` in `IssueTracker` (CROW-456) - /// suppress restart re-fires of transitions already emitted by a prior run. + /// Compute the `.checksFailing` transition (if any) implied by moving from + /// `old` to `new`. `.changesRequested` is no longer computed here β€” the + /// stateless `needsRefine` path in `IssueTracker` handles it directly off + /// the PR snapshot (CROW-508). /// - /// Otherwise emits at most one `.changesRequested` and one `.checksFailing`, - /// in that order. - /// - /// `.changesRequested` fires when the bucket is entered (`old` wasn't - /// `.changesRequested`) or when the latest CHANGES_REQUESTED review ID - /// rotates while still in the bucket (a fresh formal "Request changes" - /// submission). A head-SHA change alone does **not** fire β€” `reviewDecision` - /// stays `CHANGES_REQUESTED` after the author pushes, so any such trigger - /// would re-fire on the agent's own response push and create an auto-respond - /// loop. Real "round 2 after a fix" always involves the reviewer submitting - /// a new formal review, which rotates the review ID. + /// On the first observation (`old == nil`), emits a `.checksFailing` + /// transition only when the new state itself is failing β€” same + /// startup-flood safety the prior implementation provided for CI. /// /// - Note: This is the pure piece of the transition pipeline; the /// stateful dedupe (across polls) lives in `IssueTracker`. @@ -156,66 +64,10 @@ extension PRStatus { prURL: String, prNumber: Int? ) -> [PRStatusTransition] { - guard let old else { - // First observation (CROW-477): emit only when the new state is - // itself attention-needing, so a PR Crow first sees already in - // CHANGES_REQUESTED (or with failing checks) still routes to - // auto-respond. Restart-spurious re-fires are prevented by the - // persisted dedupe keys at the IssueTracker layer, not here. - var out: [PRStatusTransition] = [] - if new.reviewStatus == .changesRequested { - out.append(PRStatusTransition( - kind: .changesRequested, - sessionID: sessionID, - prURL: prURL, - prNumber: prNumber, - headSha: new.headSha, - latestReviewID: new.latestReviewID, - failedCheckNames: [] - )) - } - if new.checksPass == .failing { - out.append(PRStatusTransition( - kind: .checksFailing, - sessionID: sessionID, - prURL: prURL, - prNumber: prNumber, - headSha: new.headSha, - failedCheckNames: new.failedCheckNames - )) - } - return out - } - var out: [PRStatusTransition] = [] - if new.reviewStatus == .changesRequested { - let entering = old.reviewStatus != .changesRequested - // Require both ids to be non-nil. If a CHANGES_REQUESTED review is - // briefly absent from `latestReviews(first: 5)` (buried behind 5+ - // other reviewers' latest reviews) and then resurfaces with a new - // id, the nil β†’ id step intentionally won't fire. We prefer a - // missed re-prompt to a spurious one β€” `reviewDecision` and the - // `latestReviews`-derived id can legitimately disagree during a - // single CHANGES_REQUESTED tenure, and a spurious fire would - // re-prompt the agent for no reviewer action. - let newReview = old.latestReviewID != nil - && new.latestReviewID != nil - && old.latestReviewID != new.latestReviewID - if entering || newReview { - out.append(PRStatusTransition( - kind: .changesRequested, - sessionID: sessionID, - prURL: prURL, - prNumber: prNumber, - headSha: new.headSha, - latestReviewID: new.latestReviewID, - failedCheckNames: [] - )) - } - } - - if old.checksPass != .failing && new.checksPass == .failing { + let wasFailing = old?.checksPass == .failing + if !wasFailing, new.checksPass == .failing { out.append(PRStatusTransition( kind: .checksFailing, sessionID: sessionID, diff --git a/Packages/CrowCore/Tests/CrowCoreTests/PRStatusNeedsRefineTests.swift b/Packages/CrowCore/Tests/CrowCoreTests/PRStatusNeedsRefineTests.swift new file mode 100644 index 00000000..bf57a127 --- /dev/null +++ b/Packages/CrowCore/Tests/CrowCoreTests/PRStatusNeedsRefineTests.swift @@ -0,0 +1,152 @@ +import Foundation +import Testing +@testable import CrowCore + +/// CROW-508 β€” stateless "needs refine" rule. The PR snapshot alone (plus the +/// terminal-idle flag) decides whether the agent owes a response to the +/// latest CHANGES_REQUESTED review. These tests pin every edge case of the +/// pure predicate so the regression surface is independent of the IssueTracker +/// wiring (cooldown, first-observation skip, opt-in toggle). +@Suite("PRStatus.needsRefine (CROW-508)") +struct PRStatusNeedsRefineTests { + private let reviewAt = Date(timeIntervalSince1970: 1_700_000_000) + private let beforeReview = Date(timeIntervalSince1970: 1_699_999_000) + private let afterReview = Date(timeIntervalSince1970: 1_700_001_000) + + private func status( + review: PRStatus.ReviewStatus = .changesRequested, + isOpen: Bool = true, + lastChangesRequestedAt: Date? = nil, + lastSubstantiveCommitAt: Date? = nil + ) -> PRStatus { + PRStatus( + checksPass: .pending, + reviewStatus: review, + mergeable: .unknown, + failedCheckNames: [], + headSha: "abc", + isOpen: isOpen, + lastChangesRequestedAt: lastChangesRequestedAt, + lastSubstantiveCommitAt: lastSubstantiveCommitAt + ) + } + + // MARK: - Acceptance Test 1 β€” round-N stall + + @Test + func firesWhenReviewIsNewerThanLastCommit() { + // Reviewer left CHANGES_REQUESTED, agent committed BEFORE that review, + // and the terminal is idle β€” the bug repro the ticket centers on. + let s = status( + lastChangesRequestedAt: reviewAt, + lastSubstantiveCommitAt: beforeReview + ) + #expect(PRStatus.needsRefine(status: s, terminalIdle: true)) + } + + @Test + func firesWhenChangesRequestedAndNoCommitsYet() { + // First CHANGES_REQUESTED on a brand-new PR that has no qualifying + // commits yet (or commit data not fetched). Treat "no commits" as + // "no response since review" β€” the rule fires. + let s = status( + lastChangesRequestedAt: reviewAt, + lastSubstantiveCommitAt: nil + ) + #expect(PRStatus.needsRefine(status: s, terminalIdle: true)) + } + + // MARK: - Acceptance Test 2 β€” merge-from-main doesn't reset + + @Test + func mergeFromMainDoesNotResetTheRule() { + // The "Update branch" button produces a merge commit that is filtered + // out upstream (in parsePRNode) when computing lastSubstantiveCommitAt. + // From the rule's perspective: lastSubstantiveCommitAt stays at the + // pre-review commit (before the review), so needsRefine still fires. + // This test pins the post-filter behavior: an OLD lastSubstantiveCommitAt + // does not advance just because a merge commit was pushed. + let s = status( + lastChangesRequestedAt: reviewAt, + lastSubstantiveCommitAt: beforeReview // upstream filter kept the old value + ) + #expect(PRStatus.needsRefine(status: s, terminalIdle: true)) + } + + // MARK: - Acceptance Test 3 β€” real fix flips it + + @Test + func doesNotFireWhenCommitIsNewerThanReview() { + // Agent pushed a real (non-merge, non-rebase) commit after the + // CHANGES_REQUESTED review. lastSubstantiveCommitAt advances past + // lastChangesRequestedAt β†’ rule stops firing. The anti-loop property + // the head-SHA gate used to enforce, now derived from PR data alone. + let s = status( + lastChangesRequestedAt: reviewAt, + lastSubstantiveCommitAt: afterReview + ) + #expect(!PRStatus.needsRefine(status: s, terminalIdle: true)) + } + + @Test + func doesNotFireWhenCommitEqualsReviewTime() { + // Edge: review and commit timestamps coincide (network jitter, GitHub + // rounding). `<` not `<=` β€” commit at exactly review time counts as + // "already responded", so the rule does NOT fire. + let s = status( + lastChangesRequestedAt: reviewAt, + lastSubstantiveCommitAt: reviewAt + ) + #expect(!PRStatus.needsRefine(status: s, terminalIdle: true)) + } + + // MARK: - Gates: terminal, review state, isOpen, missing timestamp + + @Test + func doesNotFireWhenTerminalNotIdle() { + // Agent is mid-work β€” don't interrupt. + let s = status( + lastChangesRequestedAt: reviewAt, + lastSubstantiveCommitAt: beforeReview + ) + #expect(!PRStatus.needsRefine(status: s, terminalIdle: false)) + } + + @Test + func doesNotFireWhenReviewStatusIsNotChangesRequested() { + // Reviewer already approved or hasn't reviewed yet β€” out of bucket. + #expect(!PRStatus.needsRefine( + status: status(review: .approved, lastChangesRequestedAt: reviewAt, lastSubstantiveCommitAt: beforeReview), + terminalIdle: true + )) + #expect(!PRStatus.needsRefine( + status: status(review: .reviewRequired, lastChangesRequestedAt: reviewAt, lastSubstantiveCommitAt: beforeReview), + terminalIdle: true + )) + } + + @Test + func doesNotFireWhenPRIsClosed() { + // GitHub keeps `reviewDecision == CHANGES_REQUESTED` after a PR + // merges or closes. The isOpen gate prevents re-prompting the agent + // to "address review feedback" on a dead PR. + let merged = status( + isOpen: false, + lastChangesRequestedAt: reviewAt, + lastSubstantiveCommitAt: beforeReview + ) + #expect(!PRStatus.needsRefine(status: merged, terminalIdle: true)) + } + + @Test + func doesNotFireWhenChangesRequestedTimestampMissing() { + // GitHub says CHANGES_REQUESTED but didn't surface a timestamped CR + // review (rare paging quirk). Without an anchor we can't decide + // "since when", and a false fire is worse than a missed one. + let s = status( + lastChangesRequestedAt: nil, + lastSubstantiveCommitAt: beforeReview + ) + #expect(!PRStatus.needsRefine(status: s, terminalIdle: true)) + } +} diff --git a/Packages/CrowCore/Tests/CrowCoreTests/PRStatusTransitionTests.swift b/Packages/CrowCore/Tests/CrowCoreTests/PRStatusTransitionTests.swift index 58a7e7a1..c136a69e 100644 --- a/Packages/CrowCore/Tests/CrowCoreTests/PRStatusTransitionTests.swift +++ b/Packages/CrowCore/Tests/CrowCoreTests/PRStatusTransitionTests.swift @@ -2,7 +2,7 @@ import Foundation import Testing @testable import CrowCore -@Suite("PRStatus transitions") +@Suite("PRStatus transitions (checks-failing edge)") struct PRStatusTransitionTests { private let sessionID = UUID() private let prURL = "https://github.com/foo/bar/pull/1" @@ -13,7 +13,6 @@ struct PRStatusTransitionTests { review: PRStatus.ReviewStatus = .reviewRequired, checks: PRStatus.CheckStatus = .pending, sha: String? = nil, - reviewID: String? = nil, failed: [String] = [] ) -> PRStatus { PRStatus( @@ -21,8 +20,7 @@ struct PRStatusTransitionTests { reviewStatus: review, mergeable: .unknown, failedCheckNames: failed, - headSha: sha, - latestReviewID: reviewID + headSha: sha ) } @@ -33,32 +31,17 @@ struct PRStatusTransitionTests { // MARK: - First-observation gate @Test - func firstObservationOpenWithNoReviewDoesNotFire() { - // No prior status AND the new state isn't attention-needing β€” must not - // fire. Protects the startup-flood case the original guard was meant - // to cover. + func firstObservationOpenWithNoFailingChecksDoesNotFire() { + // Post-CROW-508: only `.checksFailing` is edge-emitted; everything + // about `.changesRequested` moved to the stateless `needsRefine` + // path in `IssueTracker`. #expect(compute(from: nil, to: status(review: .reviewRequired, checks: .pending)).isEmpty) #expect(compute(from: nil, to: status(review: .approved, checks: .passing)).isEmpty) - } - - @Test - func firstObservationChangesRequestedFires() { - // CROW-477: PR was already in CHANGES_REQUESTED when Crow first observed - // it (poll missed the open β†’ changes-requested window, or session was - // created after the review landed). Must still route to auto-respond. - // IssueTracker's persisted dedupe (CROW-456) prevents restart re-fires. - let ts = compute(from: nil, to: status(review: .changesRequested, sha: shaA, reviewID: "R_1")) - #expect(ts.count == 1) - #expect(ts.first?.kind == .changesRequested) - #expect(ts.first?.latestReviewID == "R_1") - #expect(ts.first?.headSha == shaA) - #expect(ts.first?.sessionID == sessionID) - #expect(ts.first?.prURL == prURL) + #expect(compute(from: nil, to: status(review: .changesRequested)).isEmpty) } @Test func firstObservationFailingChecksFires() { - // CROW-477: same idea for failing CI on first observation. let ts = compute(from: nil, to: status(checks: .failing, sha: shaA, failed: ["lint"])) #expect(ts.count == 1) #expect(ts.first?.kind == .checksFailing) @@ -66,90 +49,6 @@ struct PRStatusTransitionTests { #expect(ts.first?.failedCheckNames == ["lint"]) } - @Test - func firstObservationBothChangesRequestedAndFailingFiresBoth() { - let ts = compute(from: nil, to: status(review: .changesRequested, checks: .failing, sha: shaA, reviewID: "R_1", failed: ["lint"])) - #expect(ts.count == 2) - #expect(ts.contains(where: { $0.kind == .changesRequested && $0.latestReviewID == "R_1" })) - #expect(ts.contains(where: { $0.kind == .checksFailing && $0.failedCheckNames == ["lint"] })) - } - - // MARK: - Changes requested - - @Test - func reviewRequiredToChangesRequestedFires() { - let old = status(review: .reviewRequired) - let new = status(review: .changesRequested) - let ts = compute(from: old, to: new) - #expect(ts.count == 1) - #expect(ts.first?.kind == .changesRequested) - #expect(ts.first?.sessionID == sessionID) - #expect(ts.first?.prURL == prURL) - } - - @Test - func approvedToChangesRequestedFires() { - let ts = compute(from: status(review: .approved), to: status(review: .changesRequested)) - #expect(ts.count == 1) - #expect(ts.first?.kind == .changesRequested) - } - - @Test - func changesRequestedToChangesRequestedDoesNotFire() { - // Status hasn't transitioned β€” every poll observes the same state with - // the same review ID and SHA, so no re-fire. - let old = status(review: .changesRequested, sha: shaA, reviewID: "R_1") - let new = status(review: .changesRequested, sha: shaA, reviewID: "R_1") - #expect(compute(from: old, to: new).isEmpty) - } - - @Test - func changesRequestedSameStateNewReviewIDFires() { - // CROW-456: reviewer submitted a second "Request changes" without an - // intervening approval. The review ID rotates, so we must re-arm. - let old = status(review: .changesRequested, sha: shaA, reviewID: "R_1") - let new = status(review: .changesRequested, sha: shaA, reviewID: "R_2") - let ts = compute(from: old, to: new) - #expect(ts.count == 1) - #expect(ts.first?.kind == .changesRequested) - #expect(ts.first?.latestReviewID == "R_2") - } - - @Test - func changesRequestedSameStateNewShaWithoutNewReviewDoesNotFire() { - // CROW-456 review feedback: a head-SHA change alone must NOT fire. - // GitHub doesn't dismiss CHANGES_REQUESTED on author push, so the - // agent's own response push (after the auto-respond prompt told it - // to commit + push + re-request reviewers) would otherwise trigger - // auto-respond again β€” a self-sustaining loop. Real "round 2" always - // brings a new formal review and a new review ID. - let old = status(review: .changesRequested, sha: shaA, reviewID: "R_1") - let new = status(review: .changesRequested, sha: shaB, reviewID: "R_1") - #expect(compute(from: old, to: new).isEmpty) - } - - @Test - func changesRequestedNewReviewAfterAgentPushFires() { - // The "agent pushed a fix, reviewer responded with more changes" path. - // The fresh formal review rotates `latestReviewID` regardless of how - // many SHAs the agent pushed in between, so `newReview` covers this - // case without needing a separate post-commit-push trigger. - let old = status(review: .changesRequested, sha: shaA, reviewID: "R_1") - let new = status(review: .changesRequested, sha: shaB, reviewID: "R_2") - let ts = compute(from: old, to: new) - #expect(ts.count == 1) - #expect(ts.first?.kind == .changesRequested) - #expect(ts.first?.latestReviewID == "R_2") - #expect(ts.first?.headSha == shaB) - } - - @Test - func changesRequestedToApprovedDoesNotFire() { - // Reviewer approved β€” that's the rule clearing, not a new event to act on. - let ts = compute(from: status(review: .changesRequested), to: status(review: .approved)) - #expect(ts.isEmpty) - } - // MARK: - Checks failing @Test @@ -172,8 +71,6 @@ struct PRStatusTransitionTests { @Test func failingToFailingDoesNotFire() { - // Pure transitions (no dedupe yet) treats this as no transition because - // `old.checksPass == .failing` already. let ts = compute(from: status(checks: .failing, sha: shaA), to: status(checks: .failing, sha: shaA)) #expect(ts.isEmpty) } @@ -184,41 +81,16 @@ struct PRStatusTransitionTests { #expect(ts.isEmpty) } - // MARK: - Both transitions in one cycle + // MARK: - changes-requested no longer comes through transitions() @Test - func simultaneousTransitionsFireBoth() { - let old = status(review: .reviewRequired, checks: .pending, sha: shaA) - let new = status(review: .changesRequested, checks: .failing, sha: shaA) + func changesRequestedNeverEmittedByTransitions() { + // The stateless `needsRefine` rule in `IssueTracker` is the sole + // emitter of `.changesRequested` post-CROW-508. `transitions()` must + // never produce one β€” guarding against accidental reintroduction. + let old = status(review: .reviewRequired) + let new = status(review: .changesRequested) let ts = compute(from: old, to: new) - #expect(ts.count == 2) - #expect(ts.contains(where: { $0.kind == .changesRequested })) - #expect(ts.contains(where: { $0.kind == .checksFailing })) - } - - // MARK: - Dedupe key - - @Test - func dedupeKeyForChangesRequestedIncludesReviewIDOnly() { - // CROW-456: review ID is the sole round-2 discriminator. Same review - // collapses; new review breaks the key. Head SHA is intentionally NOT - // in the key β€” see `changesRequestedSameStateNewShaWithoutNewReviewDoesNotFire` - // for the rationale (avoids the agent-push self-loop). - let base = PRStatusTransition(kind: .changesRequested, sessionID: sessionID, prURL: prURL, prNumber: 1, headSha: shaA, latestReviewID: "R_1") - let sameAgain = PRStatusTransition(kind: .changesRequested, sessionID: sessionID, prURL: prURL, prNumber: 1, headSha: shaA, latestReviewID: "R_1") - let newReview = PRStatusTransition(kind: .changesRequested, sessionID: sessionID, prURL: prURL, prNumber: 1, headSha: shaA, latestReviewID: "R_2") - let onlyNewSha = PRStatusTransition(kind: .changesRequested, sessionID: sessionID, prURL: prURL, prNumber: 1, headSha: shaB, latestReviewID: "R_1") - - #expect(base.dedupeKey == sameAgain.dedupeKey) - #expect(base.dedupeKey != newReview.dedupeKey) - #expect(base.dedupeKey == onlyNewSha.dedupeKey) - } - - @Test - func dedupeKeyForChecksFailingIncludesSha() { - // A new commit that also fails CI must be allowed to re-fire. - let t1 = PRStatusTransition(kind: .checksFailing, sessionID: sessionID, prURL: prURL, prNumber: 1, headSha: shaA) - let t2 = PRStatusTransition(kind: .checksFailing, sessionID: sessionID, prURL: prURL, prNumber: 1, headSha: shaB) - #expect(t1.dedupeKey != t2.dedupeKey) + #expect(!ts.contains(where: { $0.kind == .changesRequested })) } } diff --git a/Packages/CrowPersistence/Sources/CrowPersistence/JSONStore.swift b/Packages/CrowPersistence/Sources/CrowPersistence/JSONStore.swift index b9c6daa5..2411daee 100644 --- a/Packages/CrowPersistence/Sources/CrowPersistence/JSONStore.swift +++ b/Packages/CrowPersistence/Sources/CrowPersistence/JSONStore.swift @@ -1,35 +1,14 @@ import Foundation import CrowCore -/// Persisted slice of `IssueTracker` state needed to survive Crow restarts -/// without re-firing already-handled PR transitions (CROW-456). -/// -/// `previousPRStatus` is keyed by session UUID string so the on-disk shape -/// stays plain JSON. `emittedTransitionKeys` is the legacy array of dedup -/// keys retained for backward read; CROW-505 introduced the richer -/// `emittedTransitionMeta` map, which is now the source of truth on write. -/// Older `store.json` files lacking the map are migrated lazily in -/// `IssueTracker.hydratePersistedState`. -public struct PersistedIssueTrackerState: Codable, Sendable, Equatable { - public var previousPRStatus: [String: PRStatus] - public var emittedTransitionKeys: [String] - /// Optional for backward compat: pre-CROW-505 stores only wrote - /// `emittedTransitionKeys`. Newer stores populate this map and leave - /// `emittedTransitionKeys` empty. - public var emittedTransitionMeta: [String: EmittedTransitionMeta]? - - public init( - previousPRStatus: [String: PRStatus] = [:], - emittedTransitionKeys: [String] = [], - emittedTransitionMeta: [String: EmittedTransitionMeta]? = nil - ) { - self.previousPRStatus = previousPRStatus - self.emittedTransitionKeys = emittedTransitionKeys - self.emittedTransitionMeta = emittedTransitionMeta - } -} - /// Persisted data structure. +/// +/// The pre-CROW-508 `issueTrackerState` blob (last-observed PR status + +/// emitted-transition dedup keys / meta) is no longer persisted: the +/// stateless "needs refine" rule derives the answer from the PR on every +/// poll, so cross-restart state isn't needed. Older `store.json` files may +/// still carry that key; JSON decoding silently ignores unknown keys, so +/// existing stores keep loading cleanly without a migration step. public struct StoreData: Codable, Sendable { public var sessions: [Session] public var worktrees: [SessionWorktree] @@ -40,26 +19,19 @@ public struct StoreData: Codable, Sendable { /// synthesized `Codable` tolerates a missing optional, keeping us backward /// compatible and avoiding the corrupt-store backup path. public var hookStates: [String: PersistedHookState]? - /// Cross-restart cache of last-observed PR statuses + emitted transition - /// dedup keys (CROW-456). Optional for backward compatibility with older - /// `store.json` files; on first read after upgrade it's nil and the - /// tracker behaves as it always did until the next poll persists state. - public var issueTrackerState: PersistedIssueTrackerState? public init( sessions: [Session] = [], worktrees: [SessionWorktree] = [], links: [SessionLink] = [], terminals: [SessionTerminal] = [], - hookStates: [String: PersistedHookState]? = nil, - issueTrackerState: PersistedIssueTrackerState? = nil + hookStates: [String: PersistedHookState]? = nil ) { self.sessions = sessions self.worktrees = worktrees self.links = links self.terminals = terminals self.hookStates = hookStates - self.issueTrackerState = issueTrackerState } } diff --git a/Packages/CrowPersistence/Tests/CrowPersistenceTests/JSONStoreTests.swift b/Packages/CrowPersistence/Tests/CrowPersistenceTests/JSONStoreTests.swift index ad68de9f..bcf5a8e3 100644 --- a/Packages/CrowPersistence/Tests/CrowPersistenceTests/JSONStoreTests.swift +++ b/Packages/CrowPersistence/Tests/CrowPersistenceTests/JSONStoreTests.swift @@ -198,61 +198,40 @@ import Testing #expect(rl.label == "Issue") } -// MARK: - CROW-456 IssueTracker state persistence - -@Test func issueTrackerStateRoundTripsThroughDisk() throws { +// MARK: - CROW-508 store backward compat + +@Test func storeDataIgnoresLegacyIssueTrackerStateBlob() throws { + // CROW-508 removed `issueTrackerState` (and `PersistedIssueTrackerState`, + // `EmittedTransitionMeta`, etc.) β€” the stateless "needs refine" rule + // derives the answer from the PR on every poll, so no cross-restart + // state is needed. Older `store.json` files still carry the blob; the + // decoder must ignore the unknown key rather than failing and falling + // back to the corrupt-store backup path. let dir = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString) defer { try? FileManager.default.removeItem(at: dir) } + try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true) - let sessionID = UUID() - let pr = PRStatus( - checksPass: .failing, - reviewStatus: .changesRequested, - mergeable: .mergeable, - failedCheckNames: ["lint"], - headSha: "abc123", - latestReviewID: "R_kgD0_1" - ) - let dedupKey = "\(sessionID.uuidString)|changesRequested|R_kgD0_1" - let emittedAt = Date(timeIntervalSince1970: 1_700_000_000) - let state = PersistedIssueTrackerState( - previousPRStatus: [sessionID.uuidString: pr], - emittedTransitionKeys: [dedupKey], - emittedTransitionMeta: [dedupKey: EmittedTransitionMeta(emittedAt: emittedAt, headShaAtEmit: "abc123", reFireCount: 2)] - ) + let legacyJSON = """ + { + "sessions": [], + "worktrees": [], + "links": [], + "terminals": [], + "issueTrackerState": { + "previousPRStatus": {}, + "emittedTransitionKeys": [], + "emittedTransitionMeta": {} + } + } + """ + try legacyJSON.data(using: .utf8)!.write(to: dir.appendingPathComponent("store.json")) let store = JSONStore(directory: dir) - store.mutate { $0.issueTrackerState = state } - - let reloaded = JSONStore(directory: dir).data.issueTrackerState - #expect(reloaded == state) - #expect(reloaded?.previousPRStatus[sessionID.uuidString]?.latestReviewID == "R_kgD0_1") - #expect(reloaded?.emittedTransitionKeys == [dedupKey]) - #expect(reloaded?.emittedTransitionMeta?[dedupKey]?.headShaAtEmit == "abc123") - #expect(reloaded?.emittedTransitionMeta?[dedupKey]?.emittedAt == emittedAt) - // CROW-505 review #3: reFireCount survives the round trip so the - // per-emission cap persists across Crow restarts. - #expect(reloaded?.emittedTransitionMeta?[dedupKey]?.reFireCount == 2) -} - -@Test func emittedTransitionMetaDecodesLegacyEntryWithoutReFireCount() throws { - // Backward compat: pre-cap stores wrote `EmittedTransitionMeta` without - // the `reFireCount` field. The decode must default it to 0 so an - // upgrade doesn't wedge the re-fire path by inheriting a stale count. - // Uses the same `.iso8601` date strategy as `JSONStore` so the test - // matches the real on-disk shape. - let legacyJSON = #"{"emittedAt": "2023-11-14T22:13:20Z", "headShaAtEmit": "abc123"}"# - .data(using: .utf8)! - let decoder = JSONDecoder() - decoder.dateDecodingStrategy = .iso8601 - let decoded = try decoder.decode(EmittedTransitionMeta.self, from: legacyJSON) - #expect(decoded.headShaAtEmit == "abc123") - #expect(decoded.reFireCount == 0) + #expect(store.data.sessions.isEmpty) + #expect(store.data.worktrees.isEmpty) } @Test func storeDataDecodesWithoutIssueTrackerStateField() throws { - // Backward compatibility: older store.json files predate CROW-456 and - // won't have the issueTrackerState key. Decoding must still succeed. let dir = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString) defer { try? FileManager.default.removeItem(at: dir) } try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true) @@ -268,6 +247,5 @@ import Testing try legacyJSON.data(using: .utf8)!.write(to: dir.appendingPathComponent("store.json")) let store = JSONStore(directory: dir) - #expect(store.data.issueTrackerState == nil) #expect(store.data.sessions.isEmpty) } diff --git a/Packages/CrowProvider/Sources/CrowProvider/BackendTypes.swift b/Packages/CrowProvider/Sources/CrowProvider/BackendTypes.swift index 6a86ad13..7c45c5ba 100644 --- a/Packages/CrowProvider/Sources/CrowProvider/BackendTypes.swift +++ b/Packages/CrowProvider/Sources/CrowProvider/BackendTypes.swift @@ -47,13 +47,17 @@ public struct PRRecord: Sendable { public let checksState: String // SUCCESS / FAILURE / PENDING / EXPECTED / ERROR / "" public let failedCheckNames: [String] public let latestReviewStates: [String] - /// Node ID of the most recent CHANGES_REQUESTED review on this PR, when - /// available. Drives the round-2 dedup logic in `IssueTracker` so a second - /// formal "Request changes" submission re-arms auto-respond. `nil` when no - /// CHANGES_REQUESTED review is among the latest projected reviews, or on - /// providers (e.g. GitLab) that don't surface stable review IDs in the - /// monitored-MR query. - public let latestReviewID: String? + /// Max `submittedAt` across CHANGES_REQUESTED reviews currently visible + /// on the PR. Drives the stateless "needs refine" rule in `IssueTracker` + /// (CROW-508): compared against `lastSubstantiveCommitAt` to decide + /// whether the agent owes a response. `nil` when no CHANGES_REQUESTED + /// review is visible or the provider doesn't surface timestamps. + public let lastChangesRequestedAt: Date? + /// Max `committedDate` across the PR's commits that are NOT rebases or + /// merges (parent count < 2 AND message does not start with a merge + /// prefix). `nil` when commit data wasn't fetched (e.g. stale-PR + /// follow-up query, GitLab today, or empty commit list). + public let lastSubstantiveCommitAt: Date? /// Used by reconcile tie-breaking when multiple non-OPEN PRs exist on the same branch. public let updatedAt: Date? @@ -74,7 +78,8 @@ public struct PRRecord: Sendable { checksState: String = "", failedCheckNames: [String] = [], latestReviewStates: [String] = [], - latestReviewID: String? = nil, + lastChangesRequestedAt: Date? = nil, + lastSubstantiveCommitAt: Date? = nil, updatedAt: Date? = nil ) { self.number = number @@ -93,7 +98,8 @@ public struct PRRecord: Sendable { self.checksState = checksState self.failedCheckNames = failedCheckNames self.latestReviewStates = latestReviewStates - self.latestReviewID = latestReviewID + self.lastChangesRequestedAt = lastChangesRequestedAt + self.lastSubstantiveCommitAt = lastSubstantiveCommitAt self.updatedAt = updatedAt } } diff --git a/Packages/CrowProvider/Sources/CrowProvider/Backends/GitHubCodeBackend.swift b/Packages/CrowProvider/Sources/CrowProvider/Backends/GitHubCodeBackend.swift index 7b3cd126..f09bc3a4 100644 --- a/Packages/CrowProvider/Sources/CrowProvider/Backends/GitHubCodeBackend.swift +++ b/Packages/CrowProvider/Sources/CrowProvider/Backends/GitHubCodeBackend.swift @@ -322,7 +322,17 @@ public struct GitHubCodeBackend: CodeBackend { } } } - latestReviews(first: 5) { nodes { id state submittedAt } } + latestReviews(first: 20) { nodes { id state submittedAt } } + commits(last: 30) { + nodes { + commit { + oid + messageHeadline + committedDate + parents(first: 2) { totalCount } + } + } + } } } } @@ -437,17 +447,36 @@ public struct GitHubCodeBackend: CodeBackend { } let latestReviewNodes = (node["latestReviews"] as? [String: Any])?["nodes"] as? [[String: Any]] ?? [] let reviewStates = latestReviewNodes.compactMap { $0["state"] as? String } - // Pick the round-2 dedup discriminator deterministically: the - // CHANGES_REQUESTED review with the latest `submittedAt`. GitHub's - // `latestReviews` connection doesn't document a node order, so we - // don't rely on it; ISO-8601 with `Z` is lexicographically sortable - // when the timezone is consistent (which GitHub guarantees), so a - // string `max(by:)` is correct here without parsing dates. - // `max(by:)` over an empty filtered array returns `nil`, which threads - // through to PRStatus.latestReviewID = nil exactly as required. - let latestReviewID = latestReviewNodes + let dateFmt = ISO8601DateFormatter() + dateFmt.formatOptions = [.withInternetDateTime, .withFractionalSeconds] + // Stateless "needs refine" rule (CROW-508): the latest CHANGES_REQUESTED + // submission timestamp anchors "since when does the agent owe a + // response?". `latestReviews(first: 20)` is intentionally broad + // enough to cover several reviewers' latest reviews without paginating + // β€” GitHub orders that connection by reviewer, not by recency, so a + // narrow window could omit the CR we need. + let lastChangesRequestedAt = latestReviewNodes .filter { ($0["state"] as? String) == "CHANGES_REQUESTED" } - .max(by: { ($0["submittedAt"] as? String ?? "") < ($1["submittedAt"] as? String ?? "") })?["id"] as? String + .compactMap { ($0["submittedAt"] as? String).flatMap { dateFmt.date(from: $0) } } + .max() + // Stateless "needs refine" rule (CROW-508): the latest non-merge, + // non-rebase commit timestamp anchors "has the agent substantively + // responded since the review?". A merge commit (parent count >= 2) + // or a commit whose subject starts with `Merge branch|remote-tracking| + // pull request` is excluded so the GitHub "Update branch" button and + // routine rebases can't trick the rule into thinking the agent + // pushed a fix. + let commitNodes = (node["commits"] as? [String: Any])?["nodes"] as? [[String: Any]] ?? [] + let lastSubstantiveCommitAt = commitNodes + .compactMap { node -> Date? in + guard let commit = node["commit"] as? [String: Any] else { return nil } + let parents = (commit["parents"] as? [String: Any])?["totalCount"] as? Int ?? 1 + if parents >= 2 { return nil } + let message = (commit["messageHeadline"] as? String) ?? "" + if Self.isMergeCommitMessage(message) { return nil } + return (commit["committedDate"] as? String).flatMap { dateFmt.date(from: $0) } + } + .max() return PRRecord( number: number, url: url, @@ -465,10 +494,23 @@ public struct GitHubCodeBackend: CodeBackend { checksState: checksState, failedCheckNames: failedCheckNames, latestReviewStates: reviewStates, - latestReviewID: latestReviewID + lastChangesRequestedAt: lastChangesRequestedAt, + lastSubstantiveCommitAt: lastSubstantiveCommitAt ) } + /// Decide whether a commit subject line indicates a rebase/merge that + /// should NOT advance "agent substantively responded since review". + /// Match anchored to the start of the line; the prefix list mirrors what + /// `git merge` / GitHub's "Update branch" button produce. Public so + /// `parsePRNode` and unit tests share the same definition. + nonisolated static func isMergeCommitMessage(_ headline: String) -> Bool { + let trimmed = headline.trimmingCharacters(in: .whitespaces) + return trimmed.hasPrefix("Merge branch ") + || trimmed.hasPrefix("Merge remote-tracking ") + || trimmed.hasPrefix("Merge pull request ") + } + static func parseReviewRequests( _ searchObj: [String: Any]?, dateFormatter: ISO8601DateFormatter, diff --git a/Packages/CrowProvider/Tests/CrowProviderTests/BackendsTests.swift b/Packages/CrowProvider/Tests/CrowProviderTests/BackendsTests.swift index 05b07ad1..a0d1744e 100644 --- a/Packages/CrowProvider/Tests/CrowProviderTests/BackendsTests.swift +++ b/Packages/CrowProvider/Tests/CrowProviderTests/BackendsTests.swift @@ -631,14 +631,14 @@ final class BackendsTests: XCTestCase { XCTAssertEqual(backend.provider, .corveil) } - // MARK: - parseMonitoredPRsResponse latestReviewID (CROW-456) - - /// `latestReviews` connection has no documented order. The parser must - /// pick the CHANGES_REQUESTED review with the latest `submittedAt`, not - /// the first one in the array β€” otherwise round-2 dedup could miss a - /// genuine new review (or worse, flip ids across polls and re-fire - /// auto-respond for no reviewer action). - func testParseMonitoredPRsPicksLatestSubmittedChangesRequestedReview() throws { + // MARK: - parseMonitoredPRsResponse timestamps (CROW-508) + + /// Pre-CROW-508 we picked the latest CR review's *id* for round-2 dedup. + /// The stateless "needs refine" rule needs the *timestamp* of that same + /// review β€” anchor for "since when does the agent owe a response?". + /// The parser must pick the max `submittedAt` across CHANGES_REQUESTED + /// reviews, not the first one in array order. + func testParseMonitoredPRsPicksLatestChangesRequestedTimestamp() throws { let json = """ { "data": { @@ -671,13 +671,12 @@ final class BackendsTests: XCTestCase { """ let listing = try GitHubCodeBackend.parseMonitoredPRsResponse(json) XCTAssertEqual(listing.viewerPRs.count, 1) - XCTAssertEqual(listing.viewerPRs[0].latestReviewID, "R_newest") + let fmt = ISO8601DateFormatter() + fmt.formatOptions = [.withInternetDateTime, .withFractionalSeconds] + XCTAssertEqual(listing.viewerPRs[0].lastChangesRequestedAt, fmt.date(from: "2026-06-07T10:00:00Z")) } - /// When no review has a `submittedAt` (degraded payload), `max(by:)` over - /// equal keys still returns an element rather than throwing β€” verify we - /// at least don't crash and we do return one of the CHANGES_REQUESTED ids. - func testParseMonitoredPRsLatestReviewIDDegradesGracefullyWithoutSubmittedAt() throws { + func testParseMonitoredPRsLastChangesRequestedAtIsNilWhenNoChangesRequested() throws { let json = """ { "data": { @@ -685,15 +684,14 @@ final class BackendsTests: XCTestCase { "pullRequests": { "nodes": [ { - "number": 8, - "url": "https://github.com/a/b/pull/8", + "number": 9, + "url": "https://github.com/a/b/pull/9", "state": "OPEN", - "reviewDecision": "CHANGES_REQUESTED", + "reviewDecision": "APPROVED", "headRefOid": "abc", "latestReviews": { "nodes": [ - {"id": "R_a", "state": "CHANGES_REQUESTED"}, - {"id": "R_b", "state": "CHANGES_REQUESTED"} + {"id": "R_ok", "state": "APPROVED", "submittedAt": "2026-06-07T10:00:00Z"} ] } } @@ -708,11 +706,15 @@ final class BackendsTests: XCTestCase { """ let listing = try GitHubCodeBackend.parseMonitoredPRsResponse(json) XCTAssertEqual(listing.viewerPRs.count, 1) - XCTAssertNotNil(listing.viewerPRs[0].latestReviewID) - XCTAssertTrue(["R_a", "R_b"].contains(listing.viewerPRs[0].latestReviewID)) + XCTAssertNil(listing.viewerPRs[0].lastChangesRequestedAt) } - func testParseMonitoredPRsLatestReviewIDIsNilWhenNoChangesRequested() throws { + /// Merge commits (parents.totalCount >= 2) and rebase-style commits + /// matching the merge-message prefix list must NOT advance the + /// "agent substantively responded" timestamp. Otherwise pressing + /// GitHub's "Update branch" button or rebasing onto main would fool + /// the rule into thinking the agent pushed a fix. + func testParseMonitoredPRsLastSubstantiveCommitExcludesMergesAndRebases() throws { let json = """ { "data": { @@ -720,14 +722,26 @@ final class BackendsTests: XCTestCase { "pullRequests": { "nodes": [ { - "number": 9, - "url": "https://github.com/a/b/pull/9", + "number": 10, + "url": "https://github.com/a/b/pull/10", "state": "OPEN", - "reviewDecision": "APPROVED", + "reviewDecision": "CHANGES_REQUESTED", "headRefOid": "abc", - "latestReviews": { + "latestReviews": {"nodes": []}, + "commits": { "nodes": [ - {"id": "R_ok", "state": "APPROVED", "submittedAt": "2026-06-07T10:00:00Z"} + {"commit": {"oid": "1", "messageHeadline": "real fix", + "committedDate": "2026-06-01T00:00:00Z", + "parents": {"totalCount": 1}}}, + {"commit": {"oid": "2", "messageHeadline": "Merge branch 'main' into feature", + "committedDate": "2026-06-05T00:00:00Z", + "parents": {"totalCount": 2}}}, + {"commit": {"oid": "3", "messageHeadline": "Merge remote-tracking branch 'upstream/main'", + "committedDate": "2026-06-06T00:00:00Z", + "parents": {"totalCount": 2}}}, + {"commit": {"oid": "4", "messageHeadline": "Merge pull request #99", + "committedDate": "2026-06-07T00:00:00Z", + "parents": {"totalCount": 2}}} ] } } @@ -742,7 +756,21 @@ final class BackendsTests: XCTestCase { """ let listing = try GitHubCodeBackend.parseMonitoredPRsResponse(json) XCTAssertEqual(listing.viewerPRs.count, 1) - XCTAssertNil(listing.viewerPRs[0].latestReviewID) + let fmt = ISO8601DateFormatter() + fmt.formatOptions = [.withInternetDateTime, .withFractionalSeconds] + // Only the real fix commit counts; the merge commits are excluded. + XCTAssertEqual(listing.viewerPRs[0].lastSubstantiveCommitAt, fmt.date(from: "2026-06-01T00:00:00Z")) + } + + /// Pure helper used by `parsePRNode`. Both Swift and tests share the + /// same prefix list so a future addition stays in sync. + func testIsMergeCommitMessage() { + XCTAssertTrue(GitHubCodeBackend.isMergeCommitMessage("Merge branch 'main' into feature/x")) + XCTAssertTrue(GitHubCodeBackend.isMergeCommitMessage("Merge remote-tracking branch 'upstream/main'")) + XCTAssertTrue(GitHubCodeBackend.isMergeCommitMessage("Merge pull request #42 from foo/bar")) + XCTAssertFalse(GitHubCodeBackend.isMergeCommitMessage("merge branch")) // case-sensitive prefix + XCTAssertFalse(GitHubCodeBackend.isMergeCommitMessage("Merge two records into one")) // not a merge prefix + XCTAssertFalse(GitHubCodeBackend.isMergeCommitMessage("Fix authentication bug")) } } diff --git a/Sources/Crow/App/AppDelegate.swift b/Sources/Crow/App/AppDelegate.swift index f4f67fe9..53ef7443 100644 --- a/Sources/Crow/App/AppDelegate.swift +++ b/Sources/Crow/App/AppDelegate.swift @@ -778,12 +778,6 @@ final class AppDelegate: NSObject, NSApplicationDelegate { tracker.onPRStatusTransitions = { [weak self] transitions in guard let self else { return } for transition in transitions { - // CROW-505 review #3: re-fire transitions skip the macOS - // notification so a stalled review doesn't generate a fresh - // banner every quiet window for the same review id. The - // re-prompt itself (below) is the useful effect; the - // notification was pure noise. - if transition.isReFire { continue } if let session = self.appState.sessions.first(where: { $0.id == transition.sessionID }) { self.notificationManager?.notifyPRTransition(transition, session: session) } diff --git a/Sources/Crow/App/IssueTracker.swift b/Sources/Crow/App/IssueTracker.swift index d6185b48..c5c51388 100644 --- a/Sources/Crow/App/IssueTracker.swift +++ b/Sources/Crow/App/IssueTracker.swift @@ -77,14 +77,11 @@ final class IssueTracker { var autoRebaseWatcherEnabledProvider: () -> Bool = { false } /// Reads the latest `AutoRespondSettings.respondToChangesRequested` - /// snapshot on every poll. Used solely to gate the stalled-`.changesRequested` - /// re-fire pass (CROW-505): without this gate an opted-out user would still - /// see a fresh "Changes Requested" macOS notification every quiet window - /// (via `onPRStatusTransitions` β†’ `notifyPRTransition`), even though - /// `AutoRespondCoordinator` correctly suppresses the actual dispatch β€” - /// pure notification noise that defeats their opt-out. Defaults to a - /// closure returning `false` so the re-fire path stays inert until - /// AppDelegate wires it. + /// snapshot on every poll. Gates the stateless "needs refine" emission + /// (CROW-508) β€” when the user has opted out, we suppress both the + /// notification and the dispatch, so they don't see fresh "Changes + /// Requested" banners every cooldown window. Defaults to a closure + /// returning `false` so the path stays inert until AppDelegate wires it. var respondToChangesRequestedProvider: () -> Bool = { false } /// Fires after Crow rebased a PR branch and force-pushed it. Wired in @@ -159,42 +156,36 @@ final class IssueTracker { /// the watcher gives up until the head commit changes. nonisolated static let maxAutoRebaseFailureRetries = 3 - /// Last observed `PRStatus` per session, used to compute transitions. - /// Populated lazily on first observation; that first poll never fires - /// transitions (matches the `previousReviewRequestIDs` first-fetch - /// behavior so existing PR state isn't replayed at startup). - /// Internal (not private) so `@testable` tests can seed it when driving the - /// stalled-re-fire pass (CROW-505) without going through a full poll. + /// Last observed `PRStatus` per session. Ephemeral (not persisted across + /// Crow restarts post-CROW-508): only used for in-process `.checksFailing` + /// edge detection. `.changesRequested` no longer reads from this map β€” + /// the stateless `PRStatus.needsRefine` rule derives the answer from the + /// PR snapshot on every poll. + /// Internal (not private) so `@testable` tests can seed it without going + /// through a full poll. var previousPRStatus: [UUID: PRStatus] = [:] - /// Per-emitted-transition metadata keyed by `PRStatusTransition.dedupeKey`, - /// used to suppress duplicates across the in-process lifetime and to power - /// the stalled `.changesRequested` re-fire pass (CROW-505): each entry - /// carries `emittedAt` (quiet-window clock) and `headShaAtEmit` (anti-loop - /// guarantee β€” re-fire only when the author hasn't pushed since dispatch). - /// Cleared per-session when we observe the rule "re-arm" (e.g. checks - /// move back to passing/pending) so subsequent transitions still fire. - /// Internal (not private) for the same `@testable` seeding reason as - /// `previousPRStatus`. - var emittedTransitionMeta: [String: EmittedTransitionMeta] = [:] - - /// Minimum elapsed time before a stalled `.changesRequested` transition can - /// re-fire (CROW-505). 10 min β‰ˆ 10 poll cycles β€” covers Claude Code's - /// longest "thinking on a hard finding" gaps before we conclude the agent - /// has actually stalled. Constant so we can tune if real-world telemetry - /// calls for it. - nonisolated static let stalledRefireQuietWindow: TimeInterval = 10 * 60 - - /// Max times a single `.changesRequested` emission may re-fire before - /// requiring a real edge event (reviewer rotates `latestReviewID`, - /// author pushes, bucket exits) to reset. Capped at 1 so the agent - /// receives at most one re-prompt per stall: the auto-respond prompt - /// explicitly invites the agent to reply rather than push when a finding - /// is unclear or disputed, and a reply is a legitimate terminal state - /// the head-SHA gate alone can't recognize. Without this cap the same - /// prompt would re-inject every quiet window indefinitely, potentially - /// driving duplicate reply comments on the reviewer's PR. - nonisolated static let maxStalledRefires: Int = 1 + /// PR URLs we've observed at least once in this Crow process. First poll + /// records the URL but does NOT dispatch β€” the next poll is the earliest + /// the stateless "needs refine" rule can emit. Ephemeral; a Crow restart + /// re-arms the skip so a single duplicate prompt across a restart + /// (acceptable per CROW-508) is the worst case. + var seenPRs: Set = [] + + /// Per-PR cooldown clock for "needs refine" dispatches. Keyed by PR URL + /// rather than session UUID so that two sessions linked to the same PR + /// can't both burn through the cooldown. Ephemeral by design β€” surviving + /// a restart isn't worth the persistence cost (worst case after restart + /// is one extra prompt, then the cooldown re-applies). + var lastRefineDispatchAt: [String: Date] = [:] + + /// Minimum gap between consecutive "needs refine" dispatches for the same + /// PR (CROW-508). 7 min is a deliberate middle of the 5–10 min range the + /// ticket suggested: long enough that an agent thinking through a hard + /// finding doesn't get re-prompted mid-thought, short enough that a true + /// stall surfaces within ~3 poll cycles. Constant so it can be tuned if + /// real-world telemetry calls for it. + nonisolated static let needsRefineCooldown: TimeInterval = 7 * 60 /// Guards the GitHub-scope console warning so it fires once per session. private var didLogGitHubScopeWarning = false @@ -214,13 +205,9 @@ final class IssueTracker { } func start() { - // Restore cross-restart state from disk before the first poll so the - // initial fetch doesn't re-fire transitions we already handled - // (CROW-456). Stale entries for sessions that no longer exist are - // dropped during hydration. - hydratePersistedState() - - // Initial fetch + // Initial fetch. Post-CROW-508 the tracker is stateless across + // restarts β€” the "needs refine" rule derives from PR data on every + // poll, so there's no `hydratePersistedState` to call here. Task { await refresh() } // Poll on interval @@ -672,7 +659,8 @@ final class IssueTracker { checksState: winner.checksState.isEmpty ? loser.checksState : winner.checksState, failedCheckNames: winner.failedCheckNames.isEmpty ? loser.failedCheckNames : winner.failedCheckNames, latestReviewStates: winner.latestReviewStates.isEmpty ? loser.latestReviewStates : winner.latestReviewStates, - latestReviewID: winner.latestReviewID ?? loser.latestReviewID + lastChangesRequestedAt: winner.lastChangesRequestedAt ?? loser.lastChangesRequestedAt, + lastSubstantiveCommitAt: winner.lastSubstantiveCommitAt ?? loser.lastSubstantiveCommitAt ) } @@ -906,6 +894,8 @@ final class IssueTracker { checksState: pr.checksState, failedCheckNames: pr.failedCheckNames, latestReviewStates: pr.latestReviewStates, + lastChangesRequestedAt: pr.lastChangesRequestedAt, + lastSubstantiveCommitAt: pr.lastSubstantiveCommitAt, updatedAt: pr.updatedAt ) } @@ -1509,20 +1499,23 @@ final class IssueTracker { // MARK: - PR Status (piggyback) /// Build `PRStatus` for each session with a `.pr` link by looking up the PR - /// in the viewer-PR payload. No extra gh calls. - private func applyPRStatuses(viewerPRs: [ViewerPR]) { + /// in the viewer-PR payload. No extra gh calls. Emits two kinds of + /// transitions: + /// - `.checksFailing`: still edge-detected from `previousPRStatus` so a + /// new failing commit only fires once per head. + /// - `.changesRequested`: stateless `PRStatus.needsRefine` rule (CROW-508). + /// Compares the latest CHANGES_REQUESTED review timestamp against the + /// latest substantive (non-merge, non-rebase) commit timestamp; emits + /// when the review is newer, gated by managed-terminal-idle, the + /// `respondToChangesRequested` user setting, the first-observation + /// skip (a PR's first poll never dispatches), and a per-PR cooldown. + func applyPRStatuses(viewerPRs: [ViewerPR]) { guard !viewerPRs.isEmpty else { return } let byURL = Dictionary(viewerPRs.map { ($0.url, $0) }, uniquingKeysWith: Self.mergePRRecords) - // Snapshot tracker state up front so we can skip the JSONStore write - // when this poll produced no observable change. Polls are quiet most - // of the time; without this guard `persistTrackerState` re-reads, - // re-encodes, and atomically rewrites `store.json` every 60s. - let priorPRStatus = previousPRStatus - let priorEmittedMeta = emittedTransitionMeta - var transitions: [PRStatusTransition] = [] let now = Date() + let respondToChangesRequested = respondToChangesRequestedProvider() let sessionsWithPRs = appState.sessions.filter { !$0.isManager } for session in sessionsWithPRs { let links = appState.links(for: session.id) @@ -1532,285 +1525,84 @@ final class IssueTracker { let newStatus = buildPRStatus(from: pr) let oldStatus = previousPRStatus[session.id] - // Re-arm rules whose triggering condition has cleared, so a future - // re-entry (approved β†’ changesRequested again, passing β†’ failing on - // a new commit) can fire even if we previously emitted. - // - // With CROW-456 the `.changesRequested` dedup key is keyed on the - // latest CHANGES_REQUESTED review id, so a new formal review - // naturally produces a different key. This cleanup bounds the - // in-memory map: when we leave the bucket entirely we drop every - // `changesRequested` entry for this session. - if let old = oldStatus { - if old.reviewStatus == .changesRequested && newStatus.reviewStatus != .changesRequested { - let prefix = "\(session.id.uuidString)|changesRequested|" - emittedTransitionMeta = emittedTransitionMeta.filter { !$0.key.hasPrefix(prefix) } - } - // CROW-505: a PR that merged or closed-unmerged while in - // `CHANGES_REQUESTED` keeps its `reviewDecision` unchanged - // (GitHub doesn't reset reviewDecision on close), so the - // bucket-exit rearm above never fires for it. Drop the - // meta entries explicitly on the openβ†’closed edge so the - // map can't accumulate dead entries that the re-fire - // predicate has to repeatedly reject. - if old.isOpen && !newStatus.isOpen { - let prefix = "\(session.id.uuidString)|changesRequested|" - emittedTransitionMeta = emittedTransitionMeta.filter { !$0.key.hasPrefix(prefix) } - } - if old.checksPass == .failing && newStatus.checksPass != .failing { - if let sha = old.headSha { - emittedTransitionMeta.removeValue(forKey: "\(session.id.uuidString)|checksFailing|\(sha)") - } - } - } - - let candidates = PRStatus.transitions( + // Checks-failing edge: fire only when transitioning from + // non-failing to failing. `transitions(from:to:…)` returns at + // most one `.checksFailing` and handles the `old == nil` first- + // observation case (only fires if `new` is itself failing). + transitions.append(contentsOf: PRStatus.transitions( from: oldStatus, to: newStatus, sessionID: session.id, prURL: prLink.url, prNumber: pr.number - ) - for t in candidates where emittedTransitionMeta[t.dedupeKey] == nil { - emittedTransitionMeta[t.dedupeKey] = EmittedTransitionMeta( - emittedAt: now, - headShaAtEmit: newStatus.headSha - ) - transitions.append(t) + )) + + // Stateless "needs refine" rule (CROW-508). Order matters: we + // emit BEFORE recording the PR as seen, so the first poll never + // dispatches. + if respondToChangesRequested, + session.kind != .review, + seenPRs.contains(prLink.url), + PRStatus.needsRefine( + status: newStatus, + terminalIdle: isManagedTerminalIdle(sessionID: session.id) + ), + cooldownElapsed(prURL: prLink.url, now: now) { + lastRefineDispatchAt[prLink.url] = now + transitions.append(PRStatusTransition( + kind: .changesRequested, + sessionID: session.id, + prURL: prLink.url, + prNumber: pr.number, + headSha: newStatus.headSha, + failedCheckNames: [] + )) + NSLog("[IssueTracker] needs-refine fired β€” session=%@, sha=%@, lastCR=%@, lastCommit=%@", + session.id.uuidString as NSString, + (newStatus.headSha ?? "") as NSString, + Self.iso(newStatus.lastChangesRequestedAt) as NSString, + Self.iso(newStatus.lastSubstantiveCommitAt) as NSString) } + seenPRs.insert(prLink.url) previousPRStatus[session.id] = newStatus appState.prStatus[session.id] = newStatus } - // CROW-505: stalled `.changesRequested` re-fire. After the per-session - // pass, walk the emitted-meta map and re-emit any review that has - // stalled β€” agent idle, head SHA unchanged since dispatch, quiet - // window elapsed. Anti-loop is preserved by the head-SHA gate: a real - // agent push advances `previousPRStatus[sid].headSha`, which kills the - // re-fire condition. - transitions.append(contentsOf: reFireStalledChangesRequested(now: now)) - if !transitions.isEmpty { onPRStatusTransitions?(transitions) } - if previousPRStatus != priorPRStatus || emittedTransitionMeta != priorEmittedMeta { - persistTrackerState() - } - applyAutoMerge(viewerPRs: viewerPRs) applyAutoRebase(viewerPRs: viewerPRs) } - // MARK: - Cross-restart persistence (CROW-456) - - /// Load `previousPRStatus` + `emittedTransitionMeta` from disk on startup. - /// Drops entries whose session UUID is no longer present so the in-memory - /// state can't grow without bound across long-lived installs. - /// - /// Migrates pre-CROW-505 stores that only wrote `emittedTransitionKeys`: - /// each legacy key gets a meta entry with `emittedAt = now` (starts the - /// quiet-window clock fresh β€” no immediate re-fire flood at upgrade) and - /// `headShaAtEmit` sourced from `previousPRStatus` (also persisted), so - /// the head-SHA anti-loop gate works on migrated entries too. - private func hydratePersistedState() { - guard let persisted = JSONStore().data.issueTrackerState else { return } - let validSessionIDs = Set(appState.sessions.map(\.id)) - var restored: [UUID: PRStatus] = [:] - for (uuidString, status) in persisted.previousPRStatus { - guard let uuid = UUID(uuidString: uuidString), validSessionIDs.contains(uuid) else { continue } - restored[uuid] = status - } - previousPRStatus = restored - - // Keep only entries whose session UUID prefix matches a live session. - // Each key has shape "|kind|…"; reject anything else. - let validUUIDPrefixes = Set(validSessionIDs.map { "\($0.uuidString)|" }) - - if let persistedMeta = persisted.emittedTransitionMeta { - // CROW-505+: read the rich map directly. - emittedTransitionMeta = persistedMeta.filter { entry in - validUUIDPrefixes.contains(where: { entry.key.hasPrefix($0) }) - } - } else { - // Pre-CROW-505 store β€” synthesize metadata from the legacy keys. - let now = Date() - var migrated: [String: EmittedTransitionMeta] = [:] - for key in persisted.emittedTransitionKeys - where validUUIDPrefixes.contains(where: { key.hasPrefix($0) }) { - let sha = Self.parseSessionID(fromDedupKey: key) - .flatMap { restored[$0]?.headSha } - migrated[key] = EmittedTransitionMeta(emittedAt: now, headShaAtEmit: sha) - } - emittedTransitionMeta = migrated - } - } - - /// Persist `previousPRStatus` + `emittedTransitionMeta` after every poll - /// that mutated them. `JSONStore.mutate` coalesces writes so this is cheap - /// even when transitions are quiet. - /// - /// Writes both the new map and an empty legacy `emittedTransitionKeys` - /// array: older Crow builds that read the array but not the map will see - /// no keys and re-fire as if from a fresh install (graceful downgrade). - private func persistTrackerState() { - let snapshot = PersistedIssueTrackerState( - previousPRStatus: Dictionary(uniqueKeysWithValues: previousPRStatus.map { ($0.key.uuidString, $0.value) }), - emittedTransitionKeys: [], - emittedTransitionMeta: emittedTransitionMeta - ) - JSONStore().mutate { data in - data.issueTrackerState = snapshot + /// True when the managed terminal for the session is at agent-launched + /// readiness with the agent in `.idle` (not working, waiting on input, + /// or already done). Both gates are required: a pre-launch terminal + /// can't be "stalled" because the agent never had a chance to run, and + /// firing into a busy agent would just interrupt productive work. + private func isManagedTerminalIdle(sessionID: UUID) -> Bool { + guard let managedTerminal = appState.terminals(for: sessionID).first(where: { $0.isManaged }) else { + return false } + guard appState.terminalReadiness[managedTerminal.id] == .agentLaunched else { return false } + return appState.hookState(for: sessionID).activityState == .idle } - // MARK: - Stalled `.changesRequested` re-fire (CROW-505) - - /// Re-emit any `.changesRequested` transition whose original auto-respond - /// prompt has stalled β€” agent idle, terminal launched, head SHA unchanged - /// since dispatch, and the quiet window has elapsed. Anti-loop guarantee: - /// a real agent push advances `previousPRStatus[sid].headSha`, breaking the - /// SHA-match condition, so we never re-fire on the agent's own response. - /// - /// Side effects (only when a re-fire actually emits): - /// - `emittedTransitionMeta[key].emittedAt` is bumped to `now` so the next - /// quiet-window clock starts fresh. - /// - `NSLog`'d distinctive line so operators can grep for the behavior. - /// - /// Note on iteration: we mutate `emittedTransitionMeta` during the loop, - /// but Swift dictionaries are value types β€” `for (key, meta) in - /// emittedTransitionMeta` iterates over a copy-on-write snapshot taken at - /// loop entry, so the in-loop assignment to `self.emittedTransitionMeta` - /// doesn't perturb the iteration order or visit set. - func reFireStalledChangesRequested(now: Date) -> [PRStatusTransition] { - // Gate on the user-facing `AutoRespondSettings.respondToChangesRequested` - // toggle. Without this gate a user who explicitly opted out would still - // get a fresh `Changes Requested` macOS notification every quiet window - // (`onPRStatusTransitions` β†’ `notifyPRTransition` posts unconditionally; - // only the actual prompt dispatch is gated downstream in - // `AutoRespondCoordinator.handle`) β€” pure notification noise that - // defeats their opt-out. New configs default to `true` post-CROW-505, - // so this gate only suppresses behavior for users who deliberately - // disabled it. - guard respondToChangesRequestedProvider() else { return [] } - - var out: [PRStatusTransition] = [] - for (key, meta) in emittedTransitionMeta { - // Only `.changesRequested` keys re-fire. Checks-failing already - // re-arms on a new commit (via the SHA-keyed dedup). - guard let sid = Self.parseSessionID(fromDedupKey: key), - Self.parseKind(fromDedupKey: key) == .changesRequested else { continue } - guard let session = appState.sessions.first(where: { $0.id == sid }) else { continue } - // Skip review sessions for the same reason `AutoRespondCoordinator` - // gates them β€” never commit on behalf of the reviewer. - guard session.kind != .review else { continue } - guard let prLink = appState.links(for: sid).first(where: { $0.linkType == .pr }) else { continue } - - let currentStatus = previousPRStatus[sid] - let agentActivity = appState.hookState(for: sid).activityState - let managedTerminal = appState.terminals(for: sid).first(where: { $0.isManaged }) - let readiness = managedTerminal.flatMap { appState.terminalReadiness[$0.id] } - - guard Self.shouldReFireStalledChangesRequested( - meta: meta, - currentStatus: currentStatus, - agentActivity: agentActivity, - terminalReadiness: readiness, - now: now, - quietWindow: Self.stalledRefireQuietWindow, - maxRefires: Self.maxStalledRefires - ) else { continue } - - guard let status = currentStatus else { continue } - let synthetic = PRStatusTransition( - kind: .changesRequested, - sessionID: sid, - prURL: prLink.url, - prNumber: QuickActionPrompts.parsePRNumber(from: prLink.url), - headSha: status.headSha, - latestReviewID: status.latestReviewID, - failedCheckNames: [], - isReFire: true - ) - out.append(synthetic) - // Refresh emittedAt + bump the count so the next quiet-window - // clock starts now and the per-emission cap (CROW-505 review #3) - // ratchets toward exhaustion. - let newCount = meta.reFireCount + 1 - emittedTransitionMeta[key] = EmittedTransitionMeta( - emittedAt: now, - headShaAtEmit: status.headSha, - reFireCount: newCount - ) - let elapsed = now.timeIntervalSince(meta.emittedAt) - NSLog("[IssueTracker] auto-refine re-fired (stalled review) β€” session=%@, review=%@, sha=%@, elapsed=%.0fs, count=%d/%d", - sid.uuidString as NSString, - (status.latestReviewID ?? "") as NSString, - (status.headSha ?? "") as NSString, - elapsed, - newCount, - Self.maxStalledRefires) - } - return out + /// True when no prior dispatch is recorded for this PR or the cooldown + /// has elapsed since the last one. Driven by `needsRefineCooldown`. + private func cooldownElapsed(prURL: String, now: Date) -> Bool { + guard let last = lastRefineDispatchAt[prURL] else { return true } + return now.timeIntervalSince(last) >= Self.needsRefineCooldown } - /// Decide whether a stalled `.changesRequested` entry is eligible for - /// re-fire. Pure (no AppState) so the conditions can be exercised - /// independently of the polling loop. - /// - /// Returns `true` only when all conditions from CROW-505 hold: - /// (1) PR still in CHANGES_REQUESTED and still open, (2) agent idle on - /// the managed terminal, which must have actually reached - /// `.agentLaunched`, (3) head SHA unchanged since dispatch (anti-loop), - /// (4) quiet window elapsed, and (5) re-fire count below the cap so a - /// legitimate reply-instead-of-push doesn't drive infinite re-prompts. - nonisolated static func shouldReFireStalledChangesRequested( - meta: EmittedTransitionMeta, - currentStatus: PRStatus?, - agentActivity: AgentActivityState, - terminalReadiness: TerminalReadiness?, - now: Date, - quietWindow: TimeInterval, - maxRefires: Int - ) -> Bool { - // (1) PR still in CHANGES_REQUESTED and still OPEN. The `isOpen` check - // is what stops the re-fire from prompting the agent to "address - // review feedback" on a merged or closed-unmerged PR: GitHub leaves - // `reviewDecision == CHANGES_REQUESTED` on close, so without this - // gate a dead PR would re-prompt indefinitely. The re-arm path in - // `applyPRStatuses` also drops the meta entry when `isOpen` flips - // false; this predicate-level check is defense in depth. - guard let s = currentStatus, s.reviewStatus == .changesRequested, s.isOpen else { return false } - // (2) Agent idle, not actively working / waiting on input. - guard agentActivity == .idle else { return false } - // (2b) Managed terminal actually launched the agent. Pre-launch terminals - // can't be "stalled" β€” the agent never had a chance to run. - guard terminalReadiness == .agentLaunched else { return false } - // (3) Head SHA unchanged since we last emitted. A nil baseline (no SHA - // known at emit time) can't prove the author hasn't pushed, so we - // conservatively refuse to re-fire. - guard let metaSha = meta.headShaAtEmit, metaSha == s.headSha else { return false } - // (4) Quiet window has elapsed. - guard now.timeIntervalSince(meta.emittedAt) >= quietWindow else { return false } - // (5) Re-fire cap not yet reached. Edge events (review id rotates, - // author pushes, bucket exits, PR closes) clear the entry and reset - // the count by virtue of producing a new emission. - return meta.reFireCount < maxRefires - } - - /// Parse the session UUID prefix of a dedup key. Returns nil for malformed - /// keys. The key format is `"||"`. - nonisolated static func parseSessionID(fromDedupKey key: String) -> UUID? { - guard let pipe = key.firstIndex(of: "|") else { return nil } - return UUID(uuidString: String(key[.. PRStatusTransition.Kind? { - let parts = key.split(separator: "|", maxSplits: 2, omittingEmptySubsequences: false) - guard parts.count >= 2 else { return nil } - return PRStatusTransition.Kind(rawValue: String(parts[1])) + /// ISO-8601 timestamp string for logging, or "-" for nil. + nonisolated static func iso(_ date: Date?) -> String { + guard let date else { return "-" } + let fmt = ISO8601DateFormatter() + fmt.formatOptions = [.withInternetDateTime] + return fmt.string(from: date) } // MARK: - Auto-Merge Watcher (CROW-299) @@ -2204,8 +1996,9 @@ final class IssueTracker { mergeable: mergeStatus, failedCheckNames: failedChecks, headSha: pr.headRefOid.isEmpty ? nil : pr.headRefOid, - latestReviewID: pr.latestReviewID, - isOpen: pr.state == "OPEN" + isOpen: pr.state == "OPEN", + lastChangesRequestedAt: pr.lastChangesRequestedAt, + lastSubstantiveCommitAt: pr.lastSubstantiveCommitAt ) } diff --git a/Tests/CrowTests/IssueTrackerDedupTests.swift b/Tests/CrowTests/IssueTrackerDedupTests.swift index 025b109d..c594377b 100644 --- a/Tests/CrowTests/IssueTrackerDedupTests.swift +++ b/Tests/CrowTests/IssueTrackerDedupTests.swift @@ -24,7 +24,8 @@ struct IssueTrackerDedupTests { checksState: String = "", failedCheckNames: [String] = [], latestReviewStates: [String] = [], - latestReviewID: String? = nil + lastChangesRequestedAt: Date? = nil, + lastSubstantiveCommitAt: Date? = nil ) -> IssueTracker.ViewerPR { IssueTracker.ViewerPR( number: number, @@ -43,7 +44,8 @@ struct IssueTrackerDedupTests { checksState: checksState, failedCheckNames: failedCheckNames, latestReviewStates: latestReviewStates, - latestReviewID: latestReviewID + lastChangesRequestedAt: lastChangesRequestedAt, + lastSubstantiveCommitAt: lastSubstantiveCommitAt ) } @@ -139,23 +141,35 @@ struct IssueTrackerDedupTests { #expect(byURL[url]?.state == "MERGED") } - // MARK: - CROW-456 latestReviewID propagation + // MARK: - CROW-508 timestamp propagation - @Test func mergePRRecordsPrefersWinnerLatestReviewID() { + @Test func mergePRRecordsPrefersWinnerLastChangesRequestedAt() { // Winner is whichever has the higher-rank state (MERGED beats OPEN). - // Forward its latestReviewID; fall back to loser's only when winner's - // is nil so we never lose round-2 dedup data during the merge. + // Forward its lastChangesRequestedAt; fall back to loser's only when + // winner's is nil so the stateless "needs refine" rule never loses + // the anchor timestamp during merge. let url = "https://github.com/radiusmethod/corveil/pull/201" - let openWithID = makeViewerPR(url: url, state: "OPEN", latestReviewID: "R_open") - let mergedWithID = makeViewerPR(url: url, state: "MERGED", latestReviewID: "R_merged") - #expect(IssueTracker.mergePRRecords(openWithID, mergedWithID).latestReviewID == "R_merged") - #expect(IssueTracker.mergePRRecords(mergedWithID, openWithID).latestReviewID == "R_merged") + let openTs = Date(timeIntervalSince1970: 1_700_000_000) + let mergedTs = Date(timeIntervalSince1970: 1_700_001_000) + let open = makeViewerPR(url: url, state: "OPEN", lastChangesRequestedAt: openTs) + let merged = makeViewerPR(url: url, state: "MERGED", lastChangesRequestedAt: mergedTs) + #expect(IssueTracker.mergePRRecords(open, merged).lastChangesRequestedAt == mergedTs) + #expect(IssueTracker.mergePRRecords(merged, open).lastChangesRequestedAt == mergedTs) } - @Test func mergePRRecordsBackfillsLatestReviewIDWhenWinnerLacks() { + @Test func mergePRRecordsBackfillsLastChangesRequestedAtWhenWinnerLacks() { let url = "https://github.com/radiusmethod/corveil/pull/201" - let openWithID = makeViewerPR(url: url, state: "OPEN", latestReviewID: "R_open") - let mergedNoID = makeViewerPR(url: url, state: "MERGED") - #expect(IssueTracker.mergePRRecords(openWithID, mergedNoID).latestReviewID == "R_open") + let openTs = Date(timeIntervalSince1970: 1_700_000_000) + let open = makeViewerPR(url: url, state: "OPEN", lastChangesRequestedAt: openTs) + let mergedNoTs = makeViewerPR(url: url, state: "MERGED") + #expect(IssueTracker.mergePRRecords(open, mergedNoTs).lastChangesRequestedAt == openTs) + } + + @Test func mergePRRecordsBackfillsLastSubstantiveCommitAtWhenWinnerLacks() { + let url = "https://github.com/radiusmethod/corveil/pull/201" + let openTs = Date(timeIntervalSince1970: 1_700_000_000) + let open = makeViewerPR(url: url, state: "OPEN", lastSubstantiveCommitAt: openTs) + let mergedNoTs = makeViewerPR(url: url, state: "MERGED") + #expect(IssueTracker.mergePRRecords(open, mergedNoTs).lastSubstantiveCommitAt == openTs) } } diff --git a/Tests/CrowTests/IssueTrackerNeedsRefineTests.swift b/Tests/CrowTests/IssueTrackerNeedsRefineTests.swift new file mode 100644 index 00000000..e1d063de --- /dev/null +++ b/Tests/CrowTests/IssueTrackerNeedsRefineTests.swift @@ -0,0 +1,212 @@ +import Foundation +import Testing +import CrowCore +import CrowProvider +@testable import Crow + +/// CROW-508 acceptance tests for the stateless "needs refine" dispatch path +/// in `IssueTracker`. Drives `applyPRStatuses` end-to-end with fixture +/// `PRRecord`s and asserts the emitted `.changesRequested` transitions, +/// covering each of the four acceptance criteria in the ticket: +/// +/// 1. Round-N stall β€” review newer than last commit β†’ fires on idle poll. +/// 2. Merge-from-main doesn't reset β€” `lastSubstantiveCommitAt` upstream- +/// filtered to exclude merge commits, so a "Update branch" push leaves +/// the rule firing. +/// 3. Real fix flips it β€” non-merge commit advances the timestamp, rule +/// stops firing. +/// 4. Restart-safe β€” fresh tracker (no `seenPRs`, no `lastRefineDispatchAt`) +/// skips first poll, fires on second, blocks on cooldown. +@Suite("IssueTracker stateless needsRefine (CROW-508)") +@MainActor +struct IssueTrackerNeedsRefineTests { + private let prURL = "https://github.com/foo/bar/pull/123" + private let shaA = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + private let reviewAt = Date(timeIntervalSince1970: 1_700_000_000) + private let beforeReview = Date(timeIntervalSince1970: 1_699_999_000) + private let afterReview = Date(timeIntervalSince1970: 1_700_001_000) + + /// Build a tracker + session + managed terminal + PR link + idle hook + /// state so the dispatch path's gates (terminal idle, agent activity + /// idle, `respondToChangesRequested` on) are satisfied. Callers can + /// override readiness/activity per test by mutating after construction. + private func makeTracker( + respondToChangesRequested: Bool = true, + readiness: TerminalReadiness = .agentLaunched, + agentIdle: Bool = true + ) -> (tracker: IssueTracker, sessionID: UUID, captured: TransitionCapture) { + let state = AppState() + let session = Session(name: "feature/stateless-test", kind: .work) + state.sessions = [session] + state.links[session.id] = [ + SessionLink(sessionID: session.id, label: "PR #123", url: prURL, linkType: .pr) + ] + let terminal = SessionTerminal( + sessionID: session.id, + name: "Claude", + cwd: "/tmp", + command: "claude", + isManaged: true + ) + state.terminals[session.id] = [terminal] + state.terminalReadiness[terminal.id] = readiness + // Hook state activity defaults to .idle from AppState β€” set + // explicitly when the test wants it busy. + if !agentIdle { + state.hookState(for: session.id).activityState = .working + } + + let tracker = IssueTracker(appState: state, providerManager: ProviderManager()) + tracker.respondToChangesRequestedProvider = { respondToChangesRequested } + + let captured = TransitionCapture() + tracker.onPRStatusTransitions = { transitions in + captured.append(transitions) + } + return (tracker, session.id, captured) + } + + /// Fixture PR with the stateless rule's inputs populated. Defaults + /// produce a "needs refine" snapshot β€” override per test as needed. + private func makeViewerPR( + reviewDecision: String = "CHANGES_REQUESTED", + state: String = "OPEN", + sha: String? = nil, + lastChangesRequestedAt: Date?, + lastSubstantiveCommitAt: Date? + ) -> PRRecord { + PRRecord( + number: 123, + url: prURL, + state: state, + mergeable: "MERGEABLE", + mergeStateStatus: "CLEAN", + reviewDecision: reviewDecision, + isDraft: false, + headRefName: "feature/stateless-test", + headRefOid: sha ?? shaA, + baseRefName: "main", + repoNameWithOwner: "foo/bar", + labels: [], + linkedIssueReferences: [], + checksState: "SUCCESS", + failedCheckNames: [], + latestReviewStates: ["CHANGES_REQUESTED"], + lastChangesRequestedAt: lastChangesRequestedAt, + lastSubstantiveCommitAt: lastSubstantiveCommitAt + ) + } + + // MARK: - Acceptance Test 1 β€” round-N stall fires on idle poll (after first-observation skip) + + @Test + func roundNStallFiresOnSecondPoll() { + let (tracker, _, captured) = makeTracker() + let pr = makeViewerPR(lastChangesRequestedAt: reviewAt, lastSubstantiveCommitAt: beforeReview) + + // Poll 1: first observation β€” must NOT dispatch. + tracker.applyPRStatuses(viewerPRs: [pr]) + #expect(captured.changesRequestedCount == 0) + #expect(tracker.seenPRs.contains(prURL)) + + // Poll 2: PR snapshot unchanged, rule still holds β†’ fires. + tracker.applyPRStatuses(viewerPRs: [pr]) + #expect(captured.changesRequestedCount == 1) + #expect(tracker.lastRefineDispatchAt[prURL] != nil) + } + + // MARK: - Acceptance Test 2 β€” merge-from-main does NOT advance the timestamp + + @Test + func mergeFromMainDoesNotFlipTheRule() { + // The upstream parser excludes merge commits from `lastSubstantiveCommitAt`, + // so a "Update branch" push leaves the timestamp at its pre-review + // value. From the tracker's perspective: the rule keeps firing. + let (tracker, _, captured) = makeTracker() + let pr = makeViewerPR(lastChangesRequestedAt: reviewAt, lastSubstantiveCommitAt: beforeReview) + tracker.seenPRs.insert(prURL) // skip first-observation gate + + // Snapshot is post-"Update branch": commit timestamp is still pre-review. + tracker.applyPRStatuses(viewerPRs: [pr]) + #expect(captured.changesRequestedCount == 1) + } + + // MARK: - Acceptance Test 3 β€” real fix advances the timestamp, rule stops firing + + @Test + func realFixStopsTheRule() { + let (tracker, _, captured) = makeTracker() + tracker.seenPRs.insert(prURL) // skip first-observation gate + + // Real fix push: lastSubstantiveCommitAt advances past lastChangesRequestedAt. + let afterFix = makeViewerPR(lastChangesRequestedAt: reviewAt, lastSubstantiveCommitAt: afterReview) + tracker.applyPRStatuses(viewerPRs: [afterFix]) + #expect(captured.changesRequestedCount == 0) + } + + // MARK: - Acceptance Test 4 β€” restart safety: first-observation skip + cooldown + + @Test + func restartFreshTrackerSkipsThenFiresThenCoolsDown() { + let (tracker, _, captured) = makeTracker() + let pr = makeViewerPR(lastChangesRequestedAt: reviewAt, lastSubstantiveCommitAt: beforeReview) + + // Poll 1 after restart: first-observation skip blocks dispatch. + tracker.applyPRStatuses(viewerPRs: [pr]) + #expect(captured.changesRequestedCount == 0) + + // Poll 2: eligible. Fires once. + tracker.applyPRStatuses(viewerPRs: [pr]) + #expect(captured.changesRequestedCount == 1) + let firstDispatchAt = tracker.lastRefineDispatchAt[prURL] + #expect(firstDispatchAt != nil) + + // Poll 3 (< cooldown elapsed): cooldown blocks further dispatch. + tracker.applyPRStatuses(viewerPRs: [pr]) + #expect(captured.changesRequestedCount == 1) + #expect(tracker.lastRefineDispatchAt[prURL] == firstDispatchAt) + + // Simulate cooldown elapsed by backdating the dispatch clock. + tracker.lastRefineDispatchAt[prURL] = Date().addingTimeInterval(-IssueTracker.needsRefineCooldown - 1) + tracker.applyPRStatuses(viewerPRs: [pr]) + #expect(captured.changesRequestedCount == 2) + } + + // MARK: - Defense-in-depth gates + + @Test + func respondToChangesRequestedOffSuppressesDispatch() { + let (tracker, _, captured) = makeTracker(respondToChangesRequested: false) + tracker.seenPRs.insert(prURL) + let pr = makeViewerPR(lastChangesRequestedAt: reviewAt, lastSubstantiveCommitAt: beforeReview) + tracker.applyPRStatuses(viewerPRs: [pr]) + #expect(captured.changesRequestedCount == 0) + } + + @Test + func busyAgentSuppressesDispatch() { + let (tracker, _, captured) = makeTracker(agentIdle: false) + tracker.seenPRs.insert(prURL) + let pr = makeViewerPR(lastChangesRequestedAt: reviewAt, lastSubstantiveCommitAt: beforeReview) + tracker.applyPRStatuses(viewerPRs: [pr]) + #expect(captured.changesRequestedCount == 0) + } + + @Test + func preLaunchTerminalSuppressesDispatch() { + let (tracker, _, captured) = makeTracker(readiness: .uninitialized) + tracker.seenPRs.insert(prURL) + let pr = makeViewerPR(lastChangesRequestedAt: reviewAt, lastSubstantiveCommitAt: beforeReview) + tracker.applyPRStatuses(viewerPRs: [pr]) + #expect(captured.changesRequestedCount == 0) + } +} + +/// Captures emitted transitions across multiple polls. Class so the test's +/// closure can mutate it without value-type aliasing surprises. +@MainActor +private final class TransitionCapture { + var all: [PRStatusTransition] = [] + func append(_ ts: [PRStatusTransition]) { all.append(contentsOf: ts) } + var changesRequestedCount: Int { all.filter { $0.kind == .changesRequested }.count } +} diff --git a/Tests/CrowTests/IssueTrackerStalledRefireTests.swift b/Tests/CrowTests/IssueTrackerStalledRefireTests.swift deleted file mode 100644 index 945e1fe9..00000000 --- a/Tests/CrowTests/IssueTrackerStalledRefireTests.swift +++ /dev/null @@ -1,492 +0,0 @@ -import Foundation -import Testing -import CrowCore -import CrowProvider -@testable import Crow - -/// CROW-505: `IssueTracker` re-fires `.changesRequested` transitions whose -/// auto-respond prompt has stalled (agent idle, head SHA unchanged, quiet -/// window elapsed). These tests cover the pure -/// `shouldReFireStalledChangesRequested` predicate plus the dedup-key -/// parse helpers that drive the wiring layer. -@Suite("IssueTracker stalled re-fire predicate (CROW-505)") -struct IssueTrackerStalledRefireTests { - - private let quietWindow: TimeInterval = 10 * 60 - private let now = Date(timeIntervalSince1970: 1_700_000_000) - private let shaA = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - private let shaB = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" - - private func meta(emittedSecondsAgo: TimeInterval = 11 * 60, headShaAtEmit: String? = nil, reFireCount: Int = 0) -> EmittedTransitionMeta { - EmittedTransitionMeta( - emittedAt: now.addingTimeInterval(-emittedSecondsAgo), - headShaAtEmit: headShaAtEmit ?? shaA, - reFireCount: reFireCount - ) - } - - /// Convenience wrapper so each predicate call doesn't have to repeat - /// `quietWindow` + `maxRefires`. The defaults match the production - /// `IssueTracker.maxStalledRefires`, so the test reads the constant - /// (locking against silent cap bumps). - private func shouldReFire( - meta: EmittedTransitionMeta, - currentStatus: PRStatus?, - agentActivity: AgentActivityState = .idle, - terminalReadiness: TerminalReadiness? = .agentLaunched, - maxRefires: Int = IssueTracker.maxStalledRefires - ) -> Bool { - IssueTracker.shouldReFireStalledChangesRequested( - meta: meta, - currentStatus: currentStatus, - agentActivity: agentActivity, - terminalReadiness: terminalReadiness, - now: now, - quietWindow: quietWindow, - maxRefires: maxRefires - ) - } - - private func status( - review: PRStatus.ReviewStatus = .changesRequested, - sha: String? = nil, - reviewID: String? = "R_1", - isOpen: Bool = true, - mergeable: PRStatus.MergeStatus = .unknown - ) -> PRStatus { - PRStatus( - checksPass: .pending, - reviewStatus: review, - mergeable: mergeable, - failedCheckNames: [], - headSha: sha ?? shaA, - latestReviewID: reviewID, - isOpen: isOpen - ) - } - - // MARK: - Bug repro (Test 1) - - @Test - func reFiresWhenAllConditionsMet() { - // The CROW-505 bug repro: PR still in CHANGES_REQUESTED, agent went - // idle without addressing all findings, head SHA hasn't advanced - // (agent didn't push), and quiet window has elapsed. - #expect(shouldReFire( - meta: meta(emittedSecondsAgo: quietWindow + 1, headShaAtEmit: shaA), - currentStatus: status(sha: shaA) - )) - } - - // MARK: - Anti-loop guarantee (Test 2) - - @Test - func doesNotReFireWhenAgentPushedNewCommit() { - // The single most important invariant: a real agent push advances the - // head SHA, which kills the re-fire. This preserves the CROW-456 - // anti-loop guarantee β€” the agent's own response push doesn't - // re-trigger the prompt that told it to push. - #expect(!shouldReFire( - meta: meta(emittedSecondsAgo: quietWindow + 1, headShaAtEmit: shaA), - currentStatus: status(sha: shaB) // SHA advanced - )) - } - - // MARK: - Round-2 reviewer review still fires (Test 3) - - @Test - func roundTwoFromReviewerProducesDistinctDedupKey() { - // Reviewer submitting a fresh formal review rotates `latestReviewID`, - // which produces a different `dedupeKey`. The existing - // `applyPRStatuses` path emits unconditionally on a new key β€” the - // re-fire predicate is never consulted for this case. Sanity check - // by computing the two keys and asserting they differ. - let sid = UUID() - let r1 = PRStatusTransition( - kind: .changesRequested, sessionID: sid, prURL: "https://example/1", - prNumber: 1, headSha: shaA, latestReviewID: "R_1" - ) - let r2 = PRStatusTransition( - kind: .changesRequested, sessionID: sid, prURL: "https://example/1", - prNumber: 1, headSha: shaA, latestReviewID: "R_2" - ) - #expect(r1.dedupeKey != r2.dedupeKey) - } - - // MARK: - Agent busy β€” no re-fire (Test 4) - - @Test - func doesNotReFireWhileAgentWorking() { - // Don't interrupt an agent that's actively working β€” they may already - // be addressing the feedback. - #expect(!shouldReFire( - meta: meta(emittedSecondsAgo: quietWindow + 1, headShaAtEmit: shaA), - currentStatus: status(sha: shaA), - agentActivity: .working - )) - } - - @Test - func doesNotReFireWhileAgentWaiting() { - // `.waiting` means the agent is blocked on user input β€” re-firing now - // would push another prompt into a queue the user is about to answer. - #expect(!shouldReFire( - meta: meta(emittedSecondsAgo: quietWindow + 1, headShaAtEmit: shaA), - currentStatus: status(sha: shaA), - agentActivity: .waiting - )) - } - - @Test - func doesNotReFireWhileAgentDone() { - // `.done` is the terminal "completed a turn" state β€” it's followed by - // a hook reset to `.idle`. Re-firing on `.done` would race. - #expect(!shouldReFire( - meta: meta(emittedSecondsAgo: quietWindow + 1, headShaAtEmit: shaA), - currentStatus: status(sha: shaA), - agentActivity: .done - )) - } - - // MARK: - Quiet window respected (Test 5) - - @Test - func doesNotReFireBeforeQuietWindowElapsed() { - // Quiet window: agents can spend 5+ minutes thinking on a hard - // finding. Don't re-prompt before they've had a chance to act. - #expect(!shouldReFire( - meta: meta(emittedSecondsAgo: quietWindow - 1, headShaAtEmit: shaA), - currentStatus: status(sha: shaA) - )) - } - - // MARK: - Terminal not launched (Test 6) - - @Test - func doesNotReFireWhenTerminalNotLaunched() { - // A pre-launch terminal (shellReady or earlier) can't be "stalled" β€” - // the agent never ran. Wait for the launch to finish first. - for readiness: TerminalReadiness in [.uninitialized, .surfaceCreated, .shellReady, .timedOut, .failed] { - #expect(!shouldReFire( - meta: meta(emittedSecondsAgo: quietWindow + 1, headShaAtEmit: shaA), - currentStatus: status(sha: shaA), - terminalReadiness: readiness - ), "expected no re-fire at readiness \(readiness)") - } - } - - @Test - func doesNotReFireWhenTerminalReadinessUnknown() { - // No managed terminal at all β†’ no readiness entry. Can't re-fire. - #expect(!shouldReFire( - meta: meta(emittedSecondsAgo: quietWindow + 1, headShaAtEmit: shaA), - currentStatus: status(sha: shaA), - terminalReadiness: nil - )) - } - - // MARK: - PR no longer in CHANGES_REQUESTED - - @Test - func doesNotReFireWhenPRLeftChangesRequestedBucket() { - // If the PR moved to APPROVED or the bucket cleared otherwise, - // re-firing is meaningless. The re-arm path in `applyPRStatuses` - // also drops the entry on this transition, but the predicate - // double-checks. - for review: PRStatus.ReviewStatus in [.approved, .reviewRequired, .unknown] { - #expect(!shouldReFire( - meta: meta(emittedSecondsAgo: quietWindow + 1, headShaAtEmit: shaA), - currentStatus: status(review: review, sha: shaA) - ), "expected no re-fire at review status \(review)") - } - } - - // MARK: - PR no longer open (CROW-505 review #2) - - @Test - func doesNotReFireOnMergedPR() { - // A PR that merged while in CHANGES_REQUESTED keeps reviewDecision - // unchanged, but the re-fire must not prompt the agent to "address - // review feedback" on a merged PR β€” there's nothing to push to. - #expect(!shouldReFire( - meta: meta(emittedSecondsAgo: quietWindow + 1, headShaAtEmit: shaA), - currentStatus: status(sha: shaA, isOpen: false, mergeable: .merged) - )) - } - - @Test - func doesNotReFireOnClosedUnmergedPR() { - // Same concern, the more common case: PR closed without merging. - // `mergeable` stays `.unknown` on closed PRs β€” only `isOpen` flags - // this as dead. The reviewer's specific blocker. - #expect(!shouldReFire( - meta: meta(emittedSecondsAgo: quietWindow + 1, headShaAtEmit: shaA), - currentStatus: status(sha: shaA, isOpen: false, mergeable: .unknown) - )) - } - - @Test - func doesNotReFireWhenCurrentStatusMissing() { - // No persisted PR status for the session β†’ we can't reason about - // SHA equality or bucket membership. Refuse. - #expect(!shouldReFire( - meta: meta(emittedSecondsAgo: quietWindow + 1, headShaAtEmit: shaA), - currentStatus: nil - )) - } - - @Test - func doesNotReFireWhenBaselineShaNil() { - // Migrated entries from before CROW-505 may have nil headShaAtEmit. - // Without a baseline we can't prove the author hasn't pushed β€” - // refuse rather than risk an unwanted re-fire. - #expect(!shouldReFire( - meta: EmittedTransitionMeta(emittedAt: now.addingTimeInterval(-(quietWindow + 1)), headShaAtEmit: nil), - currentStatus: status(sha: shaA) - )) - } - - // MARK: - Re-fire cap (CROW-505 review #3) - - @Test - func doesNotReFireOncePerEmissionCapReached() { - // The whole reason this cap exists: when the agent legitimately - // *replies* on the PR instead of pushing (a behavior the - // auto-respond prompt itself encourages), SHA stays put and the - // agent goes idle. Without a cap the same prompt re-injects every - // quiet window forever, potentially producing duplicate reply - // comments on the reviewer's PR. After `maxStalledRefires`, refuse - // until an edge event creates a new meta entry. - #expect(!shouldReFire( - meta: meta( - emittedSecondsAgo: quietWindow + 1, - headShaAtEmit: shaA, - reFireCount: IssueTracker.maxStalledRefires - ), - currentStatus: status(sha: shaA) - )) - } - - @Test - func reFiresWhileUnderCap() { - // Count below the cap still permits a re-fire. With the production - // cap of 1, only `reFireCount == 0` qualifies β€” but a customizable - // cap (passed explicitly here) lets us verify the < relation. - #expect(shouldReFire( - meta: meta( - emittedSecondsAgo: quietWindow + 1, - headShaAtEmit: shaA, - reFireCount: 2 - ), - currentStatus: status(sha: shaA), - maxRefires: 3 - )) - } - - // MARK: - Dedup-key parsers - - @Test - func parsesSessionIDAndKindFromDedupKey() { - let sid = UUID() - let crKey = "\(sid.uuidString)|changesRequested|R_1" - let cfKey = "\(sid.uuidString)|checksFailing|\(shaA)" - - #expect(IssueTracker.parseSessionID(fromDedupKey: crKey) == sid) - #expect(IssueTracker.parseSessionID(fromDedupKey: cfKey) == sid) - #expect(IssueTracker.parseKind(fromDedupKey: crKey) == .changesRequested) - #expect(IssueTracker.parseKind(fromDedupKey: cfKey) == .checksFailing) - } - - @Test - func parsersReturnNilForMalformedKeys() { - #expect(IssueTracker.parseSessionID(fromDedupKey: "not-a-uuid|changesRequested|R_1") == nil) - #expect(IssueTracker.parseSessionID(fromDedupKey: "no-pipe-anywhere") == nil) - #expect(IssueTracker.parseKind(fromDedupKey: "\(UUID().uuidString)|unknownKind|x") == nil) - #expect(IssueTracker.parseKind(fromDedupKey: "missing-discriminator") == nil) - } -} - -/// Integration coverage for `IssueTracker.reFireStalledChangesRequested`: the -/// map-walking pass that consults the predicate, builds synthetic transitions, -/// and bumps the dedup-meta timestamp. The predicate is covered above in -/// isolation; these tests catch wiring drift β€” gate-off behavior, terminal -/// resolution, and the `emittedAt` refresh that prevents same-poll re-fires. -@Suite("IssueTracker stalled re-fire wiring (CROW-505)") -@MainActor -struct IssueTrackerStalledRefireWiringTests { - - private let shaA = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - - /// Build an `IssueTracker` whose appState contains a single work session - /// with a PR link, a managed terminal at `.agentLaunched`, an idle hook - /// state, and a prior PRStatus in CHANGES_REQUESTED on `shaA`. Seeds an - /// `emittedTransitionMeta` entry that has been sitting for ~2Γ— the quiet - /// window β€” so the pure predicate would say "re-fire". - private func makeStalledTracker( - toggleOn: Bool, - sessionKind: SessionKind = .work, - agentActivity: AgentActivityState = .idle, - readiness: TerminalReadiness? = .agentLaunched - ) -> (IssueTracker, UUID, String) { - let state = AppState() - let sid = UUID() - let session = Session(id: sid, name: "feature-test", kind: sessionKind) - state.sessions = [session] - - let prLink = SessionLink( - sessionID: sid, - label: "PR #1", - url: "https://github.com/radiusmethod/crow/pull/1", - linkType: .pr - ) - state.links[sid] = [prLink] - - let terminal = SessionTerminal( - sessionID: sid, - name: "Claude Code", - cwd: "/tmp", - isManaged: true - ) - state.terminals[sid] = [terminal] - if let readiness { - state.terminalReadiness[terminal.id] = readiness - } - state.hookState(for: sid).activityState = agentActivity - - let tracker = IssueTracker(appState: state, providerManager: ProviderManager()) - tracker.respondToChangesRequestedProvider = { toggleOn } - - let status = PRStatus( - checksPass: .pending, - reviewStatus: .changesRequested, - mergeable: .unknown, - failedCheckNames: [], - headSha: shaA, - latestReviewID: "R_1" - ) - tracker.previousPRStatus[sid] = status - - let key = "\(sid.uuidString)|changesRequested|R_1" - let now = Date() - tracker.emittedTransitionMeta[key] = EmittedTransitionMeta( - emittedAt: now.addingTimeInterval(-IssueTracker.stalledRefireQuietWindow * 2), - headShaAtEmit: shaA - ) - - return (tracker, sid, key) - } - - // MARK: - The gate - - @Test - func toggleOffSuppressesAllSyntheticTransitions() { - // Reviewer's regression catch (review on PR #507): an opted-out user - // must not see synthetic transitions, even when every predicate - // condition is satisfied. Without the gate, `onPRStatusTransitions` - // would fire `notifyPRTransition` every quiet window β€” pure macOS - // notification noise for zero useful action. - let (tracker, _, key) = makeStalledTracker(toggleOn: false) - let metaBefore = tracker.emittedTransitionMeta[key] - - let transitions = tracker.reFireStalledChangesRequested(now: Date()) - - #expect(transitions.isEmpty) - // Meta must not be touched by the no-op path β€” bumping `emittedAt` - // when nothing fired would lose the original dispatch timestamp. - #expect(tracker.emittedTransitionMeta[key] == metaBefore) - } - - @Test - func toggleOnPermitsSyntheticTransitionAndBumpsEmittedAt() { - let now = Date() - let (tracker, sid, key) = makeStalledTracker(toggleOn: true) - let originalEmittedAt = tracker.emittedTransitionMeta[key]?.emittedAt - - let transitions = tracker.reFireStalledChangesRequested(now: now) - - #expect(transitions.count == 1) - #expect(transitions[0].kind == .changesRequested) - #expect(transitions[0].sessionID == sid) - #expect(transitions[0].headSha == shaA) - #expect(transitions[0].latestReviewID == "R_1") - // CROW-505 review #3: synthetic re-fires are tagged so AppDelegate - // can suppress the macOS notification while still letting - // AutoRespondCoordinator dispatch the re-prompt. - #expect(transitions[0].isReFire) - // emittedAt bumped to `now` so the next poll inside the quiet window - // doesn't immediately re-fire again. - #expect(tracker.emittedTransitionMeta[key]?.emittedAt == now) - #expect(tracker.emittedTransitionMeta[key]?.emittedAt != originalEmittedAt) - // CROW-505 review #3: re-fire count incremented so the per-emission - // cap eventually shuts the re-fire down even if the agent never - // pushes (e.g. legitimately replied instead). - #expect(tracker.emittedTransitionMeta[key]?.reFireCount == 1) - } - - @Test - func secondReFireOnSameMetaIsSuppressedByCap() { - // The cap regression test: after one re-fire on a stalled emission - // (production `maxStalledRefires = 1`), a subsequent poll under the - // same conditions must NOT emit another synthetic transition. - // Otherwise an agent that legitimately replied instead of pushing - // would receive the identical prompt every quiet window forever. - let (tracker, _, key) = makeStalledTracker(toggleOn: true) - - // First re-fire β€” emits, increments count, refreshes emittedAt. - let first = tracker.reFireStalledChangesRequested(now: Date()) - #expect(first.count == 1) - #expect(tracker.emittedTransitionMeta[key]?.reFireCount == 1) - - // Roll the clock past another quiet window so emittedAt is stale - // again, then walk the pass β€” the cap must hold. - let laterMeta = EmittedTransitionMeta( - emittedAt: Date().addingTimeInterval(-IssueTracker.stalledRefireQuietWindow * 2), - headShaAtEmit: shaA, - reFireCount: 1 - ) - tracker.emittedTransitionMeta[key] = laterMeta - - let second = tracker.reFireStalledChangesRequested(now: Date()) - #expect(second.isEmpty) - // Cap reached β†’ meta is left untouched (no count overflow, no - // emittedAt drift) so it remains a stable record of the original - // stall until an edge event clears it. - #expect(tracker.emittedTransitionMeta[key] == laterMeta) - } - - // MARK: - Review-session gate - - @Test - func reviewSessionsAreSkippedEvenWhenToggleOn() { - // Mirrors the `AutoRespondCoordinator.shouldSkipReviewSession` policy: - // never commit on behalf of a reviewer. The map-walking pass checks - // `session.kind != .review` so the synthetic transition never reaches - // the notification fan-out either. - let (tracker, _, _) = makeStalledTracker(toggleOn: true, sessionKind: .review) - let transitions = tracker.reFireStalledChangesRequested(now: Date()) - #expect(transitions.isEmpty) - } - - // MARK: - Activity / readiness gates (integration view of the predicate) - - @Test - func agentWorkingSuppressesSyntheticTransition() { - let (tracker, _, _) = makeStalledTracker(toggleOn: true, agentActivity: .working) - #expect(tracker.reFireStalledChangesRequested(now: Date()).isEmpty) - } - - @Test - func unlaunchedTerminalSuppressesSyntheticTransition() { - let (tracker, _, _) = makeStalledTracker(toggleOn: true, readiness: .shellReady) - #expect(tracker.reFireStalledChangesRequested(now: Date()).isEmpty) - } - - @Test - func missingManagedTerminalSuppressesSyntheticTransition() { - // Edge: no terminal entry at all (e.g. session created but terminal - // never materialized). Predicate sees `terminalReadiness: nil` and - // refuses. - let (tracker, _, _) = makeStalledTracker(toggleOn: true, readiness: nil) - #expect(tracker.reFireStalledChangesRequested(now: Date()).isEmpty) - } -} From bfff26fc43e0a4f30abc3833b41a77908ed311e9 Mon Sep 17 00:00:00 2001 From: Danny Gershman Date: Sun, 14 Jun 2026 21:40:42 -0400 Subject: [PATCH 2/3] Address review feedback on CROW-508 (#509) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Yellow finding: - Rename testParseMonitoredPRsLastSubstantiveCommitExcludesMergesAndRebases β†’ ...ExcludesMergeCommits. Test body never covered the rebase path. - Document the rebase-rewrites-committer-dates gap in parsePRNode as a known, accepted false negative (avoids a tree-equals-parents API call per PR per poll). Green findings: - Suppress macOS notification on cooldown re-fires for the same reviewer submission. Adds `isCooldownReFire` to PRStatusTransition; tracker records `lastNotifiedChangesRequestedAt` per PR. The agent re-prompt still fires (the useful effect); only the duplicate banner is skipped. - Drop the dead `id` field from `latestReviews` GraphQL selection β€” parsePRNode no longer reads it after CROW-508 removed `latestReviewID`. - Prune ephemeral state (seenPRs, lastRefineDispatchAt, lastNotifiedChangesRequestedAt) in applyPRStatuses for PR URLs no longer linked to a live session. Tests: 4 new (cooldown re-fire flag set/clear semantics across reviewer submissions, plus the stale-entry prune). πŸ¦β€β¬› Generated with Claude Code, orchestrated by Crow Co-Authored-By: Claude Crow-Session: 3AC2E8F0-213B-4D08-9B53-9510D856BE6C --- .../CrowCore/Models/PRStatusTransition.swift | 12 ++- .../Backends/GitHubCodeBackend.swift | 16 +++- .../CrowProviderTests/BackendsTests.swift | 14 +++- Sources/Crow/App/AppDelegate.swift | 5 ++ Sources/Crow/App/IssueTracker.swift | 44 ++++++++++- .../IssueTrackerNeedsRefineTests.swift | 74 +++++++++++++++++++ 6 files changed, 154 insertions(+), 11 deletions(-) diff --git a/Packages/CrowCore/Sources/CrowCore/Models/PRStatusTransition.swift b/Packages/CrowCore/Sources/CrowCore/Models/PRStatusTransition.swift index f4abf70a..d2dcbf1f 100644 --- a/Packages/CrowCore/Sources/CrowCore/Models/PRStatusTransition.swift +++ b/Packages/CrowCore/Sources/CrowCore/Models/PRStatusTransition.swift @@ -27,6 +27,14 @@ public struct PRStatusTransition: Sendable, Equatable { public let headSha: String? /// Names of failing checks (empty for `.changesRequested`). public let failedCheckNames: [String] + /// True when this `.changesRequested` is the second-or-later dispatch + /// for the same reviewer submission (cooldown re-fire on a stuck-idle + /// agent). The re-prompt to the agent is still useful β€” that's the + /// whole point of the cooldown β€” but a fresh macOS notification every + /// cooldown window for the same review id is pure noise, so + /// `AppDelegate.onPRStatusTransitions` skips `notifyPRTransition` when + /// this flag is set. Always `false` for `.checksFailing`. + public let isCooldownReFire: Bool public init( kind: Kind, @@ -34,7 +42,8 @@ public struct PRStatusTransition: Sendable, Equatable { prURL: String, prNumber: Int? = nil, headSha: String? = nil, - failedCheckNames: [String] = [] + failedCheckNames: [String] = [], + isCooldownReFire: Bool = false ) { self.kind = kind self.sessionID = sessionID @@ -42,6 +51,7 @@ public struct PRStatusTransition: Sendable, Equatable { self.prNumber = prNumber self.headSha = headSha self.failedCheckNames = failedCheckNames + self.isCooldownReFire = isCooldownReFire } } diff --git a/Packages/CrowProvider/Sources/CrowProvider/Backends/GitHubCodeBackend.swift b/Packages/CrowProvider/Sources/CrowProvider/Backends/GitHubCodeBackend.swift index f09bc3a4..10c24017 100644 --- a/Packages/CrowProvider/Sources/CrowProvider/Backends/GitHubCodeBackend.swift +++ b/Packages/CrowProvider/Sources/CrowProvider/Backends/GitHubCodeBackend.swift @@ -322,7 +322,7 @@ public struct GitHubCodeBackend: CodeBackend { } } } - latestReviews(first: 20) { nodes { id state submittedAt } } + latestReviews(first: 20) { nodes { state submittedAt } } commits(last: 30) { nodes { commit { @@ -463,9 +463,17 @@ public struct GitHubCodeBackend: CodeBackend { // non-rebase commit timestamp anchors "has the agent substantively // responded since the review?". A merge commit (parent count >= 2) // or a commit whose subject starts with `Merge branch|remote-tracking| - // pull request` is excluded so the GitHub "Update branch" button and - // routine rebases can't trick the rule into thinking the agent - // pushed a fix. + // pull request` is excluded so the GitHub "Update branch" button + // (default merge mode) and routine merges from main can't trick the + // rule into thinking the agent pushed a fix. + // + // Known gap: a real `git rebase` (or "Update with rebase" on the + // Update-branch dropdown) rewrites the *committer* date of the + // existing feature commits to ~now. Those commits are not merge + // commits, so they pass the filter and DO advance + // `lastSubstantiveCommitAt` β€” a false negative the rule accepts as + // the cost of avoiding a tree-equals-parents API call per PR per + // poll. The ticket calls the tree check optional; we skip it in v1. let commitNodes = (node["commits"] as? [String: Any])?["nodes"] as? [[String: Any]] ?? [] let lastSubstantiveCommitAt = commitNodes .compactMap { node -> Date? in diff --git a/Packages/CrowProvider/Tests/CrowProviderTests/BackendsTests.swift b/Packages/CrowProvider/Tests/CrowProviderTests/BackendsTests.swift index a0d1744e..cf54b703 100644 --- a/Packages/CrowProvider/Tests/CrowProviderTests/BackendsTests.swift +++ b/Packages/CrowProvider/Tests/CrowProviderTests/BackendsTests.swift @@ -712,9 +712,17 @@ final class BackendsTests: XCTestCase { /// Merge commits (parents.totalCount >= 2) and rebase-style commits /// matching the merge-message prefix list must NOT advance the /// "agent substantively responded" timestamp. Otherwise pressing - /// GitHub's "Update branch" button or rebasing onto main would fool - /// the rule into thinking the agent pushed a fix. - func testParseMonitoredPRsLastSubstantiveCommitExcludesMergesAndRebases() throws { + /// GitHub's "Update branch" button (default merge mode) or rebasing + /// onto main with a merge commit would fool the rule into thinking the + /// agent pushed a fix. + /// + /// Known gap (documented in `parsePRNode`): a real `git rebase` rewrites + /// the *committer* date of the feature commits themselves. Those commits + /// are not merge commits, so they pass the filter and DO advance + /// `lastSubstantiveCommitAt`. This test does not cover that path; the + /// stateless rule accepts the false negative as the cost of not paying + /// for a tree-equals-parents API call per PR per poll. + func testParseMonitoredPRsLastSubstantiveCommitExcludesMergeCommits() throws { let json = """ { "data": { diff --git a/Sources/Crow/App/AppDelegate.swift b/Sources/Crow/App/AppDelegate.swift index 53ef7443..5cc48685 100644 --- a/Sources/Crow/App/AppDelegate.swift +++ b/Sources/Crow/App/AppDelegate.swift @@ -778,6 +778,11 @@ final class AppDelegate: NSObject, NSApplicationDelegate { tracker.onPRStatusTransitions = { [weak self] transitions in guard let self else { return } for transition in transitions { + // Skip the macOS banner on cooldown re-fires: the dispatch + // re-prompts the agent (the useful part), but a fresh + // banner every 7 min for the same reviewer submission is + // pure noise the user already saw the first time. + if transition.isCooldownReFire { continue } if let session = self.appState.sessions.first(where: { $0.id == transition.sessionID }) { self.notificationManager?.notifyPRTransition(transition, session: session) } diff --git a/Sources/Crow/App/IssueTracker.swift b/Sources/Crow/App/IssueTracker.swift index c5c51388..4b964458 100644 --- a/Sources/Crow/App/IssueTracker.swift +++ b/Sources/Crow/App/IssueTracker.swift @@ -179,6 +179,17 @@ final class IssueTracker { /// is one extra prompt, then the cooldown re-applies). var lastRefineDispatchAt: [String: Date] = [:] + /// Per-PR record of the `lastChangesRequestedAt` we most recently posted + /// a macOS notification for. When a cooldown re-fire dispatches for the + /// same reviewer submission (same timestamp), the emitted transition + /// carries `isCooldownReFire = true` so `AppDelegate.onPRStatusTransitions` + /// skips the notification β€” the agent re-prompt is still useful, but a + /// fresh banner every 7 min for the same review is pure noise. A new + /// reviewer submission advances `lastChangesRequestedAt`, so the very + /// next dispatch is `isCooldownReFire = false` and notifies again. + /// Ephemeral; restart cost is one duplicate banner per PR, bounded. + var lastNotifiedChangesRequestedAt: [String: Date] = [:] + /// Minimum gap between consecutive "needs refine" dispatches for the same /// PR (CROW-508). 7 min is a deliberate middle of the 5–10 min range the /// ticket suggested: long enough that an agent thinking through a hard @@ -1517,10 +1528,17 @@ final class IssueTracker { let now = Date() let respondToChangesRequested = respondToChangesRequestedProvider() let sessionsWithPRs = appState.sessions.filter { !$0.isManager } + // Collect live PR URLs as we go so we can drop stale entries at the + // end of the pass. Without this, deleting a session (or its `.pr` + // link) leaves its PR URL in `seenPRs`/`lastRefineDispatchAt`/ + // `lastNotifiedChangesRequestedAt` for the rest of the process β€” + // bounded but not strictly clean. + var livePRURLs: Set = [] for session in sessionsWithPRs { let links = appState.links(for: session.id) guard let prLink = links.first(where: { $0.linkType == .pr }) else { continue } guard let pr = byURL[prLink.url] else { continue } + livePRURLs.insert(prLink.url) let newStatus = buildPRStatus(from: pr) let oldStatus = previousPRStatus[session.id] @@ -1549,19 +1567,31 @@ final class IssueTracker { ), cooldownElapsed(prURL: prLink.url, now: now) { lastRefineDispatchAt[prLink.url] = now + // Same-review cooldown re-fire suppresses the macOS + // notification (the dispatch + agent prompt are still + // valuable; the banner duplicates info the user already + // saw). A new reviewer submission advances + // `lastChangesRequestedAt`, flipping the flag back off so + // the next dispatch notifies. + let isCooldownReFire = lastNotifiedChangesRequestedAt[prLink.url] == newStatus.lastChangesRequestedAt + if !isCooldownReFire { + lastNotifiedChangesRequestedAt[prLink.url] = newStatus.lastChangesRequestedAt + } transitions.append(PRStatusTransition( kind: .changesRequested, sessionID: session.id, prURL: prLink.url, prNumber: pr.number, headSha: newStatus.headSha, - failedCheckNames: [] + failedCheckNames: [], + isCooldownReFire: isCooldownReFire )) - NSLog("[IssueTracker] needs-refine fired β€” session=%@, sha=%@, lastCR=%@, lastCommit=%@", + NSLog("[IssueTracker] needs-refine fired β€” session=%@, sha=%@, lastCR=%@, lastCommit=%@, reFire=%@", session.id.uuidString as NSString, (newStatus.headSha ?? "") as NSString, Self.iso(newStatus.lastChangesRequestedAt) as NSString, - Self.iso(newStatus.lastSubstantiveCommitAt) as NSString) + Self.iso(newStatus.lastSubstantiveCommitAt) as NSString, + (isCooldownReFire ? "yes" : "no") as NSString) } seenPRs.insert(prLink.url) @@ -1569,6 +1599,14 @@ final class IssueTracker { appState.prStatus[session.id] = newStatus } + // Prune ephemeral state for PRs no longer linked to any live + // session. Cheap (Set intersection / dictionary filter) and keeps + // the maps bounded by current PR count rather than lifetime + // process activity. + if !seenPRs.isEmpty { seenPRs.formIntersection(livePRURLs) } + lastRefineDispatchAt = lastRefineDispatchAt.filter { livePRURLs.contains($0.key) } + lastNotifiedChangesRequestedAt = lastNotifiedChangesRequestedAt.filter { livePRURLs.contains($0.key) } + if !transitions.isEmpty { onPRStatusTransitions?(transitions) } diff --git a/Tests/CrowTests/IssueTrackerNeedsRefineTests.swift b/Tests/CrowTests/IssueTrackerNeedsRefineTests.swift index e1d063de..21615974 100644 --- a/Tests/CrowTests/IssueTrackerNeedsRefineTests.swift +++ b/Tests/CrowTests/IssueTrackerNeedsRefineTests.swift @@ -200,6 +200,80 @@ struct IssueTrackerNeedsRefineTests { tracker.applyPRStatuses(viewerPRs: [pr]) #expect(captured.changesRequestedCount == 0) } + + // MARK: - Cooldown-re-fire notification dedup (review feedback) + + @Test + func firstDispatchHasIsCooldownReFireFalse() { + // The first dispatch for a reviewer submission must NOT carry the + // notification-skip flag β€” the user is seeing the event for the + // first time and needs the macOS banner. + let (tracker, _, captured) = makeTracker() + tracker.seenPRs.insert(prURL) + let pr = makeViewerPR(lastChangesRequestedAt: reviewAt, lastSubstantiveCommitAt: beforeReview) + tracker.applyPRStatuses(viewerPRs: [pr]) + #expect(captured.changesRequestedCount == 1) + #expect(captured.all.first?.isCooldownReFire == false) + } + + @Test + func cooldownReFireSetsTheFlagForSameReviewerSubmission() { + // Same review, agent went back to idle without committing, cooldown + // elapsed β†’ re-prompt is useful (agent is stuck) but the macOS + // banner is redundant. Carry the flag so AppDelegate skips it. + let (tracker, _, captured) = makeTracker() + tracker.seenPRs.insert(prURL) + let pr = makeViewerPR(lastChangesRequestedAt: reviewAt, lastSubstantiveCommitAt: beforeReview) + tracker.applyPRStatuses(viewerPRs: [pr]) + // Backdate the cooldown clock so the next call dispatches again. + tracker.lastRefineDispatchAt[prURL] = Date().addingTimeInterval(-IssueTracker.needsRefineCooldown - 1) + tracker.applyPRStatuses(viewerPRs: [pr]) + #expect(captured.changesRequestedCount == 2) + #expect(captured.all[0].isCooldownReFire == false) + #expect(captured.all[1].isCooldownReFire == true) + } + + @Test + func newReviewerSubmissionClearsTheFlag() { + // A fresh CHANGES_REQUESTED review from the reviewer advances + // `lastChangesRequestedAt`. The next dispatch IS a new event and + // must notify β€” flag must flip back to false. + let (tracker, _, captured) = makeTracker() + tracker.seenPRs.insert(prURL) + let pr1 = makeViewerPR(lastChangesRequestedAt: reviewAt, lastSubstantiveCommitAt: beforeReview) + tracker.applyPRStatuses(viewerPRs: [pr1]) + tracker.lastRefineDispatchAt[prURL] = Date().addingTimeInterval(-IssueTracker.needsRefineCooldown - 1) + + let laterReviewAt = reviewAt.addingTimeInterval(3600) + let pr2 = makeViewerPR(lastChangesRequestedAt: laterReviewAt, lastSubstantiveCommitAt: beforeReview) + tracker.applyPRStatuses(viewerPRs: [pr2]) + #expect(captured.changesRequestedCount == 2) + #expect(captured.all[1].isCooldownReFire == false) + } + + // MARK: - Stale-entry prune (review feedback) + + @Test + func prunesEphemeralStateForPRsWithoutLiveSession() { + // A PR linked to a deleted (or never-existed) session must NOT + // linger in `seenPRs` / `lastRefineDispatchAt` / + // `lastNotifiedChangesRequestedAt`. Otherwise the maps grow + // unboundedly across the process's lifetime. + let (tracker, _, _) = makeTracker() + let stalePRURL = "https://github.com/foo/bar/pull/999" + tracker.seenPRs.insert(stalePRURL) + tracker.lastRefineDispatchAt[stalePRURL] = Date() + tracker.lastNotifiedChangesRequestedAt[stalePRURL] = reviewAt + + let livePR = makeViewerPR(lastChangesRequestedAt: reviewAt, lastSubstantiveCommitAt: beforeReview) + tracker.applyPRStatuses(viewerPRs: [livePR]) + + #expect(!tracker.seenPRs.contains(stalePRURL)) + #expect(tracker.lastRefineDispatchAt[stalePRURL] == nil) + #expect(tracker.lastNotifiedChangesRequestedAt[stalePRURL] == nil) + // The live PR still gets recorded normally. + #expect(tracker.seenPRs.contains(prURL)) + } } /// Captures emitted transitions across multiple polls. Class so the test's From 34cac117d5c437b2746b8b0efcf625d24fd845aa Mon Sep 17 00:00:00 2001 From: Danny Gershman Date: Sun, 14 Jun 2026 22:23:50 -0400 Subject: [PATCH 3/3] Fix .withFractionalSeconds parse failure that made CROW-508 inert (#509) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Red 1 β€” date parsing: ISO8601DateFormatter with .withFractionalSeconds is strict β€” it returns nil for any timestamp lacking a fraction. GitHub's GraphQL DateTime scalar emits 2026-06-15T01:28:17Z (no fraction), so both lastChangesRequestedAt and lastSubstantiveCommitAt were always nil in production. PRStatus.needsRefine would bail at its guard let lastReview and return false on every poll. CROW-508 never dispatched. Add GitHubCodeBackend.parseGitHubDateTime(_:) β€” tries the non-fractional shape first (GitHub's actual format), falls back to fractional. Use it for both submittedAt and committedDate in parsePRNode. Other call sites in this file still use the brittle pattern (parseReviewRequests, parseStaleMRResponse, parseStaleStateResponse) β€” out of scope here but called out in the helper's doc comment as a follow-up. Red 2 β€” masked tests: The existing tests built the expected Date with the same broken formatter, so nil == nil passed vacuously. Switch to hardcoded epoch- seconds Date construction so the parser and the assertions can never co-fail. Add explicit non-nil tests that lock in GitHub's actual format and fractional-shape resilience. Green 2 β€” two sessions sharing a PR URL: The first-observation skip is keyed by PR URL, but the loop wrote into seenPRs before later sessions checked it. Two sessions sharing a PR let the second one dispatch on poll 1. Snapshot seenPRs at the start of the loop. πŸ¦β€β¬› Generated with Claude Code, orchestrated by Crow Co-Authored-By: Claude Crow-Session: 3AC2E8F0-213B-4D08-9B53-9510D856BE6C --- .../Backends/GitHubCodeBackend.swift | 36 ++++++++++++-- .../CrowProviderTests/BackendsTests.swift | 47 ++++++++++++++++--- Sources/Crow/App/IssueTracker.swift | 18 +++++-- .../IssueTrackerNeedsRefineTests.swift | 39 +++++++++++++++ 4 files changed, 125 insertions(+), 15 deletions(-) diff --git a/Packages/CrowProvider/Sources/CrowProvider/Backends/GitHubCodeBackend.swift b/Packages/CrowProvider/Sources/CrowProvider/Backends/GitHubCodeBackend.swift index 10c24017..a52b542b 100644 --- a/Packages/CrowProvider/Sources/CrowProvider/Backends/GitHubCodeBackend.swift +++ b/Packages/CrowProvider/Sources/CrowProvider/Backends/GitHubCodeBackend.swift @@ -447,17 +447,21 @@ public struct GitHubCodeBackend: CodeBackend { } let latestReviewNodes = (node["latestReviews"] as? [String: Any])?["nodes"] as? [[String: Any]] ?? [] let reviewStates = latestReviewNodes.compactMap { $0["state"] as? String } - let dateFmt = ISO8601DateFormatter() - dateFmt.formatOptions = [.withInternetDateTime, .withFractionalSeconds] // Stateless "needs refine" rule (CROW-508): the latest CHANGES_REQUESTED // submission timestamp anchors "since when does the agent owe a // response?". `latestReviews(first: 20)` is intentionally broad // enough to cover several reviewers' latest reviews without paginating // β€” GitHub orders that connection by reviewer, not by recency, so a // narrow window could omit the CR we need. + // + // Parse with `parseGitHubDateTime` (tolerant of both fractional and + // non-fractional). GitHub's GraphQL `DateTime` scalar emits + // non-fractional ISO-8601 (`2026-06-15T01:28:17Z`), and an + // `ISO8601DateFormatter` configured with `.withFractionalSeconds` + // returns nil for that shape β€” silently disabling the rule. let lastChangesRequestedAt = latestReviewNodes .filter { ($0["state"] as? String) == "CHANGES_REQUESTED" } - .compactMap { ($0["submittedAt"] as? String).flatMap { dateFmt.date(from: $0) } } + .compactMap { ($0["submittedAt"] as? String).flatMap(Self.parseGitHubDateTime) } .max() // Stateless "needs refine" rule (CROW-508): the latest non-merge, // non-rebase commit timestamp anchors "has the agent substantively @@ -482,7 +486,7 @@ public struct GitHubCodeBackend: CodeBackend { if parents >= 2 { return nil } let message = (commit["messageHeadline"] as? String) ?? "" if Self.isMergeCommitMessage(message) { return nil } - return (commit["committedDate"] as? String).flatMap { dateFmt.date(from: $0) } + return (commit["committedDate"] as? String).flatMap(Self.parseGitHubDateTime) } .max() return PRRecord( @@ -519,6 +523,30 @@ public struct GitHubCodeBackend: CodeBackend { || trimmed.hasPrefix("Merge pull request ") } + /// Parse a GitHub `DateTime` scalar tolerantly. GitHub's GraphQL emits + /// the non-fractional ISO-8601 form (`2026-06-15T01:28:17Z`), but + /// `ISO8601DateFormatter` configured with `.withFractionalSeconds` + /// returns nil for that shape, and the inverse configuration rejects + /// any timestamp that DOES carry a fraction. Production code must + /// tolerate both β€” a strict formatter silently disables features that + /// depend on parsed timestamps (CROW-508 needsRefine was inert this + /// way; see PR #509 review). We try the non-fractional form first + /// (matches what GitHub actually emits today), then fall back to + /// fractional for resilience against future API drift. + /// + /// Other call sites in this file still use the brittle pattern (see + /// `parseReviewRequests`, `parseStaleMRResponse`, parseStaleStateResponse). + /// They're out of scope for this PR but should migrate to this helper + /// in a follow-up β€” same trap, different fields. + nonisolated static func parseGitHubDateTime(_ raw: String) -> Date? { + let plain = ISO8601DateFormatter() + plain.formatOptions = [.withInternetDateTime] + if let d = plain.date(from: raw) { return d } + let withFraction = ISO8601DateFormatter() + withFraction.formatOptions = [.withInternetDateTime, .withFractionalSeconds] + return withFraction.date(from: raw) + } + static func parseReviewRequests( _ searchObj: [String: Any]?, dateFormatter: ISO8601DateFormatter, diff --git a/Packages/CrowProvider/Tests/CrowProviderTests/BackendsTests.swift b/Packages/CrowProvider/Tests/CrowProviderTests/BackendsTests.swift index cf54b703..c7d2116a 100644 --- a/Packages/CrowProvider/Tests/CrowProviderTests/BackendsTests.swift +++ b/Packages/CrowProvider/Tests/CrowProviderTests/BackendsTests.swift @@ -671,9 +671,14 @@ final class BackendsTests: XCTestCase { """ let listing = try GitHubCodeBackend.parseMonitoredPRsResponse(json) XCTAssertEqual(listing.viewerPRs.count, 1) - let fmt = ISO8601DateFormatter() - fmt.formatOptions = [.withInternetDateTime, .withFractionalSeconds] - XCTAssertEqual(listing.viewerPRs[0].lastChangesRequestedAt, fmt.date(from: "2026-06-07T10:00:00Z")) + // Construct the expected instant from epoch seconds β€” NOT from the + // same ISO8601DateFormatter the parser uses. A bug where the + // production formatter returns nil (CROW-508 PR #509 review) would + // pass the previous version of this assertion because both sides + // were nil. Epoch construction eliminates that co-failure mode. + // 2026-06-07T10:00:00Z = 1780826400 seconds since 1970. + let expected = Date(timeIntervalSince1970: 1780826400) + XCTAssertEqual(listing.viewerPRs[0].lastChangesRequestedAt, expected) } func testParseMonitoredPRsLastChangesRequestedAtIsNilWhenNoChangesRequested() throws { @@ -709,6 +714,33 @@ final class BackendsTests: XCTestCase { XCTAssertNil(listing.viewerPRs[0].lastChangesRequestedAt) } + /// Locks down GitHub's actual `DateTime` shape (no fractional seconds) + /// parsing to a non-nil value. The original CROW-508 patch used + /// `[.withInternetDateTime, .withFractionalSeconds]` which is strict + /// and rejects this format β€” feature was silently inert in production. + /// PR #509 review caught it. This test will fail loudly if a future + /// regression re-introduces the strict formatter. + func testParseGitHubDateTimeHandlesNonFractionalISO8601() { + // GitHub's actual format β€” no fraction. + let nonFractional = GitHubCodeBackend.parseGitHubDateTime("2026-06-15T01:28:17Z") + XCTAssertNotNil(nonFractional) + XCTAssertEqual(nonFractional, Date(timeIntervalSince1970: 1781486897)) + } + + /// Resilience against potential future API drift: a timestamp WITH a + /// fractional component must also parse. Both shapes flow through the + /// same helper. + func testParseGitHubDateTimeAlsoHandlesFractionalISO8601() { + let withFraction = GitHubCodeBackend.parseGitHubDateTime("2026-06-15T01:28:17.123Z") + XCTAssertNotNil(withFraction) + } + + /// Garbage input returns nil, doesn't crash. + func testParseGitHubDateTimeReturnsNilForGarbage() { + XCTAssertNil(GitHubCodeBackend.parseGitHubDateTime("not a date")) + XCTAssertNil(GitHubCodeBackend.parseGitHubDateTime("")) + } + /// Merge commits (parents.totalCount >= 2) and rebase-style commits /// matching the merge-message prefix list must NOT advance the /// "agent substantively responded" timestamp. Otherwise pressing @@ -764,10 +796,11 @@ final class BackendsTests: XCTestCase { """ let listing = try GitHubCodeBackend.parseMonitoredPRsResponse(json) XCTAssertEqual(listing.viewerPRs.count, 1) - let fmt = ISO8601DateFormatter() - fmt.formatOptions = [.withInternetDateTime, .withFractionalSeconds] - // Only the real fix commit counts; the merge commits are excluded. - XCTAssertEqual(listing.viewerPRs[0].lastSubstantiveCommitAt, fmt.date(from: "2026-06-01T00:00:00Z")) + // Only the real fix commit counts; merges are excluded. Constructed + // from epoch seconds for the same anti-co-failure reason as the + // CHANGES_REQUESTED timestamp test above. + // 2026-06-01T00:00:00Z = 1780272000. + XCTAssertEqual(listing.viewerPRs[0].lastSubstantiveCommitAt, Date(timeIntervalSince1970: 1780272000)) } /// Pure helper used by `parsePRNode`. Both Swift and tests share the diff --git a/Sources/Crow/App/IssueTracker.swift b/Sources/Crow/App/IssueTracker.swift index 4b964458..3047e56f 100644 --- a/Sources/Crow/App/IssueTracker.swift +++ b/Sources/Crow/App/IssueTracker.swift @@ -1527,6 +1527,16 @@ final class IssueTracker { var transitions: [PRStatusTransition] = [] let now = Date() let respondToChangesRequested = respondToChangesRequestedProvider() + // Snapshot `seenPRs` BEFORE the loop so the first-observation skip + // is consistent for every session this poll, regardless of order. + // Two sessions linked to the same PR URL: if we read live state, + // session A inserts and session B sees the URL already-seen and + // dispatches on the very first poll. With the snapshot, both + // sessions see "not seen yet" β†’ both skip, then we record the + // URL once. Cooldown still bounds it either way, but the snapshot + // matches the documented "first poll for a PR never dispatches" + // behavior precisely. (PR #509 review.) + let seenPRsAtStart = seenPRs let sessionsWithPRs = appState.sessions.filter { !$0.isManager } // Collect live PR URLs as we go so we can drop stale entries at the // end of the pass. Without this, deleting a session (or its `.pr` @@ -1555,12 +1565,12 @@ final class IssueTracker { prNumber: pr.number )) - // Stateless "needs refine" rule (CROW-508). Order matters: we - // emit BEFORE recording the PR as seen, so the first poll never - // dispatches. + // Stateless "needs refine" rule (CROW-508). First-observation + // skip uses the start-of-poll snapshot so two sessions sharing + // a PR can't race each other through the gate. if respondToChangesRequested, session.kind != .review, - seenPRs.contains(prLink.url), + seenPRsAtStart.contains(prLink.url), PRStatus.needsRefine( status: newStatus, terminalIdle: isManagedTerminalIdle(sessionID: session.id) diff --git a/Tests/CrowTests/IssueTrackerNeedsRefineTests.swift b/Tests/CrowTests/IssueTrackerNeedsRefineTests.swift index 21615974..c03ccc70 100644 --- a/Tests/CrowTests/IssueTrackerNeedsRefineTests.swift +++ b/Tests/CrowTests/IssueTrackerNeedsRefineTests.swift @@ -274,6 +274,45 @@ struct IssueTrackerNeedsRefineTests { // The live PR still gets recorded normally. #expect(tracker.seenPRs.contains(prURL)) } + + // MARK: - Two sessions sharing a PR URL (Green 2 β€” review feedback) + + @Test + func twoSessionsSharingPRBothSkipFirstObservation() { + // Two sessions linked to the same PR within one poll: with naive + // ordering, session A inserts into `seenPRs` and session B then sees + // the URL already-seen β†’ B dispatches on the very first poll. The + // snapshot-at-start guards against that. + let state = AppState() + let sessionA = Session(name: "session-a", kind: .work) + let sessionB = Session(name: "session-b", kind: .work) + state.sessions = [sessionA, sessionB] + state.links[sessionA.id] = [SessionLink(sessionID: sessionA.id, label: "PR #123", url: prURL, linkType: .pr)] + state.links[sessionB.id] = [SessionLink(sessionID: sessionB.id, label: "PR #123", url: prURL, linkType: .pr)] + let termA = SessionTerminal(sessionID: sessionA.id, name: "A", cwd: "/tmp", command: "claude", isManaged: true) + let termB = SessionTerminal(sessionID: sessionB.id, name: "B", cwd: "/tmp", command: "claude", isManaged: true) + state.terminals[sessionA.id] = [termA] + state.terminals[sessionB.id] = [termB] + state.terminalReadiness[termA.id] = .agentLaunched + state.terminalReadiness[termB.id] = .agentLaunched + + let tracker = IssueTracker(appState: state, providerManager: ProviderManager()) + tracker.respondToChangesRequestedProvider = { true } + let captured = TransitionCapture() + tracker.onPRStatusTransitions = { captured.append($0) } + + let pr = makeViewerPR(lastChangesRequestedAt: reviewAt, lastSubstantiveCommitAt: beforeReview) + + // Poll 1: both sessions must observe "not seen yet" and skip. + tracker.applyPRStatuses(viewerPRs: [pr]) + #expect(captured.changesRequestedCount == 0) + #expect(tracker.seenPRs.contains(prURL)) + + // Poll 2: both sessions can dispatch (cooldown is per-PR so only + // the first one wins, but the snapshot fix is about poll 1). + tracker.applyPRStatuses(viewerPRs: [pr]) + #expect(captured.changesRequestedCount == 1) + } } /// Captures emitted transitions across multiple polls. Class so the test's