redhat-performance · vishnuchalla · Jun 12, 2026 · Jun 8, 2026 · Jun 8, 2026 · Jun 10, 2026
diff --git a/bugzooka/analysis/pr_analyzer.py b/bugzooka/analysis/pr_analyzer.py
@@ -49,37 +49,41 @@ def _sanitize_gemini_output(result: str) -> str:
     return sanitized
 
 
-def _parse_pr_request(text: str) -> Optional[Tuple[str, str, str, str]]:
+def _parse_pr_request(text: str) -> Optional[Tuple[str, str, list, str]]:
     """
     Parse PR analysis request from text.
 
-    Expected format: "analyze pr: {github_url}, compare with {version}"
+    Expected formats:
+        Single PR:  "analyze pr: {github_url}, compare with {version}"
+        Multi PR:   "analyze pr: {url1} {url2}, compare with {version}"
 
-    Both PR URL and version are required.
+    All PRs must be from the same org/repo. Both PR URL(s) and version are required.
 
     :param text: Message text to parse
-    :return: Tuple of (organization, repository, pr_number, version) or None if invalid
+    :return: Tuple of (organization, repository, pr_numbers, version) or None if invalid
     """
-    # Match GitHub PR URLs like https://github.com/org/repo/pull/123
     pr_pattern = r"https?://github\.com/([^/]+)/([^/]+)/pull/(\d+)"
-    pr_match = re.search(pr_pattern, text)
+    pr_matches = re.findall(pr_pattern, text)
 
-    if not pr_match:
+    if not pr_matches:
         return None
 
-    org, repo, pr_number = pr_match.groups()
+    org, repo, _ = pr_matches[0]
+    for match_org, match_repo, _ in pr_matches[1:]:
+        if match_org != org or match_repo != repo:
+            return None
+
+    pr_numbers = [m[2] for m in pr_matches]
 
-    # Extract version from "compare with X.XX" pattern - REQUIRED
     version_pattern = r"compare\s+with\s+(\d+\.\d+)"
     version_match = re.search(version_pattern, text, re.IGNORECASE)
 
     if not version_match:
-        # Version is required - return None to trigger validation error
         return None
 
     version = version_match.group(1)
 
-    return org, repo, pr_number, version
+    return org, repo, pr_numbers, version
 
 
 async def analyze_pr_with_gemini(text: str, channel_id: str = None) -> dict:
@@ -117,9 +121,11 @@ async def analyze_pr_with_gemini(text: str, channel_id: str = None) -> dict:
             ),
         )
 
-    org, repo, pr_number, version = parsed
+    org, repo, pr_numbers, version = parsed
+    pr_display = ", ".join(f"#{pr}" for pr in pr_numbers)
     logger.info(
-        f"🔍 PR analysis requested for {org}/{repo}/pull/{pr_number} (OpenShift {version})"
+        "PR analysis requested for %s/%s %s (OpenShift %s)",
+        org, repo, pr_display, version,
     )
 
     # Ensure MCP client is initialized
@@ -132,19 +138,23 @@ async def analyze_pr_with_gemini(text: str, channel_id: str = None) -> dict:
 
     try:
         logger.info(
-            "Starting PR performance analysis: %s/%s#%s (OpenShift %s)",
+            "Starting PR performance analysis: %s/%s %s (OpenShift %s)",
             org,
             repo,
-            pr_number,
+            pr_display,
             version,
         )
 
         # Create prompt for PR analysis using centralized prompts
-        pr_url = f"https://github.com/{org}/{repo}/pull/{pr_number}"
+        pr_urls = ", ".join(
+            f"https://github.com/{org}/{repo}/pull/{pr}" for pr in pr_numbers
+        )
+        pr_numbers_str = ",".join(pr_numbers)
 
         system_prompt = PR_PERFORMANCE_ANALYSIS_PROMPT["system"]
         user_prompt = PR_PERFORMANCE_ANALYSIS_PROMPT["user"].format(
-            org=org, repo=repo, pr_number=pr_number, pr_url=pr_url, version=version
+            org=org, repo=repo, pr_numbers=pr_numbers_str,
+            pr_urls=pr_urls, version=version,
         )
         assistant_prompt = PR_PERFORMANCE_ANALYSIS_PROMPT["assistant"]
 
@@ -184,22 +194,22 @@ async def analyze_pr_with_gemini(text: str, channel_id: str = None) -> dict:
             # Also check if the result is very short (likely just a "no data" message)
             if len(result.strip()) < 200:
                 logger.info(
-                    "No performance test data found for PR %s/%s#%s",
+                    "No performance test data found for PR %s/%s %s",
                     org,
                     repo,
-                    pr_number,
+                    pr_display,
                 )
                 return make_response(
                     success=True,
                     message=(
-                        f"No performance test results found for PR #{pr_number}\n\n"
+                        f"No performance test results found for PR {pr_display}\n\n"
                         f"This could mean:\n"
                         f"- Performance tests haven't run yet for this PR\n"
                         f"- The PR doesn't trigger performance test jobs\n"
                         f"- Test results are not yet available in the Orion database\n\n"
                         f"Check back later or verify that performance tests are configured for this repository."
                     ),
-                    pr_info=(org, repo, pr_number, version),
+                    pr_info=(org, repo, pr_numbers, version),
                 )
 
         logger.info("PR analysis completed successfully (%d chars)", len(result))
@@ -210,7 +220,7 @@ async def analyze_pr_with_gemini(text: str, channel_id: str = None) -> dict:
         return make_response(
             success=True,
             message=sanitized_result,
-            pr_info=(org, repo, pr_number, version),
+            pr_info=(org, repo, pr_numbers, version),
         )
 
     except Exception as e:

diff --git a/bugzooka/analysis/prompts.py b/bugzooka/analysis/prompts.py
@@ -75,37 +75,50 @@
 5. **Sort All Metrics**: ALWAYS sort metrics by absolute percentage change (highest to lowest) in all tables and lists.
 6. **Format Output**: Use Slack-friendly formatting as specified in user instructions.
 """,
-    "user": """Please analyze the performance of this pull request:
+    "user": """Please analyze the performance of the following pull request(s):
 - Organization: {org}
 - Repository: {repo}
-- Pull Request Number: {pr_number}
-- PR URL: {pr_url}
+- Pull Request Number(s): {pr_numbers}
+- PR URL(s): {pr_urls}
 - OpenShift Version: {version}
 
+**Tool Calling Instructions:**
+- If analyzing a SINGLE PR, call `openshift_report_on_pr` with the `pull_request` parameter set to the PR number.
+- If analyzing MULTIPLE PRs, call `openshift_report_on_pr` with the `pull_requests` parameter set to a comma-separated string of PR numbers (e.g. "3169,3170"). The tool returns results for each PR under the `pulls` key.
+
 **Required Output Structure:**
 Output ONLY the sections below with ABSOLUTELY NO additional commentary, thinking process, or meta-commentary.
 
 *Performance Impact Assessment*
-- Overall Impact: State EXACTLY one of: ":exclamation: *Regression* :exclamation:" (only if 1 or more significant regression found), ":rocket: *Improvement* :rocket:" (only if 1 or more significant improvement found), ":arrow_right: *Neutral* :arrow_right:" (no significant changes)
-- Significant regressions (≥10%): List with 🛑 emoji, metric name and short config name, grouped by config. ONLY include if |change| >= 10% AND classified as regression. Do not use bold font, omit section entirely if none found.
-- Significant improvements (≥10%): List with 🚀 emoji, metric name and short config name, grouped by config. ONLY include if |change| >= 10% AND classified as improvement. Do not use bold font, omit section entirely if none found.
-- Moderate regressions (5-10%): List with ⚠️ emoji, metric name and short config name, grouped by config. ONLY include if 5% <= |change| < 10% AND classified as regression. Do not use bold font, omit section entirely if none found.
-- Moderate improvements (5-10%): List with ✅ emoji, metric name and short config name, grouped by config. ONLY include if 5% <= |change| < 10% AND classified as improvement. Do not use bold font, omit section entirely if none found.
+- For SINGLE PR: show one assessment section.
+- For MULTIPLE PRs: show a SEPARATE assessment section per PR (e.g. "*PR #3169*" then "*PR #3170*"). Each PR is independently compared against the periodic baseline — do NOT compare PRs against each other.
+- Per-PR assessment structure:
+  - Overall Impact: State EXACTLY one of: ":exclamation: *Regression* :exclamation:" (only if 1 or more significant regression found), ":rocket: *Improvement* :rocket:" (only if 1 or more significant improvement found), ":arrow_right: *Neutral* :arrow_right:" (no significant changes)
+  - Significant regressions (≥10%): List with 🛑 emoji, metric name, grouped by config. ONLY include if |change| >= 10% AND classified as regression. Do not use bold font, omit section entirely if none found.
+  - Significant improvements (≥10%): List with 🚀 emoji, metric name, grouped by config. ONLY include if |change| >= 10% AND classified as improvement. Do not use bold font, omit section entirely if none found.
+  - Moderate regressions (5-10%): List with ⚠️ emoji, metric name, grouped by config. ONLY include if 5% <= |change| < 10% AND classified as regression. Do not use bold font, omit section entirely if none found.
+  - Moderate improvements (5-10%): List with ✅ emoji, metric name, grouped by config. ONLY include if 5% <= |change| < 10% AND classified as improvement. Do not use bold font, omit section entirely if none found.
 - End this section with a line of 80 equals signs.
 
 *ONLY IF SIGNIFICANT REGRESSION IS FOUND, INCLUDE THE FOLLOWING SECTION*
 *Regression Analysis*:
-1. Root Cause: Identify the most likely cause of the significant regression. Be as specific as possible.
-2. Impact: Describe the impact of the significant regression on the system.
-3. Recommendations: Suggest corrective actions to address the significant regression.
+- For SINGLE PR: one regression analysis section.
+- For MULTIPLE PRs: a SEPARATE regression analysis per PR that has significant regressions. Label each with the PR number (e.g. "*Regression Analysis (PR #3169)*").
+- Per-PR regression analysis structure:
+  1. Root Cause: Identify the most likely cause of the significant regression. Be as specific as possible.
+  2. Impact: Describe the impact of the significant regression on the system.
+  3. Recommendations: Suggest corrective actions to address the significant regression.
 End this section with a line of 80 equals signs.
 
 *Most Impacted Metrics*
 For each config:
 - Transform config name to readable format: "/orion/examples/trt-external-payload-cluster-density.yaml" → "cluster-density"
 - Table header: e.g. *Config: cluster-density*
 - MANDATORY: Include ONLY top 10 metrics sorted by absolute percentage change (highest impact first)
-- Columns: Metric | Baseline | PR Value | Change (%)
+- For SINGLE PR — Columns: Metric | Baseline | PR Value | Change (%)
+- For MULTIPLE PRs — use a single combined table with all PRs side by side:
+    Columns: Metric | Baseline | PR #X Value | PR #X Change(%) | PR #Y Value | PR #Y Change(%) | ...
+    This lets the user compare PRs at a glance without scrolling between separate tables.
 - Format tables with `code` blocks, adjust column widths to fit data
 - No emojis in tables
 - Separate each config section with 80 equals signs.

diff --git a/bugzooka/integrations/slack_socket_listener.py b/bugzooka/integrations/slack_socket_listener.py
@@ -168,10 +168,12 @@ def _process_mention(self, event: Dict[str, Any]) -> None:
                     )
 
                 if analysis_result["success"]:
-                    org, repo, pr_number, version = analysis_result["pr_info"]
+                    org, repo, pr_numbers, version = analysis_result["pr_info"]
                     _pr_repo = f"{org}/{repo}"
+                    pr_display = ", ".join(f"#{pr}" for pr in pr_numbers)
                     self.logger.info(
-                        f"✅ Sent PR analysis for {org}/{repo}#{pr_number} (OpenShift {version}) to {user}"
+                        "Sent PR analysis for %s/%s %s (OpenShift %s) to %s",
+                        org, repo, pr_display, version, user,
                     )
                 else:
                     self.logger.warning(

diff --git a/tests/test_pr_analyzer.py b/tests/test_pr_analyzer.py
@@ -0,0 +1,79 @@
+"""
+Tests for PR analysis request parsing and output sanitization.
+"""
+from bugzooka.analysis.pr_analyzer import _parse_pr_request, _sanitize_gemini_output
+
+
+class TestParsePrRequest:
+    """Tests for _parse_pr_request() — single and multi-PR parsing."""
+
+    def test_single_pr(self):
+        text = "analyze pr: https://github.com/openshift/ovn-kubernetes/pull/3169, compare with 5.0"
+        result = _parse_pr_request(text)
+        assert result == ("openshift", "ovn-kubernetes", ["3169"], "5.0")
+
+    def test_multi_pr_space_separated(self):
+        text = (
+            "analyze pr: https://github.com/openshift/ovn-kubernetes/pull/3169 "
+            "https://github.com/openshift/ovn-kubernetes/pull/3170, compare with 5.0"
+        )
+        result = _parse_pr_request(text)
+        assert result == ("openshift", "ovn-kubernetes", ["3169", "3170"], "5.0")
+
+    def test_multi_pr_three_prs(self):
+        text = (
+            "analyze pr: https://github.com/openshift/ovn-kubernetes/pull/100 "
+            "https://github.com/openshift/ovn-kubernetes/pull/200 "
+            "https://github.com/openshift/ovn-kubernetes/pull/300, compare with 4.19"
+        )
+        result = _parse_pr_request(text)
+        assert result == ("openshift", "ovn-kubernetes", ["100", "200", "300"], "4.19")
+
+    def test_mixed_repos_returns_none(self):
+        text = (
+            "analyze pr: https://github.com/openshift/ovn-kubernetes/pull/3169 "
+            "https://github.com/openshift/installer/pull/100, compare with 5.0"
+        )
+        assert _parse_pr_request(text) is None
+
+    def test_mixed_orgs_returns_none(self):
+        text = (
+            "analyze pr: https://github.com/openshift/ovn-kubernetes/pull/3169 "
+            "https://github.com/other-org/ovn-kubernetes/pull/100, compare with 5.0"
+        )
+        assert _parse_pr_request(text) is None
+
+    def test_no_version_returns_none(self):
+        text = "analyze pr: https://github.com/openshift/ovn-kubernetes/pull/3169"
+        assert _parse_pr_request(text) is None
+
+    def test_no_url_returns_none(self):
+        text = "analyze pr: some random text, compare with 5.0"
+        assert _parse_pr_request(text) is None
+
+    def test_version_case_insensitive(self):
+        text = "analyze pr: https://github.com/openshift/ovn-kubernetes/pull/3169, Compare With 4.19"
+        result = _parse_pr_request(text)
+        assert result == ("openshift", "ovn-kubernetes", ["3169"], "4.19")
+
+    def test_http_url(self):
+        text = "analyze pr: http://github.com/openshift/ovn-kubernetes/pull/3169, compare with 5.0"
+        result = _parse_pr_request(text)
+        assert result == ("openshift", "ovn-kubernetes", ["3169"], "5.0")
+
+
+class TestSanitizeGeminiOutput:
+    """Tests for _sanitize_gemini_output()."""
+
+    def test_removes_thinking_before_marker(self):
+        result = "Let me think about this...\n*Performance Impact Assessment*\nActual content"
+        sanitized = _sanitize_gemini_output(result)
+        assert sanitized == "*Performance Impact Assessment*\nActual content"
+
+    def test_no_marker_returns_original(self):
+        result = "Some output without the marker"
+        assert _sanitize_gemini_output(result) == result
+
+    def test_marker_at_start_returns_unchanged(self):
+        result = "*Performance Impact Assessment*\nContent here"
+        assert _sanitize_gemini_output(result) == result