Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 32 additions & 22 deletions bugzooka/analysis/pr_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,37 +49,41 @@ def _sanitize_gemini_output(result: str) -> str:
return sanitized


def _parse_pr_request(text: str) -> Optional[Tuple[str, str, str, str]]:
def _parse_pr_request(text: str) -> Optional[Tuple[str, str, list, str]]:
"""
Parse PR analysis request from text.

Expected format: "analyze pr: {github_url}, compare with {version}"
Expected formats:
Single PR: "analyze pr: {github_url}, compare with {version}"
Multi PR: "analyze pr: {url1} {url2}, compare with {version}"

Both PR URL and version are required.
All PRs must be from the same org/repo. Both PR URL(s) and version are required.

:param text: Message text to parse
:return: Tuple of (organization, repository, pr_number, version) or None if invalid
:return: Tuple of (organization, repository, pr_numbers, version) or None if invalid
"""
# Match GitHub PR URLs like https://github.com/org/repo/pull/123
pr_pattern = r"https?://github\.com/([^/]+)/([^/]+)/pull/(\d+)"
pr_match = re.search(pr_pattern, text)
pr_matches = re.findall(pr_pattern, text)

if not pr_match:
if not pr_matches:
return None

org, repo, pr_number = pr_match.groups()
org, repo, _ = pr_matches[0]
for match_org, match_repo, _ in pr_matches[1:]:
if match_org != org or match_repo != repo:
return None

pr_numbers = [m[2] for m in pr_matches]

# Extract version from "compare with X.XX" pattern - REQUIRED
version_pattern = r"compare\s+with\s+(\d+\.\d+)"
version_match = re.search(version_pattern, text, re.IGNORECASE)

if not version_match:
# Version is required - return None to trigger validation error
return None

version = version_match.group(1)

return org, repo, pr_number, version
return org, repo, pr_numbers, version


async def analyze_pr_with_gemini(text: str, channel_id: str = None) -> dict:
Expand Down Expand Up @@ -117,9 +121,11 @@ async def analyze_pr_with_gemini(text: str, channel_id: str = None) -> dict:
),
)

org, repo, pr_number, version = parsed
org, repo, pr_numbers, version = parsed
pr_display = ", ".join(f"#{pr}" for pr in pr_numbers)
logger.info(
f"🔍 PR analysis requested for {org}/{repo}/pull/{pr_number} (OpenShift {version})"
"PR analysis requested for %s/%s %s (OpenShift %s)",
org, repo, pr_display, version,
)

# Ensure MCP client is initialized
Expand All @@ -132,19 +138,23 @@ async def analyze_pr_with_gemini(text: str, channel_id: str = None) -> dict:

try:
logger.info(
"Starting PR performance analysis: %s/%s#%s (OpenShift %s)",
"Starting PR performance analysis: %s/%s %s (OpenShift %s)",
org,
repo,
pr_number,
pr_display,
version,
)

# Create prompt for PR analysis using centralized prompts
pr_url = f"https://github.com/{org}/{repo}/pull/{pr_number}"
pr_urls = ", ".join(
f"https://github.com/{org}/{repo}/pull/{pr}" for pr in pr_numbers
)
pr_numbers_str = ",".join(pr_numbers)

system_prompt = PR_PERFORMANCE_ANALYSIS_PROMPT["system"]
user_prompt = PR_PERFORMANCE_ANALYSIS_PROMPT["user"].format(
org=org, repo=repo, pr_number=pr_number, pr_url=pr_url, version=version
org=org, repo=repo, pr_numbers=pr_numbers_str,
pr_urls=pr_urls, version=version,
)
assistant_prompt = PR_PERFORMANCE_ANALYSIS_PROMPT["assistant"]

Expand Down Expand Up @@ -184,22 +194,22 @@ async def analyze_pr_with_gemini(text: str, channel_id: str = None) -> dict:
# Also check if the result is very short (likely just a "no data" message)
if len(result.strip()) < 200:
logger.info(
"No performance test data found for PR %s/%s#%s",
"No performance test data found for PR %s/%s %s",
org,
repo,
pr_number,
pr_display,
)
return make_response(
success=True,
message=(
f"No performance test results found for PR #{pr_number}\n\n"
f"No performance test results found for PR {pr_display}\n\n"
f"This could mean:\n"
f"- Performance tests haven't run yet for this PR\n"
f"- The PR doesn't trigger performance test jobs\n"
f"- Test results are not yet available in the Orion database\n\n"
f"Check back later or verify that performance tests are configured for this repository."
),
pr_info=(org, repo, pr_number, version),
pr_info=(org, repo, pr_numbers, version),
)

logger.info("PR analysis completed successfully (%d chars)", len(result))
Expand All @@ -210,7 +220,7 @@ async def analyze_pr_with_gemini(text: str, channel_id: str = None) -> dict:
return make_response(
success=True,
message=sanitized_result,
pr_info=(org, repo, pr_number, version),
pr_info=(org, repo, pr_numbers, version),
)

except Exception as e:
Expand Down
37 changes: 25 additions & 12 deletions bugzooka/analysis/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,37 +75,50 @@
5. **Sort All Metrics**: ALWAYS sort metrics by absolute percentage change (highest to lowest) in all tables and lists.
6. **Format Output**: Use Slack-friendly formatting as specified in user instructions.
""",
"user": """Please analyze the performance of this pull request:
"user": """Please analyze the performance of the following pull request(s):
- Organization: {org}
- Repository: {repo}
- Pull Request Number: {pr_number}
- PR URL: {pr_url}
- Pull Request Number(s): {pr_numbers}
- PR URL(s): {pr_urls}
- OpenShift Version: {version}

**Tool Calling Instructions:**
- If analyzing a SINGLE PR, call `openshift_report_on_pr` with the `pull_request` parameter set to the PR number.
- If analyzing MULTIPLE PRs, call `openshift_report_on_pr` with the `pull_requests` parameter set to a comma-separated string of PR numbers (e.g. "3169,3170"). The tool returns results for each PR under the `pulls` key.

**Required Output Structure:**
Output ONLY the sections below with ABSOLUTELY NO additional commentary, thinking process, or meta-commentary.

*Performance Impact Assessment*
- Overall Impact: State EXACTLY one of: ":exclamation: *Regression* :exclamation:" (only if 1 or more significant regression found), ":rocket: *Improvement* :rocket:" (only if 1 or more significant improvement found), ":arrow_right: *Neutral* :arrow_right:" (no significant changes)
- Significant regressions (≥10%): List with 🛑 emoji, metric name and short config name, grouped by config. ONLY include if |change| >= 10% AND classified as regression. Do not use bold font, omit section entirely if none found.
- Significant improvements (≥10%): List with 🚀 emoji, metric name and short config name, grouped by config. ONLY include if |change| >= 10% AND classified as improvement. Do not use bold font, omit section entirely if none found.
- Moderate regressions (5-10%): List with ⚠️ emoji, metric name and short config name, grouped by config. ONLY include if 5% <= |change| < 10% AND classified as regression. Do not use bold font, omit section entirely if none found.
- Moderate improvements (5-10%): List with ✅ emoji, metric name and short config name, grouped by config. ONLY include if 5% <= |change| < 10% AND classified as improvement. Do not use bold font, omit section entirely if none found.
- For SINGLE PR: show one assessment section.
- For MULTIPLE PRs: show a SEPARATE assessment section per PR (e.g. "*PR #3169*" then "*PR #3170*"). Each PR is independently compared against the periodic baseline — do NOT compare PRs against each other.
- Per-PR assessment structure:
- Overall Impact: State EXACTLY one of: ":exclamation: *Regression* :exclamation:" (only if 1 or more significant regression found), ":rocket: *Improvement* :rocket:" (only if 1 or more significant improvement found), ":arrow_right: *Neutral* :arrow_right:" (no significant changes)
- Significant regressions (≥10%): List with 🛑 emoji, metric name, grouped by config. ONLY include if |change| >= 10% AND classified as regression. Do not use bold font, omit section entirely if none found.
- Significant improvements (≥10%): List with 🚀 emoji, metric name, grouped by config. ONLY include if |change| >= 10% AND classified as improvement. Do not use bold font, omit section entirely if none found.
- Moderate regressions (5-10%): List with ⚠️ emoji, metric name, grouped by config. ONLY include if 5% <= |change| < 10% AND classified as regression. Do not use bold font, omit section entirely if none found.
- Moderate improvements (5-10%): List with ✅ emoji, metric name, grouped by config. ONLY include if 5% <= |change| < 10% AND classified as improvement. Do not use bold font, omit section entirely if none found.
- End this section with a line of 80 equals signs.

*ONLY IF SIGNIFICANT REGRESSION IS FOUND, INCLUDE THE FOLLOWING SECTION*
*Regression Analysis*:
1. Root Cause: Identify the most likely cause of the significant regression. Be as specific as possible.
2. Impact: Describe the impact of the significant regression on the system.
3. Recommendations: Suggest corrective actions to address the significant regression.
- For SINGLE PR: one regression analysis section.
- For MULTIPLE PRs: a SEPARATE regression analysis per PR that has significant regressions. Label each with the PR number (e.g. "*Regression Analysis (PR #3169)*").
- Per-PR regression analysis structure:
1. Root Cause: Identify the most likely cause of the significant regression. Be as specific as possible.
2. Impact: Describe the impact of the significant regression on the system.
3. Recommendations: Suggest corrective actions to address the significant regression.
End this section with a line of 80 equals signs.

*Most Impacted Metrics*
For each config:
- Transform config name to readable format: "/orion/examples/trt-external-payload-cluster-density.yaml" → "cluster-density"
- Table header: e.g. *Config: cluster-density*
- MANDATORY: Include ONLY top 10 metrics sorted by absolute percentage change (highest impact first)
- Columns: Metric | Baseline | PR Value | Change (%)
- For SINGLE PR — Columns: Metric | Baseline | PR Value | Change (%)
- For MULTIPLE PRs — use a single combined table with all PRs side by side:
Columns: Metric | Baseline | PR #X Value | PR #X Change(%) | PR #Y Value | PR #Y Change(%) | ...
This lets the user compare PRs at a glance without scrolling between separate tables.
- Format tables with `code` blocks, adjust column widths to fit data
- No emojis in tables
- Separate each config section with 80 equals signs.
Expand Down
6 changes: 4 additions & 2 deletions bugzooka/integrations/slack_socket_listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,12 @@ def _process_mention(self, event: Dict[str, Any]) -> None:
)

if analysis_result["success"]:
org, repo, pr_number, version = analysis_result["pr_info"]
org, repo, pr_numbers, version = analysis_result["pr_info"]
_pr_repo = f"{org}/{repo}"
pr_display = ", ".join(f"#{pr}" for pr in pr_numbers)
self.logger.info(
f"✅ Sent PR analysis for {org}/{repo}#{pr_number} (OpenShift {version}) to {user}"
"Sent PR analysis for %s/%s %s (OpenShift %s) to %s",
org, repo, pr_display, version, user,
)
else:
self.logger.warning(
Expand Down
79 changes: 79 additions & 0 deletions tests/test_pr_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""
Tests for PR analysis request parsing and output sanitization.
"""
from bugzooka.analysis.pr_analyzer import _parse_pr_request, _sanitize_gemini_output


class TestParsePrRequest:
"""Tests for _parse_pr_request() — single and multi-PR parsing."""

def test_single_pr(self):
text = "analyze pr: https://github.com/openshift/ovn-kubernetes/pull/3169, compare with 5.0"
result = _parse_pr_request(text)
assert result == ("openshift", "ovn-kubernetes", ["3169"], "5.0")

def test_multi_pr_space_separated(self):
text = (
"analyze pr: https://github.com/openshift/ovn-kubernetes/pull/3169 "
"https://github.com/openshift/ovn-kubernetes/pull/3170, compare with 5.0"
)
result = _parse_pr_request(text)
assert result == ("openshift", "ovn-kubernetes", ["3169", "3170"], "5.0")

def test_multi_pr_three_prs(self):
text = (
"analyze pr: https://github.com/openshift/ovn-kubernetes/pull/100 "
"https://github.com/openshift/ovn-kubernetes/pull/200 "
"https://github.com/openshift/ovn-kubernetes/pull/300, compare with 4.19"
)
result = _parse_pr_request(text)
assert result == ("openshift", "ovn-kubernetes", ["100", "200", "300"], "4.19")

def test_mixed_repos_returns_none(self):
text = (
"analyze pr: https://github.com/openshift/ovn-kubernetes/pull/3169 "
"https://github.com/openshift/installer/pull/100, compare with 5.0"
)
assert _parse_pr_request(text) is None

def test_mixed_orgs_returns_none(self):
text = (
"analyze pr: https://github.com/openshift/ovn-kubernetes/pull/3169 "
"https://github.com/other-org/ovn-kubernetes/pull/100, compare with 5.0"
)
assert _parse_pr_request(text) is None

def test_no_version_returns_none(self):
text = "analyze pr: https://github.com/openshift/ovn-kubernetes/pull/3169"
assert _parse_pr_request(text) is None

def test_no_url_returns_none(self):
text = "analyze pr: some random text, compare with 5.0"
assert _parse_pr_request(text) is None

def test_version_case_insensitive(self):
text = "analyze pr: https://github.com/openshift/ovn-kubernetes/pull/3169, Compare With 4.19"
result = _parse_pr_request(text)
assert result == ("openshift", "ovn-kubernetes", ["3169"], "4.19")

def test_http_url(self):
text = "analyze pr: http://github.com/openshift/ovn-kubernetes/pull/3169, compare with 5.0"
result = _parse_pr_request(text)
assert result == ("openshift", "ovn-kubernetes", ["3169"], "5.0")


class TestSanitizeGeminiOutput:
"""Tests for _sanitize_gemini_output()."""

def test_removes_thinking_before_marker(self):
result = "Let me think about this...\n*Performance Impact Assessment*\nActual content"
sanitized = _sanitize_gemini_output(result)
assert sanitized == "*Performance Impact Assessment*\nActual content"

def test_no_marker_returns_original(self):
result = "Some output without the marker"
assert _sanitize_gemini_output(result) == result

def test_marker_at_start_returns_unchanged(self):
result = "*Performance Impact Assessment*\nContent here"
assert _sanitize_gemini_output(result) == result
Loading