Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file modified scripts/start-dev.sh
100644 → 100755
Empty file.
130 changes: 105 additions & 25 deletions src/agents/repository_analysis_agent/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,9 @@ async def analyze_contributing_guidelines(state: RepositoryAnalysisState) -> Non
)


def _get_language_specific_patterns(language: str | None) -> tuple[list[str], list[str]]:
def _get_language_specific_patterns(
language: str | None,
) -> tuple[list[str], list[str]]:
"""
Get source and test patterns based on repository language.

Expand Down Expand Up @@ -140,13 +142,70 @@ def _get_language_specific_patterns(language: str | None) -> tuple[list[str], li
# Default fallback patterns for unknown languages
return (
["**/*.py", "**/*.ts", "**/*.tsx", "**/*.js", "**/*.go"],
["**/tests/**", "**/*_test.py", "**/*.spec.ts", "**/*.test.js", "**/*.test.ts", "**/*.test.jsx"],
[
"**/tests/**",
"**/*_test.py",
"**/*.spec.ts",
"**/*.test.js",
"**/*.test.ts",
"**/*.test.jsx",
],
)


def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecommendation]:
def _analyze_pr_bad_habits(state: RepositoryAnalysisState) -> dict[str, Any]:
"""
Analyze PR history to detect bad habits and patterns.

Returns a dict with detected issues like:
- missing_tests: PRs without test files (estimated based on changed_files)
- short_titles: PRs with very short titles (< 10 characters)
- no_reviews: PRs merged without reviews (always 0, as we can't determine this from list API)

Note: We can't analyze PR diffs/descriptions from the basic PR list API.
This would require fetching individual PR details which is expensive.
We analyze what we can from the PR list metadata.
"""
if not state.pr_samples:
return {}

issues: dict[str, Any] = {
"missing_tests": 0,
"short_titles": 0,
"no_reviews": 0,
"total_analyzed": len(state.pr_samples),
}
Comment on lines +156 to +177

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The type hint dict[str, Any] is used for both the function's return value and the issues variable. Since all values in the dictionary are integers, it would be more precise to use dict[str, int]. This improves type safety and makes the code easier to understand for future maintainers.

def _analyze_pr_bad_habits(state: RepositoryAnalysisState) -> dict[str, int]:
    """
    Analyze PR history to detect bad habits and patterns.

    Returns a dict with detected issues like:
    - missing_tests: PRs without test files (estimated based on changed_files)
    - short_titles: PRs with very short titles (< 10 characters)
    - no_reviews: PRs merged without reviews (always 0, as we can't determine this from list API)

    Note: We can't analyze PR diffs/descriptions from the basic PR list API.
    This would require fetching individual PR details which is expensive.
    We analyze what we can from the PR list metadata.
    """
    if not state.pr_samples:
        return {}

    issues: dict[str, int] = {
        "missing_tests": 0,
        "short_titles": 0,
        "no_reviews": 0,
        "total_analyzed": len(state.pr_samples),
    }


# Analyze PR titles for very short ones (likely missing context)
# A title < 10 characters is likely too short to be meaningful
short_title_threshold = 10
for pr in state.pr_samples:
if pr.title and len(pr.title.strip()) < short_title_threshold:
issues["short_titles"] += 1

# Estimate missing tests: if PR has changed_files but no test-related patterns
# This is a heuristic - we can't know for sure without fetching diffs
# For now, we'll use a simple heuristic: if changed_files > 0 and title doesn't mention tests
if pr.changed_files and pr.changed_files > 0:
title_lower = (pr.title or "").lower()
# If PR has code changes but title doesn't mention tests/test/tested/testing
if not any(word in title_lower for word in ["test", "tests", "tested", "testing", "spec"]):
# This is a weak signal, but we'll count it
issues["missing_tests"] += 1

return issues


def _default_recommendations(
state: RepositoryAnalysisState,
) -> list[RuleRecommendation]:
"""
Return a minimal, deterministic set of diff-aware rules.
Return a minimal, deterministic set of diff-aware rules based on repository analysis.

Rules are generated based on:
1. Repository language (for test patterns)
2. PR history analysis (for bad habits)
3. Contributing guidelines (if present)

Note: These recommendations use repository-specific patterns when available.
For more advanced use cases like restricting specific authors from specific paths
Expand All @@ -161,30 +220,47 @@ def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecomme
# Get language-specific patterns based on repository analysis
source_patterns, test_patterns = _get_language_specific_patterns(state.repository_features.language)

# Analyze PR history for bad habits
pr_issues = _analyze_pr_bad_habits(state)

# Require tests when source code changes.
# This is especially important if we detect missing tests in PR history
test_reasoning = f"Default guardrail for code changes without tests. Patterns adapted for {state.repository_features.language or 'multi-language'} repository."
if pr_issues.get("missing_tests", 0) > 0:
test_reasoning += f" Detected {pr_issues['missing_tests']} recent PRs without test files."

# Build YAML rule with proper indentation
# parameters: is at column 0, source_patterns: at column 2, list items at column 4
source_patterns_yaml = "\n".join(f' - "{pattern}"' for pattern in source_patterns)
test_patterns_yaml = "\n".join(f' - "{pattern}"' for pattern in test_patterns)

yaml_content = f"""description: "Require tests when code changes"
enabled: true
severity: medium
event_types:
- pull_request
parameters:
source_patterns:
{source_patterns_yaml}
test_patterns:
{test_patterns_yaml}
"""

recommendations.append(
RuleRecommendation(
yaml_rule=textwrap.dedent(
f"""
description: "Require tests when code changes"
enabled: true
severity: medium
event_types:
- pull_request
parameters:
source_patterns:
{chr(10).join(f' - "{pattern}"' for pattern in source_patterns)}
test_patterns:
{chr(10).join(f' - "{pattern}"' for pattern in test_patterns)}
"""
).strip(),
confidence=0.74,
reasoning=f"Default guardrail for code changes without tests. Patterns adapted for {state.repository_features.language or 'multi-language'} repository.",
yaml_rule=yaml_content.strip(),
confidence=0.74 if pr_issues.get("missing_tests", 0) == 0 else 0.85,
reasoning=test_reasoning,
strategy_used="hybrid",
)
)

# Require description in PR body.
# Increase confidence if we detect short titles in PR history (indicator of missing context)
desc_reasoning = "Encourage context for reviewers; lightweight default."
if pr_issues.get("short_titles", 0) > 0:
desc_reasoning += f" Detected {pr_issues['short_titles']} PRs with very short titles (likely missing context)."

recommendations.append(
RuleRecommendation(
yaml_rule=textwrap.dedent(
Expand All @@ -198,15 +274,19 @@ def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecomme
min_description_length: 50
"""
).strip(),
confidence=0.68,
reasoning="Encourage context for reviewers; lightweight default.",
confidence=0.68 if pr_issues.get("short_titles", 0) == 0 else 0.80,
reasoning=desc_reasoning,
strategy_used="static",
)
)

# If no CODEOWNERS, suggest one for shared ownership signals.
# Note: This is informational only - we can't enforce CODEOWNERS creation via validators
# but we can encourage it through the recommendation reasoning.
# If contributing guidelines require tests, increase confidence
if state.contributing_analysis.content is not None and state.contributing_analysis.requires_tests:
# Find the test rule and boost its confidence
for rec in recommendations:
if "tests" in rec.yaml_rule.lower():
rec.confidence = min(0.95, rec.confidence + 0.1)
rec.reasoning += " Contributing guidelines explicitly require tests."
Comment on lines +286 to +289

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The current logic for finding the test-related rule recommendation relies on a simple substring search ("tests" in rec.yaml_rule.lower()). This is brittle and could lead to incorrectly modifying another rule if its content happens to include the word "tests". A more robust approach would be to identify the rule by a more specific characteristic, such as its description, and to stop searching once the rule is found.

        for rec in recommendations:
            if 'description: "Require tests when code changes"' in rec.yaml_rule:
                rec.confidence = min(0.95, rec.confidence + 0.1)
                rec.reasoning += " Contributing guidelines explicitly require tests."
                break


return recommendations

Expand Down
30 changes: 24 additions & 6 deletions src/api/recommendations.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,16 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith
operation="proceed_with_pr",
subject_ids=[repo],
branch=request.branch_name,
base_branch=base_branch,
base_sha=base_sha,
error="Failed to create branch",
error="Failed to create branch - check logs for GitHub API error details",
)
raise HTTPException(
status_code=400, detail=f"Failed to create branch '{request.branch_name}'. It may already exist."
status_code=400,
detail=(
f"Failed to create branch '{request.branch_name}' from '{base_branch}'. "
"The branch may already exist or you may not have permission to create branches."
),
)

file_result = await github_client.create_or_update_file(
Expand All @@ -194,9 +199,15 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith
subject_ids=[repo],
branch=request.branch_name,
file_path=request.file_path,
error="Failed to create or update file",
error="Failed to create or update file - check logs for GitHub API error details",
)
raise HTTPException(
status_code=400,
detail=(
f"Failed to create or update file '{request.file_path}' on branch '{request.branch_name}'. "
"Check server logs for detailed error information."
),
)
raise HTTPException(status_code=400, detail="Failed to create or update rules file")

pr = await github_client.create_pull_request(
repo_full_name=repo,
Expand All @@ -214,9 +225,16 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith
subject_ids=[repo],
branch=request.branch_name,
base_branch=base_branch,
error="Failed to create pull request",
pr_title=request.pr_title,
error="Failed to create pull request - check logs for GitHub API error details",
)
raise HTTPException(
status_code=400,
detail=(
f"Failed to create pull request from '{request.branch_name}' to '{base_branch}'. "
"The PR may already exist, or you may not have permission to create PRs. Check server logs for details."
),
)
raise HTTPException(status_code=400, detail="Failed to create pull request")

pr_url = pr.get("html_url", "")
if not pr_url:
Expand Down
24 changes: 22 additions & 2 deletions src/integrations/github/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,7 @@ async def create_or_update_file(
"""Create or update a file via the Contents API."""
headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token)
if not headers:
logger.error(f"Failed to get auth headers for create_or_update_file in {repo_full_name}")
return None
url = f"{config.github.api_base_url}/repos/{repo_full_name}/contents/{path.lstrip('/')}"
payload: dict[str, Any] = {
Expand All @@ -825,7 +826,14 @@ async def create_or_update_file(
session = await self._get_session()
async with session.put(url, headers=headers, json=payload) as response:
if response.status in (200, 201):
return await response.json()
result = await response.json()
logger.info(f"Successfully created/updated file {path} in {repo_full_name} on branch {branch}")
return result
error_text = await response.text()
logger.error(
f"Failed to create/update file {path} in {repo_full_name} on branch {branch}. "
f"Status: {response.status}, Response: {error_text}"
)
return None

async def create_pull_request(
Expand All @@ -841,13 +849,25 @@ async def create_pull_request(
"""Open a pull request."""
headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token)
if not headers:
logger.error(f"Failed to get auth headers for create_pull_request in {repo_full_name}")
return None
url = f"{config.github.api_base_url}/repos/{repo_full_name}/pulls"
payload = {"title": title, "head": head, "base": base, "body": body}
session = await self._get_session()
async with session.post(url, headers=headers, json=payload) as response:
if response.status in (200, 201):
return await response.json()
result = await response.json()
pr_number = result.get("number")
pr_url = result.get("html_url", "")
logger.info(
f"Successfully created PR #{pr_number} in {repo_full_name}: {pr_url} (head: {head}, base: {base})"
)
return result
error_text = await response.text()
logger.error(
f"Failed to create PR in {repo_full_name} (head: {head}, base: {base}). "
f"Status: {response.status}, Response: {error_text}"
)
return None

async def _get_session(self) -> aiohttp.ClientSession:
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/api/test_proceed_with_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ async def _fake_get_sha(repo_full_name, ref, installation_id=None, user_token=No
return "base-sha"

async def _fake_create_ref(repo_full_name, ref, sha, installation_id=None, user_token=None):
return True
return {"ref": f"refs/heads/{ref}", "object": {"sha": sha}}

async def _fake_create_or_update_file(
repo_full_name, path, content, message, branch, installation_id=None, user_token=None, sha=None
Expand Down