diff --git a/scripts/start-dev.sh b/scripts/start-dev.sh old mode 100644 new mode 100755 diff --git a/src/agents/repository_analysis_agent/nodes.py b/src/agents/repository_analysis_agent/nodes.py index f740c91..edf2cb8 100644 --- a/src/agents/repository_analysis_agent/nodes.py +++ b/src/agents/repository_analysis_agent/nodes.py @@ -99,7 +99,9 @@ async def analyze_contributing_guidelines(state: RepositoryAnalysisState) -> Non ) -def _get_language_specific_patterns(language: str | None) -> tuple[list[str], list[str]]: +def _get_language_specific_patterns( + language: str | None, +) -> tuple[list[str], list[str]]: """ Get source and test patterns based on repository language. @@ -140,13 +142,70 @@ def _get_language_specific_patterns(language: str | None) -> tuple[list[str], li # Default fallback patterns for unknown languages return ( ["**/*.py", "**/*.ts", "**/*.tsx", "**/*.js", "**/*.go"], - ["**/tests/**", "**/*_test.py", "**/*.spec.ts", "**/*.test.js", "**/*.test.ts", "**/*.test.jsx"], + [ + "**/tests/**", + "**/*_test.py", + "**/*.spec.ts", + "**/*.test.js", + "**/*.test.ts", + "**/*.test.jsx", + ], ) -def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecommendation]: +def _analyze_pr_bad_habits(state: RepositoryAnalysisState) -> dict[str, Any]: + """ + Analyze PR history to detect bad habits and patterns. + + Returns a dict with detected issues like: + - missing_tests: PRs without test files (estimated based on changed_files) + - short_titles: PRs with very short titles (< 10 characters) + - no_reviews: PRs merged without reviews (always 0, as we can't determine this from list API) + + Note: We can't analyze PR diffs/descriptions from the basic PR list API. + This would require fetching individual PR details which is expensive. + We analyze what we can from the PR list metadata. + """ + if not state.pr_samples: + return {} + + issues: dict[str, Any] = { + "missing_tests": 0, + "short_titles": 0, + "no_reviews": 0, + "total_analyzed": len(state.pr_samples), + } + + # Analyze PR titles for very short ones (likely missing context) + # A title < 10 characters is likely too short to be meaningful + short_title_threshold = 10 + for pr in state.pr_samples: + if pr.title and len(pr.title.strip()) < short_title_threshold: + issues["short_titles"] += 1 + + # Estimate missing tests: if PR has changed_files but no test-related patterns + # This is a heuristic - we can't know for sure without fetching diffs + # For now, we'll use a simple heuristic: if changed_files > 0 and title doesn't mention tests + if pr.changed_files and pr.changed_files > 0: + title_lower = (pr.title or "").lower() + # If PR has code changes but title doesn't mention tests/test/tested/testing + if not any(word in title_lower for word in ["test", "tests", "tested", "testing", "spec"]): + # This is a weak signal, but we'll count it + issues["missing_tests"] += 1 + + return issues + + +def _default_recommendations( + state: RepositoryAnalysisState, +) -> list[RuleRecommendation]: """ - Return a minimal, deterministic set of diff-aware rules. + Return a minimal, deterministic set of diff-aware rules based on repository analysis. + + Rules are generated based on: + 1. Repository language (for test patterns) + 2. PR history analysis (for bad habits) + 3. Contributing guidelines (if present) Note: These recommendations use repository-specific patterns when available. For more advanced use cases like restricting specific authors from specific paths @@ -161,30 +220,47 @@ def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecomme # Get language-specific patterns based on repository analysis source_patterns, test_patterns = _get_language_specific_patterns(state.repository_features.language) + # Analyze PR history for bad habits + pr_issues = _analyze_pr_bad_habits(state) + # Require tests when source code changes. + # This is especially important if we detect missing tests in PR history + test_reasoning = f"Default guardrail for code changes without tests. Patterns adapted for {state.repository_features.language or 'multi-language'} repository." + if pr_issues.get("missing_tests", 0) > 0: + test_reasoning += f" Detected {pr_issues['missing_tests']} recent PRs without test files." + + # Build YAML rule with proper indentation + # parameters: is at column 0, source_patterns: at column 2, list items at column 4 + source_patterns_yaml = "\n".join(f' - "{pattern}"' for pattern in source_patterns) + test_patterns_yaml = "\n".join(f' - "{pattern}"' for pattern in test_patterns) + + yaml_content = f"""description: "Require tests when code changes" +enabled: true +severity: medium +event_types: + - pull_request +parameters: + source_patterns: +{source_patterns_yaml} + test_patterns: +{test_patterns_yaml} +""" + recommendations.append( RuleRecommendation( - yaml_rule=textwrap.dedent( - f""" - description: "Require tests when code changes" - enabled: true - severity: medium - event_types: - - pull_request - parameters: - source_patterns: -{chr(10).join(f' - "{pattern}"' for pattern in source_patterns)} - test_patterns: -{chr(10).join(f' - "{pattern}"' for pattern in test_patterns)} - """ - ).strip(), - confidence=0.74, - reasoning=f"Default guardrail for code changes without tests. Patterns adapted for {state.repository_features.language or 'multi-language'} repository.", + yaml_rule=yaml_content.strip(), + confidence=0.74 if pr_issues.get("missing_tests", 0) == 0 else 0.85, + reasoning=test_reasoning, strategy_used="hybrid", ) ) # Require description in PR body. + # Increase confidence if we detect short titles in PR history (indicator of missing context) + desc_reasoning = "Encourage context for reviewers; lightweight default." + if pr_issues.get("short_titles", 0) > 0: + desc_reasoning += f" Detected {pr_issues['short_titles']} PRs with very short titles (likely missing context)." + recommendations.append( RuleRecommendation( yaml_rule=textwrap.dedent( @@ -198,15 +274,19 @@ def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecomme min_description_length: 50 """ ).strip(), - confidence=0.68, - reasoning="Encourage context for reviewers; lightweight default.", + confidence=0.68 if pr_issues.get("short_titles", 0) == 0 else 0.80, + reasoning=desc_reasoning, strategy_used="static", ) ) - # If no CODEOWNERS, suggest one for shared ownership signals. - # Note: This is informational only - we can't enforce CODEOWNERS creation via validators - # but we can encourage it through the recommendation reasoning. + # If contributing guidelines require tests, increase confidence + if state.contributing_analysis.content is not None and state.contributing_analysis.requires_tests: + # Find the test rule and boost its confidence + for rec in recommendations: + if "tests" in rec.yaml_rule.lower(): + rec.confidence = min(0.95, rec.confidence + 0.1) + rec.reasoning += " Contributing guidelines explicitly require tests." return recommendations diff --git a/src/api/recommendations.py b/src/api/recommendations.py index de2525e..1a0ef22 100644 --- a/src/api/recommendations.py +++ b/src/api/recommendations.py @@ -171,11 +171,16 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith operation="proceed_with_pr", subject_ids=[repo], branch=request.branch_name, + base_branch=base_branch, base_sha=base_sha, - error="Failed to create branch", + error="Failed to create branch - check logs for GitHub API error details", ) raise HTTPException( - status_code=400, detail=f"Failed to create branch '{request.branch_name}'. It may already exist." + status_code=400, + detail=( + f"Failed to create branch '{request.branch_name}' from '{base_branch}'. " + "The branch may already exist or you may not have permission to create branches." + ), ) file_result = await github_client.create_or_update_file( @@ -194,9 +199,15 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith subject_ids=[repo], branch=request.branch_name, file_path=request.file_path, - error="Failed to create or update file", + error="Failed to create or update file - check logs for GitHub API error details", + ) + raise HTTPException( + status_code=400, + detail=( + f"Failed to create or update file '{request.file_path}' on branch '{request.branch_name}'. " + "Check server logs for detailed error information." + ), ) - raise HTTPException(status_code=400, detail="Failed to create or update rules file") pr = await github_client.create_pull_request( repo_full_name=repo, @@ -214,9 +225,16 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith subject_ids=[repo], branch=request.branch_name, base_branch=base_branch, - error="Failed to create pull request", + pr_title=request.pr_title, + error="Failed to create pull request - check logs for GitHub API error details", + ) + raise HTTPException( + status_code=400, + detail=( + f"Failed to create pull request from '{request.branch_name}' to '{base_branch}'. " + "The PR may already exist, or you may not have permission to create PRs. Check server logs for details." + ), ) - raise HTTPException(status_code=400, detail="Failed to create pull request") pr_url = pr.get("html_url", "") if not pr_url: diff --git a/src/integrations/github/api.py b/src/integrations/github/api.py index a633b3f..181597e 100644 --- a/src/integrations/github/api.py +++ b/src/integrations/github/api.py @@ -813,6 +813,7 @@ async def create_or_update_file( """Create or update a file via the Contents API.""" headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token) if not headers: + logger.error(f"Failed to get auth headers for create_or_update_file in {repo_full_name}") return None url = f"{config.github.api_base_url}/repos/{repo_full_name}/contents/{path.lstrip('/')}" payload: dict[str, Any] = { @@ -825,7 +826,14 @@ async def create_or_update_file( session = await self._get_session() async with session.put(url, headers=headers, json=payload) as response: if response.status in (200, 201): - return await response.json() + result = await response.json() + logger.info(f"Successfully created/updated file {path} in {repo_full_name} on branch {branch}") + return result + error_text = await response.text() + logger.error( + f"Failed to create/update file {path} in {repo_full_name} on branch {branch}. " + f"Status: {response.status}, Response: {error_text}" + ) return None async def create_pull_request( @@ -841,13 +849,25 @@ async def create_pull_request( """Open a pull request.""" headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token) if not headers: + logger.error(f"Failed to get auth headers for create_pull_request in {repo_full_name}") return None url = f"{config.github.api_base_url}/repos/{repo_full_name}/pulls" payload = {"title": title, "head": head, "base": base, "body": body} session = await self._get_session() async with session.post(url, headers=headers, json=payload) as response: if response.status in (200, 201): - return await response.json() + result = await response.json() + pr_number = result.get("number") + pr_url = result.get("html_url", "") + logger.info( + f"Successfully created PR #{pr_number} in {repo_full_name}: {pr_url} (head: {head}, base: {base})" + ) + return result + error_text = await response.text() + logger.error( + f"Failed to create PR in {repo_full_name} (head: {head}, base: {base}). " + f"Status: {response.status}, Response: {error_text}" + ) return None async def _get_session(self) -> aiohttp.ClientSession: diff --git a/tests/unit/api/test_proceed_with_pr.py b/tests/unit/api/test_proceed_with_pr.py index 334b412..97959d6 100644 --- a/tests/unit/api/test_proceed_with_pr.py +++ b/tests/unit/api/test_proceed_with_pr.py @@ -13,7 +13,7 @@ async def _fake_get_sha(repo_full_name, ref, installation_id=None, user_token=No return "base-sha" async def _fake_create_ref(repo_full_name, ref, sha, installation_id=None, user_token=None): - return True + return {"ref": f"refs/heads/{ref}", "object": {"sha": sha}} async def _fake_create_or_update_file( repo_full_name, path, content, message, branch, installation_id=None, user_token=None, sha=None