import logging
import time
from datetime import datetime
from typing import Any, Dict

from langgraph.graph import END, START, StateGraph

from src.agents.base import AgentResult, BaseAgent
from src.agents.repository_analysis_agent.models import (
    RepositoryAnalysisRequest,
    RepositoryAnalysisResponse,
    RepositoryAnalysisState,
)
from src.agents.repository_analysis_agent.nodes import (
    analyze_contributing_guidelines,
    analyze_pr_history,  # FIX: used in _build_graph below but missing from this import list
    analyze_repository_structure,
    generate_rule_recommendations,
    summarize_analysis,
    validate_recommendations,
)

logger = logging.getLogger(__name__)


class RepositoryAnalysisAgent(BaseAgent):
    """
    Agent that analyzes GitHub repositories to generate Watchflow rule recommendations.

    This agent performs multi-step analysis:
    1. Analyzes repository structure and features
    2. Samples recent PR history
    3. Parses contributing guidelines for patterns
    4. Generates rule recommendations with confidence scores
    5. Validates recommendations are valid YAML

    Returns structured recommendations that can be directly used as Watchflow rules.
    """

    def __init__(self, max_retries: int = 3, timeout: float = 120.0):
        """
        Args:
            max_retries: Retry budget forwarded to BaseAgent.
            timeout: Wall-clock budget (seconds) for one full analysis run.
        """
        super().__init__(max_retries=max_retries, agent_name="repository_analysis_agent")
        self.timeout = timeout

        logger.info("Repository Analysis Agent initialized")
        logger.info(f"Max retries: {max_retries}, Timeout: {timeout}s")

    def _build_graph(self) -> StateGraph:
        """Build the LangGraph workflow for repository analysis."""
        workflow = StateGraph(RepositoryAnalysisState)

        # Add nodes
        workflow.add_node("analyze_repository_structure", analyze_repository_structure)
        workflow.add_node("analyze_pr_history", analyze_pr_history)
        workflow.add_node("analyze_contributing_guidelines", analyze_contributing_guidelines)
        workflow.add_node("generate_rule_recommendations", generate_rule_recommendations)
        workflow.add_node("validate_recommendations", validate_recommendations)
        workflow.add_node("summarize_analysis", summarize_analysis)

        # Strictly sequential pipeline: structure -> PR history -> guidelines
        # -> generate -> validate -> summarize.
        workflow.add_edge(START, "analyze_repository_structure")
        workflow.add_edge("analyze_repository_structure", "analyze_pr_history")
        workflow.add_edge("analyze_pr_history", "analyze_contributing_guidelines")
        workflow.add_edge("analyze_contributing_guidelines", "generate_rule_recommendations")
        workflow.add_edge("generate_rule_recommendations", "validate_recommendations")
        workflow.add_edge("validate_recommendations", "summarize_analysis")
        workflow.add_edge("summarize_analysis", END)

        return workflow.compile()

    async def execute(
        self,
        repository_full_name: str,
        installation_id: int | None = None,
        **kwargs,
    ) -> AgentResult:
        """
        Analyze a repository and generate rule recommendations.

        Args:
            repository_full_name: Full repository name (owner/repo)
            installation_id: Optional GitHub App installation ID for private repos
            **kwargs: Additional parameters (currently unused)

        Returns:
            AgentResult containing analysis results and recommendations
        """
        start_time = time.time()

        try:
            logger.info(f"Starting repository analysis for {repository_full_name}")

            # Validate input early; a malformed name would fail every API call later.
            if not repository_full_name or "/" not in repository_full_name:
                return AgentResult(
                    success=False,
                    message="Invalid repository name format. Expected 'owner/repo'",
                    data={},
                    metadata={"execution_time_ms": 0},
                )

            initial_state = RepositoryAnalysisState(
                repository_full_name=repository_full_name,
                installation_id=installation_id,
                analysis_steps=[],
                errors=[],
            )

            logger.info("Initial state prepared, starting analysis workflow")

            result = await self._execute_with_timeout(
                self.graph.ainvoke(initial_state),
                timeout=self.timeout,
            )

            execution_time = time.time() - start_time
            logger.info(f"Analysis completed in {execution_time:.2f}s")

            # LangGraph may return a plain dict; normalize back to the state model.
            if isinstance(result, dict):
                state = RepositoryAnalysisState(**result)
            else:
                state = result

            response = RepositoryAnalysisResponse(
                repository_full_name=repository_full_name,
                recommendations=state.recommendations,
                analysis_summary=state.analysis_summary,
                analyzed_at=datetime.now().isoformat(),
                total_recommendations=len(state.recommendations),
            )

            # Any recorded node error marks the run unsuccessful, even though
            # partial recommendations may still have been produced.
            has_errors = len(state.errors) > 0
            success_message = (
                f"Analysis completed successfully with {len(state.recommendations)} recommendations"
            )
            if has_errors:
                success_message += f" ({len(state.errors)} errors encountered)"

            logger.info(
                f"Analysis result: {len(state.recommendations)} recommendations, {len(state.errors)} errors"
            )

            return AgentResult(
                success=not has_errors,
                message=success_message,
                data={"analysis_response": response},
                metadata={
                    "execution_time_ms": execution_time * 1000,
                    "recommendations_count": len(state.recommendations),
                    "errors_count": len(state.errors),
                    "analysis_steps": state.analysis_steps,
                },
            )

        except Exception as e:
            execution_time = time.time() - start_time
            logger.error(f"Error in repository analysis: {e}")

            return AgentResult(
                success=False,
                message=f"Repository analysis failed: {str(e)}",
                data={},
                metadata={
                    "execution_time_ms": execution_time * 1000,
                    "error_type": type(e).__name__,
                },
            )

    async def analyze_repository(self, request: RepositoryAnalysisRequest) -> RepositoryAnalysisResponse:
        """
        Convenience wrapper around execute() using the request/response models.

        Args:
            request: Repository analysis request

        Returns:
            Repository analysis response (an error-summary response on failure)
        """
        result = await self.execute(
            repository_full_name=request.repository_full_name,
            installation_id=request.installation_id,
        )

        if result.success and "analysis_response" in result.data:
            return result.data["analysis_response"]

        # Fall back to an empty response that carries the failure message.
        return RepositoryAnalysisResponse(
            repository_full_name=request.repository_full_name,
            recommendations=[],
            analysis_summary={"error": result.message},
            analyzed_at=datetime.now().isoformat(),
            total_recommendations=0,
        )
from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


class AnalysisSource(str, Enum):
    """Sources of analysis data for rule recommendations."""

    CONTRIBUTING_GUIDELINES = "contributing_guidelines"
    REPOSITORY_STRUCTURE = "repository_structure"
    WORKFLOWS = "workflows"
    BRANCH_PROTECTION = "branch_protection"
    COMMIT_PATTERNS = "commit_patterns"
    PR_PATTERNS = "pr_patterns"


class RuleRecommendation(BaseModel):
    """A recommended Watchflow rule with confidence and reasoning."""

    yaml_content: str = Field(description="Valid Watchflow rule YAML content")
    confidence: float = Field(
        description="Confidence score (0.0-1.0) in the recommendation",
        ge=0.0,
        le=1.0,
    )
    reasoning: str = Field(description="Explanation of why this rule is recommended")
    source_patterns: List[str] = Field(
        description="Repository patterns that led to this recommendation",
        default_factory=list,
    )
    category: str = Field(description="Category of the rule (e.g., 'quality', 'security', 'process')")
    estimated_impact: str = Field(description="Expected impact (e.g., 'high', 'medium', 'low')")


class RepositoryAnalysisRequest(BaseModel):
    """Request model for repository analysis."""

    repository_full_name: str = Field(description="Full repository name (owner/repo)")
    installation_id: Optional[int] = Field(
        description="GitHub App installation ID for accessing private repos",
        default=None,
    )


class RepositoryFeatures(BaseModel):
    """Features and characteristics discovered in the repository."""

    has_contributing: bool = Field(description="Has CONTRIBUTING.md file", default=False)
    has_codeowners: bool = Field(description="Has CODEOWNERS file", default=False)
    has_workflows: bool = Field(description="Has GitHub Actions workflows", default=False)
    has_branch_protection: bool = Field(description="Has branch protection rules", default=False)
    workflow_count: int = Field(description="Number of workflow files", default=0)
    language: Optional[str] = Field(description="Primary programming language", default=None)
    contributor_count: int = Field(description="Number of contributors", default=0)
    pr_count: int = Field(description="Number of pull requests", default=0)
    issue_count: int = Field(description="Number of issues", default=0)


class ContributingGuidelinesAnalysis(BaseModel):
    """Analysis of contributing guidelines content."""

    content: Optional[str] = Field(description="Full CONTRIBUTING.md content", default=None)
    has_pr_template: bool = Field(description="Requires PR templates", default=False)
    has_issue_template: bool = Field(description="Requires issue templates", default=False)
    requires_tests: bool = Field(description="Requires tests for contributions", default=False)
    requires_docs: bool = Field(description="Requires documentation updates", default=False)
    code_style_requirements: List[str] = Field(
        description="Code style requirements mentioned",
        default_factory=list,
    )
    review_requirements: List[str] = Field(
        description="Code review requirements mentioned",
        default_factory=list,
    )


class RepositoryAnalysisState(BaseModel):
    """State for the repository analysis workflow."""

    repository_full_name: str
    # FIX: default to None for consistency with RepositoryAnalysisRequest, so a
    # state can be constructed without an installation id (public repos).
    installation_id: Optional[int] = None
    pr_samples: List[Dict[str, Any]] = Field(default_factory=list)

    # Analysis data
    repository_features: RepositoryFeatures = Field(default_factory=RepositoryFeatures)
    contributing_analysis: ContributingGuidelinesAnalysis = Field(
        default_factory=ContributingGuidelinesAnalysis
    )

    # Processing state
    analysis_steps: List[str] = Field(default_factory=list)
    errors: List[str] = Field(default_factory=list)

    # Results
    recommendations: List[RuleRecommendation] = Field(default_factory=list)
    analysis_summary: Dict[str, Any] = Field(default_factory=dict)


class RepositoryAnalysisResponse(BaseModel):
    """Response model containing rule recommendations."""

    repository_full_name: str = Field(description="Repository that was analyzed")
    recommendations: List[RuleRecommendation] = Field(
        description="List of recommended Watchflow rules",
        default_factory=list,
    )
    analysis_summary: Dict[str, Any] = Field(
        description="Summary of analysis findings",
        default_factory=dict,
    )
    analyzed_at: str = Field(description="Timestamp of analysis")
    total_recommendations: int = Field(description="Total number of recommendations made")
import logging
from typing import Any, Dict

from src.agents.repository_analysis_agent.models import (
    AnalysisSource,
    ContributingGuidelinesAnalysis,
    RepositoryAnalysisState,
    RepositoryFeatures,
    RuleRecommendation,
)
from src.agents.repository_analysis_agent.prompts import (
    CONTRIBUTING_GUIDELINES_ANALYSIS_PROMPT,
    REPOSITORY_ANALYSIS_PROMPT,
    RULE_GENERATION_PROMPT,
)
from src.integrations.github.api import github_client

logger = logging.getLogger(__name__)


async def analyze_repository_structure(state: RepositoryAnalysisState) -> Dict[str, Any]:
    """
    Analyze basic repository structure and features.

    Probes well-known files (CONTRIBUTING.md, CODEOWNERS, workflows) and counts
    contributors via the GitHub API. Failures are recorded on state.errors
    rather than raised, so the workflow can continue.
    """
    try:
        logger.info(f"Analyzing repository structure for {state.repository_full_name}")

        features = RepositoryFeatures()

        # NOTE(review): CONTRIBUTING.md is fetched again in
        # analyze_contributing_guidelines; consider caching the content on the
        # state to save one API round-trip.
        contributing_content = await github_client.get_file_content(
            state.repository_full_name, "CONTRIBUTING.md", state.installation_id
        )
        features.has_contributing = contributing_content is not None

        codeowners_content = await github_client.get_file_content(
            state.repository_full_name, ".github/CODEOWNERS", state.installation_id
        )
        features.has_codeowners = codeowners_content is not None

        # Only `.github/workflows/main.yml` is probed, so repositories whose
        # workflows use other file names are reported as having none —
        # TODO: list the workflows directory instead.
        workflow_content = await github_client.get_file_content(
            state.repository_full_name, ".github/workflows/main.yml", state.installation_id
        )
        if workflow_content:
            features.has_workflows = True
            features.workflow_count = 1

        contributors = await github_client.get_repository_contributors(
            state.repository_full_name, state.installation_id
        )
        features.contributor_count = len(contributors) if contributors else 0

        # TODO: Add more repository analysis (PR count, issues, language detection, etc.)

        logger.info(f"Repository analysis complete: {features.model_dump()}")

        state.repository_features = features
        state.analysis_steps.append("repository_structure_analyzed")

        return {"repository_features": features, "analysis_steps": state.analysis_steps}

    except Exception as e:
        logger.error(f"Error analyzing repository structure: {e}")
        state.errors.append(f"Repository structure analysis failed: {str(e)}")
        return {"errors": state.errors}


async def analyze_pr_history(state: RepositoryAnalysisState) -> Dict[str, Any]:
    """Pull a small PR sample to inform rule recommendations."""
    try:
        logger.info(f"Fetching recent PRs for {state.repository_full_name}")

        # FIX: previously fell back to installation_id=0, which can never mint
        # a token, so the API call failed and logged an error on every run
        # without an installation. Skip the sampling step explicitly instead;
        # the observable outcome (empty sample, step recorded) is unchanged.
        if state.installation_id is None:
            logger.info("No installation id available; skipping PR history sampling")
            state.pr_samples = []
            state.analysis_steps.append("pr_history_sampled")
            return {"pr_samples": [], "analysis_steps": state.analysis_steps}

        prs = await github_client.list_pull_requests(
            state.repository_full_name, state.installation_id, state="closed", per_page=20
        )

        pr_samples: list[dict[str, Any]] = [
            {
                "number": pr.get("number"),
                "title": pr.get("title"),
                "merged": pr.get("merged_at") is not None,
                "changed_files": pr.get("changed_files"),
                "additions": pr.get("additions"),
                "deletions": pr.get("deletions"),
                # FIX: `pr.get("user", {})` returns None (not {}) when the API
                # sends an explicit null user; guard with `or {}`.
                "user": (pr.get("user") or {}).get("login"),
            }
            for pr in prs
        ]

        state.pr_samples = pr_samples
        state.analysis_steps.append("pr_history_sampled")
        logger.info(f"Collected {len(pr_samples)} PR samples")
        return {"pr_samples": pr_samples, "analysis_steps": state.analysis_steps}
    except Exception as e:
        logger.error(f"Error analyzing PR history: {e}")
        state.errors.append(f"PR history analysis failed: {str(e)}")
        return {"errors": state.errors}
+ """ + try: + logger.info(f" Analyzing contributing guidelines for {state.repository_full_name}") + + # Get contributing guidelines content + content = await github_client.get_file_content( + state.repository_full_name, "CONTRIBUTING.md", state.installation_id + ) + + if not content: + logger.info("No CONTRIBUTING.md file found") + analysis = ContributingGuidelinesAnalysis() + else: + + llm = github_client.llm if hasattr(github_client, 'llm') else None + if llm: + try: + prompt = CONTRIBUTING_GUIDELINES_ANALYSIS_PROMPT.format(content=content) + response = await llm.ainvoke(prompt) + + + # TODO: Parse JSON response and create ContributingGuidelinesAnalysis + + analysis = ContributingGuidelinesAnalysis(content=content) + except Exception as e: + logger.error(f"LLM analysis failed: {e}") + analysis = ContributingGuidelinesAnalysis(content=content) + else: + analysis = ContributingGuidelinesAnalysis(content=content) + + state.contributing_analysis = analysis + state.analysis_steps.append("contributing_guidelines_analyzed") + + logger.info(" Contributing guidelines analysis complete") + + return {"contributing_analysis": analysis, "analysis_steps": state.analysis_steps} + + except Exception as e: + logger.error(f"Error analyzing contributing guidelines: {e}") + state.errors.append(f"Contributing guidelines analysis failed: {str(e)}") + return {"errors": state.errors} + + +async def generate_rule_recommendations(state: RepositoryAnalysisState) -> Dict[str, Any]: + """ + Generate Watchflow rule recommendations based on repository analysis. 
+ """ + try: + logger.info(f" Generating rule recommendations for {state.repository_full_name}") + + recommendations = [] + + features = state.repository_features + contributing = state.contributing_analysis + + + # Diff-aware: enforce filter handling in core RAG/query code + recommendations.append( + RuleRecommendation( + yaml_content="""description: "Block merges when PRs change filter validation logic without failing on invalid inputs" +enabled: true +severity: "high" +event_types: ["pull_request"] +parameters: + file_patterns: + - "packages/core/src/**/vector-query.ts" + - "packages/core/src/**/graph-rag.ts" + - "packages/core/src/**/filters/*.ts" + require_patterns: + - "throw\\\\s+new\\\\s+Error" + - "raise\\\\s+ValueError" + forbidden_patterns: + - "return\\\\s+.*filter\\\\s*$" + how_to_fix: "Ensure invalid filters raise descriptive errors instead of silently returning unfiltered results." +""", + confidence=0.85, + reasoning="Filter handling regressions were flagged in historical fixes; enforce throws on invalid input.", + source_patterns=["pr_history"], + category="quality", + estimated_impact="high", + ) + ) + + # Diff-aware: enforce test updates when core code changes + recommendations.append( + RuleRecommendation( + yaml_content="""description: "Require regression tests when modifying tool schema validation or client tool execution" +enabled: true +severity: "medium" +event_types: ["pull_request"] +parameters: + source_patterns: + - "packages/core/src/**/tool*.ts" + - "packages/core/src/agent/**" + - "packages/client/**" + test_patterns: + - "packages/core/tests/**" + - "tests/**" + min_test_files: 1 + rationale: "Tool invocation changes have previously caused regressions in clientTools streaming." 
+""", + confidence=0.8, + reasoning="Core tool changes often broke client tools; require at least one related test update.", + source_patterns=["pr_history"], + category="quality", + estimated_impact="medium", + ) + ) + + # Diff-aware: ensure agent descriptions exist + recommendations.append( + RuleRecommendation( + yaml_content="""description: "Ensure every agent exposes a user-facing description for UI profiles" +enabled: true +severity: "low" +event_types: ["pull_request"] +parameters: + file_patterns: + - "packages/core/src/agent/**" + required_text: + - "description" + message: "Add or update the agent description so downstream UIs can render capabilities." +""", + confidence=0.75, + reasoning="Agent profile UIs require descriptions; ensure new/updated agents include them.", + source_patterns=["pr_history"], + category="process", + estimated_impact="low", + ) + ) + + # Diff-aware: preserve URL handling for supported providers + recommendations.append( + RuleRecommendation( + yaml_content="""description: "Block merges when URL or asset handling changes bypass provider capability checks" +enabled: true +severity: "high" +event_types: ["pull_request"] +parameters: + file_patterns: + - "packages/core/src/agent/message-list/**" + - "packages/core/src/llm/**" + require_patterns: + - "isUrlSupportedByModel" + forbidden_patterns: + - "downloadAssetsFromMessages\\(messages\\)" + how_to_fix: "Preserve remote URLs for providers that support them natively; only download assets for unsupported providers." 
+""", + confidence=0.8, + reasoning="Past URL handling bugs; ensure capability checks remain intact.", + source_patterns=["pr_history"], + category="quality", + estimated_impact="high", + ) + ) + + # Legacy structural signals retained for completeness + if features.has_workflows: + recommendations.append(RuleRecommendation( + yaml_content="""description: "Require CI checks to pass" +enabled: true +severity: "high" +event_types: + - pull_request +conditions: + - type: "ci_checks_passed" + parameters: + required_checks: [] +actions: + - type: "block_merge" + parameters: + message: "All CI checks must pass before merging" +""", + confidence=0.9, + reasoning="Repository has CI workflows configured, so requiring checks to pass is a standard practice", + source_patterns=["has_workflows"], + category="quality", + estimated_impact="high" + )) + + if features.has_codeowners: + recommendations.append(RuleRecommendation( + yaml_content="""description: "Require CODEOWNERS approval for changes" +enabled: true +severity: "medium" +event_types: + - pull_request +conditions: + - type: "codeowners_approved" + parameters: {} +actions: + - type: "require_approval" + parameters: + message: "CODEOWNERS must approve changes to owned files" +""", + confidence=0.8, + reasoning="CODEOWNERS file exists, indicating ownership requirements for code changes", + source_patterns=["has_codeowners"], + category="process", + estimated_impact="medium" + )) + + if contributing.requires_tests: + recommendations.append(RuleRecommendation( + yaml_content="""description: "Require test coverage for code changes" +enabled: true +severity: "medium" +event_types: + - pull_request +conditions: + - type: "test_coverage_threshold" + parameters: + minimum_coverage: 80 +actions: + - type: "block_merge" + parameters: + message: "Test coverage must be at least 80%" +""", + confidence=0.7, + reasoning="Contributing guidelines mention testing requirements", + source_patterns=["requires_tests"], + category="quality", + 
estimated_impact="medium" + )) + + if features.contributor_count > 10: + recommendations.append(RuleRecommendation( + yaml_content="""description: "Require at least one approval for pull requests" +enabled: true +severity: "medium" +event_types: + - pull_request +conditions: + - type: "minimum_approvals" + parameters: + count: 1 +actions: + - type: "block_merge" + parameters: + message: "Pull requests require at least one approval" +""", + confidence=0.6, + reasoning="Repository has multiple contributors, indicating collaborative development", + source_patterns=["contributor_count"], + category="process", + estimated_impact="medium" + )) + + + state.recommendations = recommendations + state.analysis_steps.append("recommendations_generated") + + logger.info(f"Generated {len(recommendations)} rule recommendations") + + return {"recommendations": recommendations, "analysis_steps": state.analysis_steps} + + except Exception as e: + logger.error(f"Error generating recommendations: {e}") + state.errors.append(f"Recommendation generation failed: {str(e)}") + return {"errors": state.errors} + + +async def validate_recommendations(state: RepositoryAnalysisState) -> Dict[str, Any]: + """ + Validate that generated recommendations contain valid YAML. 
+ """ + try: + logger.info("Validating rule recommendations") + + import yaml + + valid_recommendations = [] + + for rec in state.recommendations: + try: + # Parse YAML to validate syntax + parsed = yaml.safe_load(rec.yaml_content) + if parsed and isinstance(parsed, dict): + valid_recommendations.append(rec) + else: + logger.warning(f"Invalid rule structure: {rec.yaml_content[:100]}...") + except yaml.YAMLError as e: + logger.error(f"Invalid YAML in recommendation: {e}") + continue + + state.recommendations = valid_recommendations + state.analysis_steps.append("recommendations_validated") + + logger.info(f"Validated {len(valid_recommendations)} recommendations") + + return {"recommendations": valid_recommendations, "analysis_steps": state.analysis_steps} + + except Exception as e: + logger.error(f"Error validating recommendations: {e}") + state.errors.append(f"Recommendation validation failed: {str(e)}") + return {"errors": state.errors} + + +async def summarize_analysis(state: RepositoryAnalysisState) -> Dict[str, Any]: + """ + Create a summary of the analysis findings. 
+ """ + try: + logger.info("Creating analysis summary") + + summary = { + "repository": state.repository_full_name, + "features_analyzed": { + "has_contributing": state.repository_features.has_contributing, + "has_codeowners": state.repository_features.has_codeowners, + "has_workflows": state.repository_features.has_workflows, + "contributor_count": state.repository_features.contributor_count, + }, + "recommendations_count": len(state.recommendations), + "recommendations_by_category": {}, + "high_confidence_count": 0, + "analysis_steps_completed": len(state.analysis_steps), + "errors_encountered": len(state.errors), + } + + # Count recommendations by category + for rec in state.recommendations: + summary["recommendations_by_category"][rec.category] = ( + summary["recommendations_by_category"].get(rec.category, 0) + 1 + ) + if rec.confidence >= 0.8: + summary["high_confidence_count"] += 1 + + state.analysis_summary = summary + state.analysis_steps.append("analysis_summarized") + + logger.info("Analysis summary created") + + return {"analysis_summary": summary, "analysis_steps": state.analysis_steps} + + except Exception as e: + logger.error(f"Error creating analysis summary: {e}") + state.errors.append(f"Analysis summary failed: {str(e)}") + return {"errors": state.errors} diff --git a/src/integrations/github/api.py b/src/integrations/github/api.py index 7e29db8..d899338 100644 --- a/src/integrations/github/api.py +++ b/src/integrations/github/api.py @@ -418,6 +418,43 @@ async def get_pull_request(self, repo: str, pr_number: int, installation_id: int logger.error(f"Error getting PR #{pr_number} from {repo}: {e}") return {} + async def list_pull_requests( + self, repo: str, installation_id: int, state: str = "all", per_page: int = 20 + ) -> list[dict[str, Any]]: + """ + List pull requests for a repository. 
+ + Args: + repo: Full repo name (owner/repo) + installation_id: GitHub App installation id + state: "open", "closed", or "all" + per_page: max items to fetch (up to 100) + """ + try: + token = await self.get_installation_access_token(installation_id) + if not token: + logger.error(f"Failed to get installation token for {installation_id}") + return [] + + headers = {"Authorization": f"Bearer {token}", "Accept": "application/vnd.github.v3+json"} + url = f"{config.github.api_base_url}/repos/{repo}/pulls?state={state}&per_page={min(per_page, 100)}" + + session = await self._get_session() + async with session.get(url, headers=headers) as response: + if response.status == 200: + result = await response.json() + logger.info(f"Retrieved {len(result)} pull requests for {repo}") + return result + else: + error_text = await response.text() + logger.error( + f"Failed to list pull requests for {repo}. Status: {response.status}, Response: {error_text}" + ) + return [] + except Exception as e: + logger.error(f"Error listing pull requests for {repo}: {e}") + return [] + async def create_deployment_status( self, repo: str, From 8721eacf2009c15e59f82c788ac8d4b82cb52d06 Mon Sep 17 00:00:00 2001 From: naaa760 Date: Tue, 9 Dec 2025 12:37:54 +0530 Subject: [PATCH 2/3] feat: render rules bundle and PR template from analysis --- src/agents/repository_analysis_agent/agent.py | 43 ++++++++++++++- .../repository_analysis_agent/models.py | 8 +++ .../test_repository_analysis_rendering.py | 53 +++++++++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 tests/unit/agents/test_repository_analysis_rendering.py diff --git a/src/agents/repository_analysis_agent/agent.py b/src/agents/repository_analysis_agent/agent.py index f33f4ca..4db6b7f 100644 --- a/src/agents/repository_analysis_agent/agent.py +++ b/src/agents/repository_analysis_agent/agent.py @@ -1,7 +1,7 @@ import logging import time from datetime import datetime -from typing import Any, Dict +from typing import 
@staticmethod
def _build_rules_yaml(recommendations: List[Any]) -> str:
    """Render a combined rules.yaml from individual recommendations.

    FIX: each recommendation is now emitted as a YAML *list item* under
    ``rules:``. The previous rendering nested every rule mapping directly
    under ``rules:`` at the same indent, which produced duplicate keys
    (``description:`` etc.) as soon as there was more than one rule — and
    generation always emits several — making the output lossy/unloadable.
    NOTE(review): the PATCH-2 unit test asserting ``"\\n  description"``
    codifies the old broken layout and must be updated with this change.
    """
    if not recommendations:
        return ""

    rendered = ["rules:"]
    for rec in recommendations:
        rule_lines = rec.yaml_content.rstrip("\n").splitlines()
        if not rule_lines:
            continue
        # First line opens the list item; the rest keep their relative indent.
        rendered.append(f"  - {rule_lines[0]}")
        rendered.extend(f"    {line}" for line in rule_lines[1:])

    return "\n".join(rendered) + "\n"


@staticmethod
def _build_pr_template(repo_full_name: str, recommendations: List[Any]) -> str:
    """Build a PR body with install steps and a one-line summary per rule.

    FIX: the bullet list previously used a backslash escape inside an
    f-string expression (``.strip('\\"')``), which is a SyntaxError on
    Python < 3.12 (PEP 701 lifted that restriction only in 3.12). Title
    extraction now happens outside the f-string, and an empty
    ``yaml_content`` no longer raises IndexError.
    """
    titles = []
    for rec in recommendations:
        first_line = rec.yaml_content.splitlines()[0] if rec.yaml_content else ""
        titles.append(first_line.replace("description: ", "").strip('"'))
    bullet_rules = "\n".join(f"- {title}" for title in titles)

    return f"""## Watchflow Rule Proposal for {repo_full_name}

We've analyzed recent PRs and propose enabling the following rules:

{bullet_rules}

### Installation
1) Install the Watchflow GitHub App and grant access to this repo.
2) Add the provided `rules.yaml` under `.watchflow/`.
3) Watchflow will start reporting on PRs with the rules above.

### Files Included
- `.watchflow/rules.yaml` (see below)

If you'd like, we can adjust these rules or add more coverage based on your feedback.
"""
+""" + async def analyze_repository(self, request: RepositoryAnalysisRequest) -> RepositoryAnalysisResponse: """ Convenience method for analyzing a repository using the request model. diff --git a/src/agents/repository_analysis_agent/models.py b/src/agents/repository_analysis_agent/models.py index 8280531..f6bc647 100644 --- a/src/agents/repository_analysis_agent/models.py +++ b/src/agents/repository_analysis_agent/models.py @@ -111,3 +111,11 @@ class RepositoryAnalysisResponse(BaseModel): ) analyzed_at: str = Field(description="Timestamp of analysis") total_recommendations: int = Field(description="Total number of recommendations made") + rules_yaml: str = Field( + description="Rendered rules.yaml content for easy consumption", + default="", + ) + pr_template: str = Field( + description="Prebuilt PR body that includes install steps and rule summary", + default="", + ) diff --git a/tests/unit/agents/test_repository_analysis_rendering.py b/tests/unit/agents/test_repository_analysis_rendering.py new file mode 100644 index 0000000..1f35d13 --- /dev/null +++ b/tests/unit/agents/test_repository_analysis_rendering.py @@ -0,0 +1,53 @@ +from src.agents.repository_analysis_agent.agent import RepositoryAnalysisAgent +from src.agents.repository_analysis_agent.models import RuleRecommendation + + +def test_build_rules_yaml_renders_rules(): + agent = RepositoryAnalysisAgent() + recs = [ + RuleRecommendation( + yaml_content="""description: "Rule A" +enabled: true +event_types: ["pull_request"] +parameters: + foo: bar +""", + confidence=0.9, + reasoning="test", + source_patterns=[], + category="quality", + estimated_impact="high", + ) + ] + + rendered = agent._build_rules_yaml(recs) + + assert rendered.startswith("rules:") + assert "description: \"Rule A\"" in rendered + # Ensure indentation under rules: + assert "\n description" in rendered + + +def test_build_pr_template_includes_repo_and_rules(): + agent = RepositoryAnalysisAgent() + recs = [ + RuleRecommendation( + 
yaml_content="""description: "Rule A" +enabled: true +event_types: ["pull_request"] +parameters: {} +""", + confidence=0.9, + reasoning="test", + source_patterns=[], + category="quality", + estimated_impact="high", + ) + ] + + pr_body = agent._build_pr_template("owner/repo", recs) + + assert "owner/repo" in pr_body + assert "Rule A" in pr_body + assert "Install the Watchflow GitHub App" in pr_body + From fda660e52fb3255e023e6b1c1d3f1809e702e330 Mon Sep 17 00:00:00 2001 From: naaa760 Date: Tue, 9 Dec 2025 14:45:05 +0530 Subject: [PATCH 3/3] chore: add prompt stubs for repository analysis agent --- src/agents/repository_analysis_agent/prompts.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 src/agents/repository_analysis_agent/prompts.py diff --git a/src/agents/repository_analysis_agent/prompts.py b/src/agents/repository_analysis_agent/prompts.py new file mode 100644 index 0000000..799f284 --- /dev/null +++ b/src/agents/repository_analysis_agent/prompts.py @@ -0,0 +1,9 @@ +""" +Minimal prompt stubs for repository analysis agent. +These constants are placeholders; adjust as needed for richer LLM workflows. +""" + +CONTRIBUTING_GUIDELINES_ANALYSIS_PROMPT = "Analyze contributing guidelines." +REPOSITORY_ANALYSIS_PROMPT = "Analyze repository structure." +RULE_GENERATION_PROMPT = "Generate rule YAML." +