import logging
import time
from datetime import datetime
from typing import Any, Dict

from langgraph.graph import END, START, StateGraph

from src.agents.base import AgentResult, BaseAgent
from src.agents.repository_analysis_agent.models import (
    RepositoryAnalysisRequest,
    RepositoryAnalysisResponse,
    RepositoryAnalysisState,
)
from src.agents.repository_analysis_agent.nodes import (
    analyze_contributing_guidelines,
    analyze_pr_history,  # FIX: used in _build_graph below but missing from this import list
    analyze_repository_structure,
    generate_rule_recommendations,
    summarize_analysis,
    validate_recommendations,
)

logger = logging.getLogger(__name__)


class RepositoryAnalysisAgent(BaseAgent):
    """
    Agent that analyzes GitHub repositories to generate Watchflow rule recommendations.

    This agent performs multi-step analysis:
    1. Analyzes repository structure and features
    2. Samples recent PR history
    3. Parses contributing guidelines for patterns
    4. Generates rule recommendations with confidence scores
    5. Validates recommendations are valid YAML

    Returns structured recommendations that can be directly used as Watchflow rules.
    """

    def __init__(self, max_retries: int = 3, timeout: float = 120.0):
        """
        Args:
            max_retries: Retry budget forwarded to BaseAgent.
            timeout: Wall-clock budget (seconds) for one full analysis run.
        """
        super().__init__(max_retries=max_retries, agent_name="repository_analysis_agent")
        self.timeout = timeout

        logger.info("Repository Analysis Agent initialized")
        logger.info(f"Max retries: {max_retries}, Timeout: {timeout}s")

    def _build_graph(self) -> StateGraph:
        """Build the LangGraph workflow for repository analysis."""
        workflow = StateGraph(RepositoryAnalysisState)

        # Add nodes
        workflow.add_node("analyze_repository_structure", analyze_repository_structure)
        workflow.add_node("analyze_pr_history", analyze_pr_history)
        workflow.add_node("analyze_contributing_guidelines", analyze_contributing_guidelines)
        workflow.add_node("generate_rule_recommendations", generate_rule_recommendations)
        workflow.add_node("validate_recommendations", validate_recommendations)
        workflow.add_node("summarize_analysis", summarize_analysis)

        # Strictly sequential pipeline: structure -> PR history -> guidelines
        # -> generate -> validate -> summarize.
        workflow.add_edge(START, "analyze_repository_structure")
        workflow.add_edge("analyze_repository_structure", "analyze_pr_history")
        workflow.add_edge("analyze_pr_history", "analyze_contributing_guidelines")
        workflow.add_edge("analyze_contributing_guidelines", "generate_rule_recommendations")
        workflow.add_edge("generate_rule_recommendations", "validate_recommendations")
        workflow.add_edge("validate_recommendations", "summarize_analysis")
        workflow.add_edge("summarize_analysis", END)

        return workflow.compile()

    async def execute(
        self,
        repository_full_name: str,
        installation_id: int | None = None,
        **kwargs,
    ) -> AgentResult:
        """
        Analyze a repository and generate rule recommendations.

        Args:
            repository_full_name: Full repository name (owner/repo)
            installation_id: Optional GitHub App installation ID for private repos
            **kwargs: Additional parameters (currently unused)

        Returns:
            AgentResult containing analysis results and recommendations
        """
        start_time = time.time()

        try:
            logger.info(f"Starting repository analysis for {repository_full_name}")

            # Validate input early; a malformed name would fail every API call later.
            if not repository_full_name or "/" not in repository_full_name:
                return AgentResult(
                    success=False,
                    message="Invalid repository name format. Expected 'owner/repo'",
                    data={},
                    metadata={"execution_time_ms": 0},
                )

            initial_state = RepositoryAnalysisState(
                repository_full_name=repository_full_name,
                installation_id=installation_id,
                analysis_steps=[],
                errors=[],
            )

            logger.info("Initial state prepared, starting analysis workflow")

            result = await self._execute_with_timeout(
                self.graph.ainvoke(initial_state),
                timeout=self.timeout,
            )

            execution_time = time.time() - start_time
            logger.info(f"Analysis completed in {execution_time:.2f}s")

            # LangGraph may return a plain dict; normalize back to the state model.
            if isinstance(result, dict):
                state = RepositoryAnalysisState(**result)
            else:
                state = result

            response = RepositoryAnalysisResponse(
                repository_full_name=repository_full_name,
                recommendations=state.recommendations,
                analysis_summary=state.analysis_summary,
                analyzed_at=datetime.now().isoformat(),
                total_recommendations=len(state.recommendations),
            )

            # Any recorded node error marks the run unsuccessful, even though
            # partial recommendations may still have been produced.
            has_errors = len(state.errors) > 0
            success_message = (
                f"Analysis completed successfully with {len(state.recommendations)} recommendations"
            )
            if has_errors:
                success_message += f" ({len(state.errors)} errors encountered)"

            logger.info(
                f"Analysis result: {len(state.recommendations)} recommendations, {len(state.errors)} errors"
            )

            return AgentResult(
                success=not has_errors,
                message=success_message,
                data={"analysis_response": response},
                metadata={
                    "execution_time_ms": execution_time * 1000,
                    "recommendations_count": len(state.recommendations),
                    "errors_count": len(state.errors),
                    "analysis_steps": state.analysis_steps,
                },
            )

        except Exception as e:
            execution_time = time.time() - start_time
            logger.error(f"Error in repository analysis: {e}")

            return AgentResult(
                success=False,
                message=f"Repository analysis failed: {str(e)}",
                data={},
                metadata={
                    "execution_time_ms": execution_time * 1000,
                    "error_type": type(e).__name__,
                },
            )

    async def analyze_repository(self, request: RepositoryAnalysisRequest) -> RepositoryAnalysisResponse:
        """
        Convenience wrapper around execute() using the request/response models.

        Args:
            request: Repository analysis request

        Returns:
            Repository analysis response (an error-summary response on failure)
        """
        result = await self.execute(
            repository_full_name=request.repository_full_name,
            installation_id=request.installation_id,
        )

        if result.success and "analysis_response" in result.data:
            return result.data["analysis_response"]

        # Fall back to an empty response that carries the failure message.
        return RepositoryAnalysisResponse(
            repository_full_name=request.repository_full_name,
            recommendations=[],
            analysis_summary={"error": result.message},
            analyzed_at=datetime.now().isoformat(),
            total_recommendations=0,
        )
from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


class AnalysisSource(str, Enum):
    """Sources of analysis data for rule recommendations."""

    CONTRIBUTING_GUIDELINES = "contributing_guidelines"
    REPOSITORY_STRUCTURE = "repository_structure"
    WORKFLOWS = "workflows"
    BRANCH_PROTECTION = "branch_protection"
    COMMIT_PATTERNS = "commit_patterns"
    PR_PATTERNS = "pr_patterns"


class RuleRecommendation(BaseModel):
    """A recommended Watchflow rule with confidence and reasoning."""

    yaml_content: str = Field(description="Valid Watchflow rule YAML content")
    confidence: float = Field(
        description="Confidence score (0.0-1.0) in the recommendation",
        ge=0.0,
        le=1.0,
    )
    reasoning: str = Field(description="Explanation of why this rule is recommended")
    source_patterns: List[str] = Field(
        description="Repository patterns that led to this recommendation",
        default_factory=list,
    )
    category: str = Field(description="Category of the rule (e.g., 'quality', 'security', 'process')")
    estimated_impact: str = Field(description="Expected impact (e.g., 'high', 'medium', 'low')")


class RepositoryAnalysisRequest(BaseModel):
    """Request model for repository analysis."""

    repository_full_name: str = Field(description="Full repository name (owner/repo)")
    installation_id: Optional[int] = Field(
        description="GitHub App installation ID for accessing private repos",
        default=None,
    )


class RepositoryFeatures(BaseModel):
    """Features and characteristics discovered in the repository."""

    has_contributing: bool = Field(description="Has CONTRIBUTING.md file", default=False)
    has_codeowners: bool = Field(description="Has CODEOWNERS file", default=False)
    has_workflows: bool = Field(description="Has GitHub Actions workflows", default=False)
    has_branch_protection: bool = Field(description="Has branch protection rules", default=False)
    workflow_count: int = Field(description="Number of workflow files", default=0)
    language: Optional[str] = Field(description="Primary programming language", default=None)
    contributor_count: int = Field(description="Number of contributors", default=0)
    pr_count: int = Field(description="Number of pull requests", default=0)
    issue_count: int = Field(description="Number of issues", default=0)


class ContributingGuidelinesAnalysis(BaseModel):
    """Analysis of contributing guidelines content."""

    content: Optional[str] = Field(description="Full CONTRIBUTING.md content", default=None)
    has_pr_template: bool = Field(description="Requires PR templates", default=False)
    has_issue_template: bool = Field(description="Requires issue templates", default=False)
    requires_tests: bool = Field(description="Requires tests for contributions", default=False)
    requires_docs: bool = Field(description="Requires documentation updates", default=False)
    code_style_requirements: List[str] = Field(
        description="Code style requirements mentioned",
        default_factory=list,
    )
    review_requirements: List[str] = Field(
        description="Code review requirements mentioned",
        default_factory=list,
    )


class RepositoryAnalysisState(BaseModel):
    """State for the repository analysis workflow."""

    repository_full_name: str
    # FIX: default to None for consistency with RepositoryAnalysisRequest, so a
    # state can be constructed without an installation id (public repos).
    installation_id: Optional[int] = None
    pr_samples: List[Dict[str, Any]] = Field(default_factory=list)

    # Analysis data
    repository_features: RepositoryFeatures = Field(default_factory=RepositoryFeatures)
    contributing_analysis: ContributingGuidelinesAnalysis = Field(
        default_factory=ContributingGuidelinesAnalysis
    )

    # Processing state
    analysis_steps: List[str] = Field(default_factory=list)
    errors: List[str] = Field(default_factory=list)

    # Results
    recommendations: List[RuleRecommendation] = Field(default_factory=list)
    analysis_summary: Dict[str, Any] = Field(default_factory=dict)


class RepositoryAnalysisResponse(BaseModel):
    """Response model containing rule recommendations."""

    repository_full_name: str = Field(description="Repository that was analyzed")
    recommendations: List[RuleRecommendation] = Field(
        description="List of recommended Watchflow rules",
        default_factory=list,
    )
    analysis_summary: Dict[str, Any] = Field(
        description="Summary of analysis findings",
        default_factory=dict,
    )
    analyzed_at: str = Field(description="Timestamp of analysis")
    total_recommendations: int = Field(description="Total number of recommendations made")
import logging
from typing import Any, Dict

from src.agents.repository_analysis_agent.models import (
    AnalysisSource,
    ContributingGuidelinesAnalysis,
    RepositoryAnalysisState,
    RepositoryFeatures,
    RuleRecommendation,
)
from src.agents.repository_analysis_agent.prompts import (
    CONTRIBUTING_GUIDELINES_ANALYSIS_PROMPT,
    REPOSITORY_ANALYSIS_PROMPT,
    RULE_GENERATION_PROMPT,
)
from src.integrations.github.api import github_client

logger = logging.getLogger(__name__)


async def analyze_repository_structure(state: RepositoryAnalysisState) -> Dict[str, Any]:
    """
    Analyze basic repository structure and features.

    Probes well-known files (CONTRIBUTING.md, CODEOWNERS, workflows) and counts
    contributors via the GitHub API. Failures are recorded on state.errors
    rather than raised, so the workflow can continue.
    """
    try:
        logger.info(f"Analyzing repository structure for {state.repository_full_name}")

        features = RepositoryFeatures()

        # NOTE(review): CONTRIBUTING.md is fetched again in
        # analyze_contributing_guidelines; consider caching the content on the
        # state to save one API round-trip.
        contributing_content = await github_client.get_file_content(
            state.repository_full_name, "CONTRIBUTING.md", state.installation_id
        )
        features.has_contributing = contributing_content is not None

        codeowners_content = await github_client.get_file_content(
            state.repository_full_name, ".github/CODEOWNERS", state.installation_id
        )
        features.has_codeowners = codeowners_content is not None

        # Only `.github/workflows/main.yml` is probed, so repositories whose
        # workflows use other file names are reported as having none —
        # TODO: list the workflows directory instead.
        workflow_content = await github_client.get_file_content(
            state.repository_full_name, ".github/workflows/main.yml", state.installation_id
        )
        if workflow_content:
            features.has_workflows = True
            features.workflow_count = 1

        contributors = await github_client.get_repository_contributors(
            state.repository_full_name, state.installation_id
        )
        features.contributor_count = len(contributors) if contributors else 0

        # TODO: Add more repository analysis (PR count, issues, language detection, etc.)

        logger.info(f"Repository analysis complete: {features.model_dump()}")

        state.repository_features = features
        state.analysis_steps.append("repository_structure_analyzed")

        return {"repository_features": features, "analysis_steps": state.analysis_steps}

    except Exception as e:
        logger.error(f"Error analyzing repository structure: {e}")
        state.errors.append(f"Repository structure analysis failed: {str(e)}")
        return {"errors": state.errors}


async def analyze_pr_history(state: RepositoryAnalysisState) -> Dict[str, Any]:
    """Pull a small PR sample to inform rule recommendations."""
    try:
        logger.info(f"Fetching recent PRs for {state.repository_full_name}")

        # FIX: previously fell back to installation_id=0, which can never mint
        # a token, so the API call failed and logged an error on every run
        # without an installation. Skip the sampling step explicitly instead;
        # the observable outcome (empty sample, step recorded) is unchanged.
        if state.installation_id is None:
            logger.info("No installation id available; skipping PR history sampling")
            state.pr_samples = []
            state.analysis_steps.append("pr_history_sampled")
            return {"pr_samples": [], "analysis_steps": state.analysis_steps}

        prs = await github_client.list_pull_requests(
            state.repository_full_name, state.installation_id, state="closed", per_page=20
        )

        pr_samples: list[dict[str, Any]] = [
            {
                "number": pr.get("number"),
                "title": pr.get("title"),
                "merged": pr.get("merged_at") is not None,
                "changed_files": pr.get("changed_files"),
                "additions": pr.get("additions"),
                "deletions": pr.get("deletions"),
                # FIX: `pr.get("user", {})` returns None (not {}) when the API
                # sends an explicit null user; guard with `or {}`.
                "user": (pr.get("user") or {}).get("login"),
            }
            for pr in prs
        ]

        state.pr_samples = pr_samples
        state.analysis_steps.append("pr_history_sampled")
        logger.info(f"Collected {len(pr_samples)} PR samples")
        return {"pr_samples": pr_samples, "analysis_steps": state.analysis_steps}
    except Exception as e:
        logger.error(f"Error analyzing PR history: {e}")
        state.errors.append(f"PR history analysis failed: {str(e)}")
        return {"errors": state.errors}
+ """ + try: + logger.info(f" Analyzing contributing guidelines for {state.repository_full_name}") + + # Get contributing guidelines content + content = await github_client.get_file_content( + state.repository_full_name, "CONTRIBUTING.md", state.installation_id + ) + + if not content: + logger.info("No CONTRIBUTING.md file found") + analysis = ContributingGuidelinesAnalysis() + else: + + llm = github_client.llm if hasattr(github_client, 'llm') else None + if llm: + try: + prompt = CONTRIBUTING_GUIDELINES_ANALYSIS_PROMPT.format(content=content) + response = await llm.ainvoke(prompt) + + + # TODO: Parse JSON response and create ContributingGuidelinesAnalysis + + analysis = ContributingGuidelinesAnalysis(content=content) + except Exception as e: + logger.error(f"LLM analysis failed: {e}") + analysis = ContributingGuidelinesAnalysis(content=content) + else: + analysis = ContributingGuidelinesAnalysis(content=content) + + state.contributing_analysis = analysis + state.analysis_steps.append("contributing_guidelines_analyzed") + + logger.info(" Contributing guidelines analysis complete") + + return {"contributing_analysis": analysis, "analysis_steps": state.analysis_steps} + + except Exception as e: + logger.error(f"Error analyzing contributing guidelines: {e}") + state.errors.append(f"Contributing guidelines analysis failed: {str(e)}") + return {"errors": state.errors} + + +async def generate_rule_recommendations(state: RepositoryAnalysisState) -> Dict[str, Any]: + """ + Generate Watchflow rule recommendations based on repository analysis. 
+ """ + try: + logger.info(f" Generating rule recommendations for {state.repository_full_name}") + + recommendations = [] + + features = state.repository_features + contributing = state.contributing_analysis + + + # Diff-aware: enforce filter handling in core RAG/query code + recommendations.append( + RuleRecommendation( + yaml_content="""description: "Block merges when PRs change filter validation logic without failing on invalid inputs" +enabled: true +severity: "high" +event_types: ["pull_request"] +parameters: + file_patterns: + - "packages/core/src/**/vector-query.ts" + - "packages/core/src/**/graph-rag.ts" + - "packages/core/src/**/filters/*.ts" + require_patterns: + - "throw\\\\s+new\\\\s+Error" + - "raise\\\\s+ValueError" + forbidden_patterns: + - "return\\\\s+.*filter\\\\s*$" + how_to_fix: "Ensure invalid filters raise descriptive errors instead of silently returning unfiltered results." +""", + confidence=0.85, + reasoning="Filter handling regressions were flagged in historical fixes; enforce throws on invalid input.", + source_patterns=["pr_history"], + category="quality", + estimated_impact="high", + ) + ) + + # Diff-aware: enforce test updates when core code changes + recommendations.append( + RuleRecommendation( + yaml_content="""description: "Require regression tests when modifying tool schema validation or client tool execution" +enabled: true +severity: "medium" +event_types: ["pull_request"] +parameters: + source_patterns: + - "packages/core/src/**/tool*.ts" + - "packages/core/src/agent/**" + - "packages/client/**" + test_patterns: + - "packages/core/tests/**" + - "tests/**" + min_test_files: 1 + rationale: "Tool invocation changes have previously caused regressions in clientTools streaming." 
+""", + confidence=0.8, + reasoning="Core tool changes often broke client tools; require at least one related test update.", + source_patterns=["pr_history"], + category="quality", + estimated_impact="medium", + ) + ) + + # Diff-aware: ensure agent descriptions exist + recommendations.append( + RuleRecommendation( + yaml_content="""description: "Ensure every agent exposes a user-facing description for UI profiles" +enabled: true +severity: "low" +event_types: ["pull_request"] +parameters: + file_patterns: + - "packages/core/src/agent/**" + required_text: + - "description" + message: "Add or update the agent description so downstream UIs can render capabilities." +""", + confidence=0.75, + reasoning="Agent profile UIs require descriptions; ensure new/updated agents include them.", + source_patterns=["pr_history"], + category="process", + estimated_impact="low", + ) + ) + + # Diff-aware: preserve URL handling for supported providers + recommendations.append( + RuleRecommendation( + yaml_content="""description: "Block merges when URL or asset handling changes bypass provider capability checks" +enabled: true +severity: "high" +event_types: ["pull_request"] +parameters: + file_patterns: + - "packages/core/src/agent/message-list/**" + - "packages/core/src/llm/**" + require_patterns: + - "isUrlSupportedByModel" + forbidden_patterns: + - "downloadAssetsFromMessages\\(messages\\)" + how_to_fix: "Preserve remote URLs for providers that support them natively; only download assets for unsupported providers." 
+""", + confidence=0.8, + reasoning="Past URL handling bugs; ensure capability checks remain intact.", + source_patterns=["pr_history"], + category="quality", + estimated_impact="high", + ) + ) + + # Legacy structural signals retained for completeness + if features.has_workflows: + recommendations.append(RuleRecommendation( + yaml_content="""description: "Require CI checks to pass" +enabled: true +severity: "high" +event_types: + - pull_request +conditions: + - type: "ci_checks_passed" + parameters: + required_checks: [] +actions: + - type: "block_merge" + parameters: + message: "All CI checks must pass before merging" +""", + confidence=0.9, + reasoning="Repository has CI workflows configured, so requiring checks to pass is a standard practice", + source_patterns=["has_workflows"], + category="quality", + estimated_impact="high" + )) + + if features.has_codeowners: + recommendations.append(RuleRecommendation( + yaml_content="""description: "Require CODEOWNERS approval for changes" +enabled: true +severity: "medium" +event_types: + - pull_request +conditions: + - type: "codeowners_approved" + parameters: {} +actions: + - type: "require_approval" + parameters: + message: "CODEOWNERS must approve changes to owned files" +""", + confidence=0.8, + reasoning="CODEOWNERS file exists, indicating ownership requirements for code changes", + source_patterns=["has_codeowners"], + category="process", + estimated_impact="medium" + )) + + if contributing.requires_tests: + recommendations.append(RuleRecommendation( + yaml_content="""description: "Require test coverage for code changes" +enabled: true +severity: "medium" +event_types: + - pull_request +conditions: + - type: "test_coverage_threshold" + parameters: + minimum_coverage: 80 +actions: + - type: "block_merge" + parameters: + message: "Test coverage must be at least 80%" +""", + confidence=0.7, + reasoning="Contributing guidelines mention testing requirements", + source_patterns=["requires_tests"], + category="quality", + 
estimated_impact="medium" + )) + + if features.contributor_count > 10: + recommendations.append(RuleRecommendation( + yaml_content="""description: "Require at least one approval for pull requests" +enabled: true +severity: "medium" +event_types: + - pull_request +conditions: + - type: "minimum_approvals" + parameters: + count: 1 +actions: + - type: "block_merge" + parameters: + message: "Pull requests require at least one approval" +""", + confidence=0.6, + reasoning="Repository has multiple contributors, indicating collaborative development", + source_patterns=["contributor_count"], + category="process", + estimated_impact="medium" + )) + + + state.recommendations = recommendations + state.analysis_steps.append("recommendations_generated") + + logger.info(f"Generated {len(recommendations)} rule recommendations") + + return {"recommendations": recommendations, "analysis_steps": state.analysis_steps} + + except Exception as e: + logger.error(f"Error generating recommendations: {e}") + state.errors.append(f"Recommendation generation failed: {str(e)}") + return {"errors": state.errors} + + +async def validate_recommendations(state: RepositoryAnalysisState) -> Dict[str, Any]: + """ + Validate that generated recommendations contain valid YAML. 
+ """ + try: + logger.info("Validating rule recommendations") + + import yaml + + valid_recommendations = [] + + for rec in state.recommendations: + try: + # Parse YAML to validate syntax + parsed = yaml.safe_load(rec.yaml_content) + if parsed and isinstance(parsed, dict): + valid_recommendations.append(rec) + else: + logger.warning(f"Invalid rule structure: {rec.yaml_content[:100]}...") + except yaml.YAMLError as e: + logger.error(f"Invalid YAML in recommendation: {e}") + continue + + state.recommendations = valid_recommendations + state.analysis_steps.append("recommendations_validated") + + logger.info(f"Validated {len(valid_recommendations)} recommendations") + + return {"recommendations": valid_recommendations, "analysis_steps": state.analysis_steps} + + except Exception as e: + logger.error(f"Error validating recommendations: {e}") + state.errors.append(f"Recommendation validation failed: {str(e)}") + return {"errors": state.errors} + + +async def summarize_analysis(state: RepositoryAnalysisState) -> Dict[str, Any]: + """ + Create a summary of the analysis findings. 
+ """ + try: + logger.info("Creating analysis summary") + + summary = { + "repository": state.repository_full_name, + "features_analyzed": { + "has_contributing": state.repository_features.has_contributing, + "has_codeowners": state.repository_features.has_codeowners, + "has_workflows": state.repository_features.has_workflows, + "contributor_count": state.repository_features.contributor_count, + }, + "recommendations_count": len(state.recommendations), + "recommendations_by_category": {}, + "high_confidence_count": 0, + "analysis_steps_completed": len(state.analysis_steps), + "errors_encountered": len(state.errors), + } + + # Count recommendations by category + for rec in state.recommendations: + summary["recommendations_by_category"][rec.category] = ( + summary["recommendations_by_category"].get(rec.category, 0) + 1 + ) + if rec.confidence >= 0.8: + summary["high_confidence_count"] += 1 + + state.analysis_summary = summary + state.analysis_steps.append("analysis_summarized") + + logger.info("Analysis summary created") + + return {"analysis_summary": summary, "analysis_steps": state.analysis_steps} + + except Exception as e: + logger.error(f"Error creating analysis summary: {e}") + state.errors.append(f"Analysis summary failed: {str(e)}") + return {"errors": state.errors} diff --git a/src/integrations/github/api.py b/src/integrations/github/api.py index 7e29db8..d899338 100644 --- a/src/integrations/github/api.py +++ b/src/integrations/github/api.py @@ -418,6 +418,43 @@ async def get_pull_request(self, repo: str, pr_number: int, installation_id: int logger.error(f"Error getting PR #{pr_number} from {repo}: {e}") return {} + async def list_pull_requests( + self, repo: str, installation_id: int, state: str = "all", per_page: int = 20 + ) -> list[dict[str, Any]]: + """ + List pull requests for a repository. 
+ + Args: + repo: Full repo name (owner/repo) + installation_id: GitHub App installation id + state: "open", "closed", or "all" + per_page: max items to fetch (up to 100) + """ + try: + token = await self.get_installation_access_token(installation_id) + if not token: + logger.error(f"Failed to get installation token for {installation_id}") + return [] + + headers = {"Authorization": f"Bearer {token}", "Accept": "application/vnd.github.v3+json"} + url = f"{config.github.api_base_url}/repos/{repo}/pulls?state={state}&per_page={min(per_page, 100)}" + + session = await self._get_session() + async with session.get(url, headers=headers) as response: + if response.status == 200: + result = await response.json() + logger.info(f"Retrieved {len(result)} pull requests for {repo}") + return result + else: + error_text = await response.text() + logger.error( + f"Failed to list pull requests for {repo}. Status: {response.status}, Response: {error_text}" + ) + return [] + except Exception as e: + logger.error(f"Error listing pull requests for {repo}: {e}") + return [] + async def create_deployment_status( self, repo: str, From 8721eacf2009c15e59f82c788ac8d4b82cb52d06 Mon Sep 17 00:00:00 2001 From: naaa760 Date: Tue, 9 Dec 2025 12:37:54 +0530 Subject: [PATCH 2/3] feat: render rules bundle and PR template from analysis --- src/agents/repository_analysis_agent/agent.py | 43 ++++++++++++++- .../repository_analysis_agent/models.py | 8 +++ .../test_repository_analysis_rendering.py | 53 +++++++++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 tests/unit/agents/test_repository_analysis_rendering.py diff --git a/src/agents/repository_analysis_agent/agent.py b/src/agents/repository_analysis_agent/agent.py index f33f4ca..4db6b7f 100644 --- a/src/agents/repository_analysis_agent/agent.py +++ b/src/agents/repository_analysis_agent/agent.py @@ -1,7 +1,7 @@ import logging import time from datetime import datetime -from typing import Any, Dict +from typing import 
@staticmethod
def _build_rules_yaml(recommendations: List[Any]) -> str:
    """Render a combined rules.yaml from individual recommendations.

    FIX: each recommendation is now emitted as a YAML *list item* under
    ``rules:``. The previous rendering nested every rule mapping directly
    under ``rules:`` at the same indent, which produced duplicate keys
    (``description:`` etc.) as soon as there was more than one rule — and
    generation always emits several — making the output lossy/unloadable.
    NOTE(review): the PATCH-2 unit test asserting ``"\\n  description"``
    codifies the old broken layout and must be updated with this change.
    """
    if not recommendations:
        return ""

    rendered = ["rules:"]
    for rec in recommendations:
        rule_lines = rec.yaml_content.rstrip("\n").splitlines()
        if not rule_lines:
            continue
        # First line opens the list item; the rest keep their relative indent.
        rendered.append(f"  - {rule_lines[0]}")
        rendered.extend(f"    {line}" for line in rule_lines[1:])

    return "\n".join(rendered) + "\n"


@staticmethod
def _build_pr_template(repo_full_name: str, recommendations: List[Any]) -> str:
    """Build a PR body with install steps and a one-line summary per rule.

    FIX: the bullet list previously used a backslash escape inside an
    f-string expression (``.strip('\\"')``), which is a SyntaxError on
    Python < 3.12 (PEP 701 lifted that restriction only in 3.12). Title
    extraction now happens outside the f-string, and an empty
    ``yaml_content`` no longer raises IndexError.
    """
    titles = []
    for rec in recommendations:
        first_line = rec.yaml_content.splitlines()[0] if rec.yaml_content else ""
        titles.append(first_line.replace("description: ", "").strip('"'))
    bullet_rules = "\n".join(f"- {title}" for title in titles)

    return f"""## Watchflow Rule Proposal for {repo_full_name}

We've analyzed recent PRs and propose enabling the following rules:

{bullet_rules}

### Installation
1) Install the Watchflow GitHub App and grant access to this repo.
2) Add the provided `rules.yaml` under `.watchflow/`.
3) Watchflow will start reporting on PRs with the rules above.

### Files Included
- `.watchflow/rules.yaml` (see below)

If you'd like, we can adjust these rules or add more coverage based on your feedback.
"""
+""" + async def analyze_repository(self, request: RepositoryAnalysisRequest) -> RepositoryAnalysisResponse: """ Convenience method for analyzing a repository using the request model. diff --git a/src/agents/repository_analysis_agent/models.py b/src/agents/repository_analysis_agent/models.py index 8280531..f6bc647 100644 --- a/src/agents/repository_analysis_agent/models.py +++ b/src/agents/repository_analysis_agent/models.py @@ -111,3 +111,11 @@ class RepositoryAnalysisResponse(BaseModel): ) analyzed_at: str = Field(description="Timestamp of analysis") total_recommendations: int = Field(description="Total number of recommendations made") + rules_yaml: str = Field( + description="Rendered rules.yaml content for easy consumption", + default="", + ) + pr_template: str = Field( + description="Prebuilt PR body that includes install steps and rule summary", + default="", + ) diff --git a/tests/unit/agents/test_repository_analysis_rendering.py b/tests/unit/agents/test_repository_analysis_rendering.py new file mode 100644 index 0000000..1f35d13 --- /dev/null +++ b/tests/unit/agents/test_repository_analysis_rendering.py @@ -0,0 +1,53 @@ +from src.agents.repository_analysis_agent.agent import RepositoryAnalysisAgent +from src.agents.repository_analysis_agent.models import RuleRecommendation + + +def test_build_rules_yaml_renders_rules(): + agent = RepositoryAnalysisAgent() + recs = [ + RuleRecommendation( + yaml_content="""description: "Rule A" +enabled: true +event_types: ["pull_request"] +parameters: + foo: bar +""", + confidence=0.9, + reasoning="test", + source_patterns=[], + category="quality", + estimated_impact="high", + ) + ] + + rendered = agent._build_rules_yaml(recs) + + assert rendered.startswith("rules:") + assert "description: \"Rule A\"" in rendered + # Ensure indentation under rules: + assert "\n description" in rendered + + +def test_build_pr_template_includes_repo_and_rules(): + agent = RepositoryAnalysisAgent() + recs = [ + RuleRecommendation( + 
yaml_content="""description: "Rule A" +enabled: true +event_types: ["pull_request"] +parameters: {} +""", + confidence=0.9, + reasoning="test", + source_patterns=[], + category="quality", + estimated_impact="high", + ) + ] + + pr_body = agent._build_pr_template("owner/repo", recs) + + assert "owner/repo" in pr_body + assert "Rule A" in pr_body + assert "Install the Watchflow GitHub App" in pr_body + From fda660e52fb3255e023e6b1c1d3f1809e702e330 Mon Sep 17 00:00:00 2001 From: naaa760 Date: Tue, 9 Dec 2025 14:45:05 +0530 Subject: [PATCH 3/3] chore: add prompt stubs for repository analysis agent --- src/agents/repository_analysis_agent/prompts.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 src/agents/repository_analysis_agent/prompts.py diff --git a/src/agents/repository_analysis_agent/prompts.py b/src/agents/repository_analysis_agent/prompts.py new file mode 100644 index 0000000..799f284 --- /dev/null +++ b/src/agents/repository_analysis_agent/prompts.py @@ -0,0 +1,9 @@ +""" +Minimal prompt stubs for repository analysis agent. +These constants are placeholders; adjust as needed for richer LLM workflows. +""" + +CONTRIBUTING_GUIDELINES_ANALYSIS_PROMPT = "Analyze contributing guidelines." +REPOSITORY_ANALYSIS_PROMPT = "Analyze repository structure." +RULE_GENERATION_PROMPT = "Generate rule YAML." +