diff --git a/src/agents/repository_analysis_agent/agent.py b/src/agents/repository_analysis_agent/agent.py index acadc95..2a5c3d0 100644 --- a/src/agents/repository_analysis_agent/agent.py +++ b/src/agents/repository_analysis_agent/agent.py @@ -4,7 +4,7 @@ from __future__ import annotations -import time + from src.agents.base import AgentResult, BaseAgent from src.agents.repository_analysis_agent.models import RepositoryAnalysisRequest, RepositoryAnalysisState @@ -25,29 +25,16 @@ def _build_graph(self): # Graph orchestration is handled procedurally in execute for clarity. return None - async def execute(self, **kwargs) -> AgentResult: - started_at = time.perf_counter() - request = RepositoryAnalysisRequest(**kwargs) - state = RepositoryAnalysisState( - repository_full_name=request.repository_full_name, - installation_id=request.installation_id, - ) try: await analyze_repository_structure(state) await analyze_pr_history(state, request.max_prs) await analyze_contributing_guidelines(state) - state.recommendations = _default_recommendations(state) - validate_recommendations(state) - response = summarize_analysis(state, request) latency_ms = int((time.perf_counter() - started_at) * 1000) return AgentResult( - success=True, - message="Repository analysis completed", - data={"analysis_response": response}, - metadata={"execution_time_ms": latency_ms}, + ) except Exception as exc: # noqa: BLE001 latency_ms = int((time.perf_counter() - started_at) * 1000) @@ -55,5 +42,4 @@ async def execute(self, **kwargs) -> AgentResult: success=False, message=f"Repository analysis failed: {exc}", data={}, - metadata={"execution_time_ms": latency_ms}, ) diff --git a/src/agents/repository_analysis_agent/models.py b/src/agents/repository_analysis_agent/models.py index 3f93518..74416b4 100644 --- a/src/agents/repository_analysis_agent/models.py +++ b/src/agents/repository_analysis_agent/models.py @@ -1,5 +1,4 @@ -from datetime import datetime -from typing import Any + from pydantic import BaseModel, Field, field_validator, model_validator @@ -30,154 +29,33 @@ def parse_github_repo_identifier(value: str) -> str: repo = parts[gh_idx + 2] if len(parts) > gh_idx + 2 else "" return f"{owner}/{repo}".rstrip("/").removesuffix(".git") - return raw.rstrip("/").removesuffix(".git") class PullRequestSample(BaseModel): """Minimal PR snapshot used for recommendations.""" - number: int - title: str - state: str - merged: bool = False - additions: int | None = None - deletions: int | None = None - changed_files: int | None = None - -class RuleRecommendation(BaseModel): - """A recommended Watchflow rule with confidence and reasoning.""" - - yaml_rule: str = Field(description="Valid Watchflow rule YAML content") - confidence: float = Field(description="Confidence score (0.0-1.0)", ge=0.0, le=1.0) - reasoning: str = Field(description="Short explanation of why this rule is recommended") - strategy_used: str = Field(description="Strategy used (static, hybrid, llm)") class RepositoryFeatures(BaseModel): """Features and characteristics discovered in the repository.""" - has_contributing: bool = Field(default=False, description="Has CONTRIBUTING.md file") - has_codeowners: bool = Field(default=False, description="Has CODEOWNERS file") - has_workflows: bool = Field(default=False, description="Has GitHub Actions workflows") - workflow_count: int = Field(default=0, description="Number of workflow files") - language: str | None = Field(default=None, description="Primary programming language") - contributor_count: int = Field(default=0, description="Number of contributors") - pr_count: int = Field(default=0, description="Number of pull requests") + class ContributingGuidelinesAnalysis(BaseModel): """Analysis of contributing guidelines content.""" - content: str | None = Field(default=None, description="Full CONTRIBUTING.md content") - has_pr_template: bool = Field(default=False, description="Requires PR templates") - has_issue_template: bool = Field(default=False, description="Requires issue templates") - requires_tests: bool = Field(default=False, description="Requires tests for contributions") - requires_docs: bool = Field(default=False, description="Requires documentation updates") - code_style_requirements: list[str] = Field(default_factory=list, description="Code style requirements mentioned") - review_requirements: list[str] = Field(default_factory=list, description="Code review requirements mentioned") - - -class PullRequestPlan(BaseModel): - """Plan for creating a PR with generated rules.""" - - branch_name: str = "watchflow/rules" - base_branch: str = "main" - commit_message: str = "chore: add Watchflow rules" - pr_title: str = "Add Watchflow rules" - pr_body: str = "This PR adds Watchflow rule recommendations." - file_path: str = ".watchflow/rules.yaml" - - -class RepositoryAnalysisRequest(BaseModel): - """Request model for repository analysis.""" - - repository_url: str | None = Field(default=None, description="GitHub repository URL") - repository_full_name: str | None = Field(default=None, description="Full repository name (owner/repo)") - installation_id: int | None = Field(default=None, description="GitHub App installation ID") - max_prs: int = Field(default=10, ge=0, le=50, description="Max PRs to sample for analysis") - - @field_validator("repository_full_name", mode="before") - @classmethod - def normalize_full_name(cls, value: str | None, info) -> str: - if value: - return parse_github_repo_identifier(value) - raw_url = info.data.get("repository_url") - return parse_github_repo_identifier(raw_url or "") - - @field_validator("repository_url", mode="before") - @classmethod - def strip_url(cls, value: str | None) -> str | None: - return value.strip() if isinstance(value, str) else value - - @model_validator(mode="after") - def populate_full_name(self) -> "RepositoryAnalysisRequest": - if not self.repository_full_name and self.repository_url: - self.repository_full_name = parse_github_repo_identifier(self.repository_url) - return self class RepositoryAnalysisState(BaseModel): """State for the repository analysis workflow.""" repository_full_name: str - installation_id: int | None - pr_samples: list[PullRequestSample] = Field(default_factory=list) - repository_features: RepositoryFeatures = Field(default_factory=RepositoryFeatures) - contributing_analysis: ContributingGuidelinesAnalysis = Field(default_factory=ContributingGuidelinesAnalysis) - recommendations: list[RuleRecommendation] = Field(default_factory=list) - rules_yaml: str | None = None - pr_plan: PullRequestPlan | None = None - analysis_summary: dict[str, Any] = Field(default_factory=dict) - errors: list[str] = Field(default_factory=list) + class RepositoryAnalysisResponse(BaseModel): """Response model containing rule recommendations and PR plan.""" repository_full_name: str = Field(description="Repository that was analyzed") - rules_yaml: str = Field(description="Combined Watchflow rules YAML") - recommendations: list[RuleRecommendation] = Field(default_factory=list, description="Rule recommendations") - pr_plan: PullRequestPlan | None = Field(default=None, description="Suggested PR plan") - analysis_summary: dict[str, Any] = Field(default_factory=dict, description="Summary of analysis findings") - analyzed_at: datetime = Field(default_factory=datetime.utcnow, description="Timestamp of analysis") - - -class ProceedWithPullRequestRequest(BaseModel): - """Request to create a PR with generated rules.""" - - repository_url: str | None = Field(default=None, description="GitHub repository URL") - repository_full_name: str | None = Field(default=None, description="Full repository name (owner/repo)") - installation_id: int | None = Field(default=None, description="GitHub App installation ID") - user_token: str | None = Field(default=None, description="User token for GitHub operations (optional)") - rules_yaml: str = Field(description="Rules YAML content to commit") - branch_name: str = Field(default="watchflow/rules", description="Branch to create or update") - base_branch: str = Field(default="main", description="Base branch for the PR") - commit_message: str = Field(default="chore: add Watchflow rules", description="Commit message") - pr_title: str = Field(default="Add Watchflow rules", description="Pull request title") - pr_body: str = Field(default="This PR adds Watchflow rule recommendations.", description="Pull request body") - file_path: str = Field(default=".watchflow/rules.yaml", description="Path to rules file in repo") - - @field_validator("repository_full_name", mode="before") - @classmethod - def normalize_full_name(cls, value: str | None, info) -> str: - if value: - return parse_github_repo_identifier(value) - raw_url = info.data.get("repository_url") - return parse_github_repo_identifier(raw_url or "") - - @model_validator(mode="after") - def populate_full_name(self) -> "ProceedWithPullRequestRequest": - if not self.repository_full_name and self.repository_url: - self.repository_full_name = parse_github_repo_identifier(self.repository_url) - return self - - -class ProceedWithPullRequestResponse(BaseModel): - """Response after creating the PR.""" - - pull_request_url: str - branch_name: str - base_branch: str - file_path: str - commit_sha: str | None = None diff --git a/src/agents/repository_analysis_agent/nodes.py b/src/agents/repository_analysis_agent/nodes.py index 8e97ab2..aa1bafc 100644 --- a/src/agents/repository_analysis_agent/nodes.py +++ b/src/agents/repository_analysis_agent/nodes.py @@ -1,19 +1,4 @@ -""" -Workflow nodes for the RepositoryAnalysisAgent. -Each node is a small, testable function that mutates the RepositoryAnalysisState. -The nodes favor static/hybrid strategies first and avoid heavy LLM calls unless -strictly necessary. -""" - -from __future__ import annotations - -import textwrap -from typing import Any - -import yaml - -from src.agents.repository_analysis_agent.models import ( ContributingGuidelinesAnalysis, PullRequestPlan, PullRequestSample, @@ -23,121 +8,7 @@ RepositoryFeatures, RuleRecommendation, ) -from src.integrations.github.api import github_client - -async def analyze_repository_structure(state: RepositoryAnalysisState) -> None: - """Collect repository metadata and structure signals.""" - repo = state.repository_full_name - installation_id = state.installation_id - - repo_data = await github_client.get_repository(repo, installation_id=installation_id) - workflows = await github_client.list_directory_any_auth( - repo_full_name=repo, path=".github/workflows", installation_id=installation_id - ) - contributors = await github_client.get_repository_contributors(repo, installation_id) if installation_id else [] - - state.repository_features = RepositoryFeatures( - has_contributing=False, - has_codeowners=bool(await github_client.get_file_content(repo, ".github/CODEOWNERS", installation_id)), - has_workflows=bool(workflows), - workflow_count=len(workflows or []), - language=(repo_data or {}).get("language"), - contributor_count=len(contributors), - pr_count=0, - ) - - -async def analyze_pr_history(state: RepositoryAnalysisState, max_prs: int) -> None: - """Fetch a small sample of recent pull requests for context.""" - repo = state.repository_full_name - installation_id = state.installation_id - prs = await github_client.list_pull_requests(repo, installation_id=installation_id, state="all", per_page=max_prs) - - samples: list[PullRequestSample] = [] - for pr in prs or []: - samples.append( - PullRequestSample( - number=pr.get("number", 0), - title=pr.get("title", ""), - state=pr.get("state", ""), - merged=bool(pr.get("merged_at")), - additions=pr.get("additions"), - deletions=pr.get("deletions"), - changed_files=pr.get("changed_files"), - ) - ) - - state.pr_samples = samples - state.repository_features.pr_count = len(samples) - - -async def analyze_contributing_guidelines(state: RepositoryAnalysisState) -> None: - """Fetch and parse CONTRIBUTING guidelines if present.""" - repo = state.repository_full_name - installation_id = state.installation_id - - content = await github_client.get_file_content( - repo, "CONTRIBUTING.md", installation_id - ) or await github_client.get_file_content(repo, ".github/CONTRIBUTING.md", installation_id) - - if not content: - state.contributing_analysis = ContributingGuidelinesAnalysis(content=None) - return - - lowered = content.lower() - state.contributing_analysis = ContributingGuidelinesAnalysis( - content=content, - has_pr_template="pr template" in lowered or "pull request template" in lowered, - has_issue_template="issue template" in lowered, - requires_tests="test" in lowered or "tests" in lowered, - requires_docs="docs" in lowered or "documentation" in lowered, - code_style_requirements=[ - req for req in ["lint", "format", "pep8", "flake8", "eslint", "prettier"] if req in lowered - ], - review_requirements=[req for req in ["review", "approval"] if req in lowered], - ) - - -def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecommendation]: - """Return a minimal, deterministic set of diff-aware rules.""" - recommendations: list[RuleRecommendation] = [] - - # Require tests when source code changes. - recommendations.append( - RuleRecommendation( - yaml_rule=textwrap.dedent( - """ - description: "Require tests when code changes" - enabled: true - severity: medium - event_types: - - pull_request - validators: - - type: diff_pattern - parameters: - file_patterns: - - "**/*.py" - - "**/*.ts" - - "**/*.tsx" - - "**/*.js" - - "**/*.go" - - type: related_tests - parameters: - search_paths: - - "**/tests/**" - - "**/*_test.py" - - "**/*.spec.ts" - - "**/*.test.js" - actions: - - type: warn - parameters: - message: "Please include or update tests for code changes." - """ - ).strip(), - confidence=0.74, - reasoning="Default guardrail for code changes without tests.", - strategy_used="static", ) ) @@ -196,48 +67,3 @@ def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecomme ) ) - return recommendations - - -def _render_rules_yaml(recommendations: list[RuleRecommendation]) -> str: - """Combine rule YAML snippets into a single YAML document.""" - yaml_blocks = [rec.yaml_rule.strip() for rec in recommendations] - return "\n\n---\n\n".join(yaml_blocks) - - -def _default_pr_plan(state: RepositoryAnalysisState) -> PullRequestPlan: - """Create a default PR plan.""" - return PullRequestPlan( - branch_name="watchflow/rules", - base_branch="main", - commit_message="chore: add Watchflow rules", - pr_title="Add Watchflow rules", - pr_body="This PR adds Watchflow rule recommendations generated by Watchflow.", - ) - - -def validate_recommendations(state: RepositoryAnalysisState) -> None: - """Ensure generated YAML is valid.""" - for rec in state.recommendations: - yaml.safe_load(rec.yaml_rule) - - -def summarize_analysis( - state: RepositoryAnalysisState, request: RepositoryAnalysisRequest -) -> RepositoryAnalysisResponse: - """Build the final response.""" - rules_yaml = _render_rules_yaml(state.recommendations) - pr_plan = state.pr_plan or _default_pr_plan(state) - analysis_summary: dict[str, Any] = { - "repository_features": state.repository_features.model_dump(), - "contributing": state.contributing_analysis.model_dump(), - "pr_samples": [pr.model_dump() for pr in state.pr_samples[: request.max_prs]], - } - - return RepositoryAnalysisResponse( - repository_full_name=state.repository_full_name, - rules_yaml=rules_yaml, - recommendations=state.recommendations, - pr_plan=pr_plan, - analysis_summary=analysis_summary, - ) diff --git a/src/agents/repository_analysis_agent/prompts.py b/src/agents/repository_analysis_agent/prompts.py index 94bfe4a..8b13789 100644 --- a/src/agents/repository_analysis_agent/prompts.py +++ b/src/agents/repository_analysis_agent/prompts.py @@ -1,97 +1 @@ -from langchain_core.prompts import ChatPromptTemplate -CONTRIBUTING_GUIDELINES_ANALYSIS_PROMPT = ChatPromptTemplate.from_template(""" -You are a senior software engineer analyzing contributing guidelines to recommend appropriate repository governance rules. - -Analyze the following CONTRIBUTING.md content and extract patterns, requirements, and best practices that would benefit from automated enforcement via Watchflow rules. - -CONTRIBUTING.md Content: -{content} - -Your task is to extract: -1. Pull request requirements (templates, reviews, tests, etc.) -2. Code quality standards (linting, formatting, etc.) -3. Documentation requirements -4. Commit message conventions -5. Branch naming conventions -6. Testing requirements -7. Security practices - -Provide your analysis in the following JSON format: -{{ - "has_pr_template": boolean, - "has_issue_template": boolean, - "requires_tests": boolean, - "requires_docs": boolean, - "code_style_requirements": ["list", "of", "requirements"], - "review_requirements": ["list", "of", "requirements"] -}} - -Be thorough but only extract information that is explicitly mentioned or strongly implied in the guidelines. -""") - -REPOSITORY_ANALYSIS_PROMPT = ChatPromptTemplate.from_template(""" -You are analyzing a GitHub repository to recommend Watchflow rules based on its structure, workflows, and contributing patterns. - -Repository Information: -- Name: {repository_full_name} -- Primary Language: {language} -- Contributors: {contributor_count} -- Pull Requests: {pr_count} -- Issues: {issue_count} -- Has Workflows: {has_workflows} -- Has Branch Protection: {has_branch_protection} -- Has CODEOWNERS: {has_codeowners} - -Contributing Guidelines Analysis: -{contributing_analysis} - -Based on this repository profile, recommend appropriate Watchflow rules that would improve governance, quality, and security. - -Consider: -1. Code quality rules (linting, testing, formatting) -2. Security rules (dependency scanning, secret detection) -3. Process rules (PR reviews, branch protection, CI/CD) -4. Documentation rules (README updates, CHANGELOG) - -For each recommendation, provide: -- A valid Watchflow rule YAML -- Confidence score (0.0-1.0) -- Reasoning for the recommendation -- Source patterns that led to it -- Category and impact level - -Focus on rules that are most relevant to this repository's characteristics and would provide the most value. -""") - -RULE_GENERATION_PROMPT = ChatPromptTemplate.from_template(""" -Generate a valid Watchflow rule YAML based on the following specification: - -Category: {category} -Description: {description} -Parameters: {parameters} -Event Types: {event_types} -Severity: {severity} - -Generate a complete, valid Watchflow rule in YAML format that implements this specification. -Ensure the rule follows Watchflow YAML schema and is properly formatted. - -Watchflow Rule YAML Format: -```yaml -description: "Rule description" -enabled: true -severity: "medium" -event_types: - - pull_request -conditions: - - type: "condition_type" - parameters: - key: "value" -actions: - - type: "action_type" - parameters: - key: "value" -``` - -Make sure the rule is functional and follows best practices. -""") diff --git a/tests/unit/agents/test_repository_analysis_rendering.py b/tests/unit/agents/test_repository_analysis_rendering.py new file mode 100644 index 0000000..1f35d13 --- /dev/null +++ b/tests/unit/agents/test_repository_analysis_rendering.py @@ -0,0 +1,53 @@ +from src.agents.repository_analysis_agent.agent import RepositoryAnalysisAgent +from src.agents.repository_analysis_agent.models import RuleRecommendation + + +def test_build_rules_yaml_renders_rules(): + agent = RepositoryAnalysisAgent() + recs = [ + RuleRecommendation( + yaml_content="""description: "Rule A" +enabled: true +event_types: ["pull_request"] +parameters: + foo: bar +""", + confidence=0.9, + reasoning="test", + source_patterns=[], + category="quality", + estimated_impact="high", + ) + ] + + rendered = agent._build_rules_yaml(recs) + + assert rendered.startswith("rules:") + assert "description: \"Rule A\"" in rendered + # Ensure indentation under rules: + assert "\n description" in rendered + + +def test_build_pr_template_includes_repo_and_rules(): + agent = RepositoryAnalysisAgent() + recs = [ + RuleRecommendation( + yaml_content="""description: "Rule A" +enabled: true +event_types: ["pull_request"] +parameters: {} +""", + confidence=0.9, + reasoning="test", + source_patterns=[], + category="quality", + estimated_impact="high", + ) + ] + + pr_body = agent._build_pr_template("owner/repo", recs) + + assert "owner/repo" in pr_body + assert "Rule A" in pr_body + assert "Install the Watchflow GitHub App" in pr_body +