diff --git a/AGENTS.md b/AGENTS.md index 0c11191..e01dc12 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -22,7 +22,7 @@ Skill Runtime 全生命周期已落地(含依赖管理 Schema v15),Schema - **Workspace**:`%LOCALAPPDATA%\devbase\workspace/` —— 文件系统 = source of truth - `vault/` —— PARA 结构:00-Inbox, 01-Projects, 02-Areas, 03-Resources, 04-Archives, 99-Meta - `assets/` —— 二进制资源 -- **MCP Server**:stdio only,**48 个 tools**(含 5 个 vault tools + 8 个代码分析工具 + 4 个 embedding/搜索工具 + 4 个 Skill Runtime tools + 3 个 Workflow/评分 tools + 1 个报告工具 + 1 个 arXiv 工具 + 2 个 KnownLimit tools + 3 个 Relation tools + 2 个 Agent 状态工具 + 1 个 streaming index 工具 + 1 个 oplog 工具);配置见 `mcp.json` +- **MCP Server**:stdio only,**49 个 tools**(含 5 个 vault tools + 8 个代码分析工具 + 4 个 embedding/搜索工具 + 4 个 Skill Runtime tools + 3 个 Workflow/评分 tools + 1 个报告工具 + 1 个 arXiv 工具 + 2 个 KnownLimit tools + 3 个 Relation tools + 2 个 Agent 状态工具 + 1 个 streaming index 工具 + 1 个 oplog 工具);配置见 `mcp.json` - **Kimi CLI 集成**:MCP server 已通过 `kimi mcp add` 注册,端到端验证通过(`kimi --print` 成功调用 `devkit_health`);项目级 skill 位于 `.kimi/skills/devbase-project/SKILL.md` - **统一节点模型**:`core::node::{Node, NodeType, Edge}` —— GitRepo / VaultNote / Asset / ExternalLink - **当前测试**:490+ workspace passed / 0 failed / 4 ignored(主 crate 390 + symbol-links 4 + sync-protocol 12 + core-types 3 + syncthing-client 2 + vault-frontmatter 5 + vault-wikilink 5 + workflow-interpolate 9 + workflow-model 2 + registry-health 3 + registry-metrics 4 + registry-workspace 5 + embedding 5 + skill-runtime-types 7 + skill-runtime-parser 3 + 其他 crates ~30);11/11 passed(integration `tests/cli.rs`) diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index b01a265..dcc6785 100644 --- a/src/mcp/mod.rs +++ b/src/mcp/mod.rs @@ -104,6 +104,7 @@ pub enum McpToolEnum { WorkflowRun(DevkitWorkflowRunTool), WorkflowStatus(DevkitWorkflowStatusTool), OplogQuery(DevkitOplogQueryTool), + Evaluate(DevkitEvaluateTool), } /// Stability tier for MCP tools. 
@@ -180,6 +181,7 @@ impl McpToolEnum { McpToolEnum::WorkflowRun(_) => ToolTier::Beta, McpToolEnum::WorkflowStatus(_) => ToolTier::Beta, McpToolEnum::OplogQuery(_) => ToolTier::Beta, + McpToolEnum::Evaluate(_) => ToolTier::Beta, } } } @@ -235,6 +237,7 @@ impl McpTool for McpToolEnum { McpToolEnum::WorkflowRun(t) => t.name(), McpToolEnum::WorkflowStatus(t) => t.name(), McpToolEnum::OplogQuery(t) => t.name(), + McpToolEnum::Evaluate(t) => t.name(), } } @@ -288,6 +291,7 @@ impl McpTool for McpToolEnum { McpToolEnum::WorkflowRun(t) => t.schema(), McpToolEnum::WorkflowStatus(t) => t.schema(), McpToolEnum::OplogQuery(t) => t.schema(), + McpToolEnum::Evaluate(t) => t.schema(), } } @@ -345,6 +349,7 @@ impl McpTool for McpToolEnum { McpToolEnum::WorkflowRun(t) => t.invoke(args, ctx).await, McpToolEnum::WorkflowStatus(t) => t.invoke(args, ctx).await, McpToolEnum::OplogQuery(t) => t.invoke(args, ctx).await, + McpToolEnum::Evaluate(t) => t.invoke(args, ctx).await, } } } @@ -592,6 +597,7 @@ pub fn build_server_with_tiers(tiers: Option<&HashSet>) -> McpServer { McpToolEnum::WorkflowRun(DevkitWorkflowRunTool), McpToolEnum::WorkflowStatus(DevkitWorkflowStatusTool), McpToolEnum::OplogQuery(DevkitOplogQueryTool), + McpToolEnum::Evaluate(DevkitEvaluateTool), ]; for tool in all_tools { if let Some(allowed) = tiers diff --git a/src/mcp/tests.rs b/src/mcp/tests.rs index 2f30c1d..2d199ba 100644 --- a/src/mcp/tests.rs +++ b/src/mcp/tests.rs @@ -39,8 +39,9 @@ async fn test_tools_list() { let (mut ctx, _tmp) = test_ctx(); let resp = server.handle_request(req, &mut ctx).await.unwrap(); let tools = resp.get("result").unwrap().get("tools").unwrap().as_array().unwrap(); - assert_eq!(tools.len(), 48); + assert_eq!(tools.len(), 49); let names: Vec<&str> = tools.iter().map(|t| t.get("name").unwrap().as_str().unwrap()).collect(); + assert!(names.contains(&"devkit_evaluate")); assert!(names.contains(&"devkit_scan")); assert!(names.contains(&"devkit_health")); assert!(names.contains(&"devkit_sync")); 
diff --git a/src/mcp/tools/evaluate.rs b/src/mcp/tools/evaluate.rs
new file mode 100644
--- /dev/null
+++ b/src/mcp/tools/evaluate.rs
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2026 juice094
+//! devkit_evaluate: AI self-evaluation tool inspired by Claude Computer Use.
+//!
+//! After the AI makes code changes, it can call this tool to automatically
+//! verify correctness via cargo check / clippy / fmt / test (no-run).
+//! This closes the loop: AI acts → AI evaluates → AI decides next step.
+
+use super::super::McpTool;
+use crate::storage::AppContext;
+use anyhow::Context;
+use serde_json::json;
+use std::process::Command;
+use std::time::{Duration, Instant};
+
+/// Upper bound, in bytes, on the captured output echoed back per check.
+const OUTPUT_PREVIEW_LIMIT: usize = 2000;
+
+// `cargo` argument lists, one per check the tool can run.
+const CHECK_ARGS: &[&str] = &["check", "--all-targets"];
+const CLIPPY_ARGS: &[&str] = &["clippy", "--all-targets", "--", "-D", "warnings"];
+const FMT_ARGS: &[&str] = &["fmt", "--check"];
+const TEST_LIB_ARGS: &[&str] = &["test", "--lib", "--no-run"];
+const TEST_ALL_ARGS: &[&str] = &["test", "--all-targets", "--no-run"];
+
+/// MCP tool that runs `cargo` quality checks and reports each outcome as JSON.
+#[derive(Clone)]
+pub struct DevkitEvaluateTool;
+
+impl McpTool for DevkitEvaluateTool {
+    fn name(&self) -> &'static str {
+        "devkit_evaluate"
+    }
+
+    fn schema(&self) -> serde_json::Value {
+        json!({
+            "description": r#"Run automated quality checks and return a structured report.
+
+Use this when the AI (or user) wants to:
+- Verify that recent changes compile without errors
+- Check for clippy warnings or formatting issues
+- Get a quick quality assessment before committing or merging
+
+Scopes:
+- "check_only" (default, fastest): cargo check + clippy + fmt — ~10-30s
+- "lib": cargo check + cargo test --lib --no-run + clippy + fmt — verifies test compilation
+- "full": cargo check + cargo test --all-targets --no-run + clippy + fmt — verifies all targets
+
+Returns a structured JSON report with success/failure per check and captured output snippets."#,
+            "inputSchema": {
+                "type": "object",
+                "properties": {
+                    "scope": {
+                        "type": "string",
+                        "enum": ["check_only", "lib", "full"],
+                        "description": "Evaluation scope. Default: check_only"
+                    }
+                }
+            }
+        })
+    }
+
+    /// Runs the checks selected by the optional `scope` argument.
+    ///
+    /// # Errors
+    ///
+    /// Fails when `scope` is present but is not one of the schema's values, or
+    /// when a `cargo` process cannot be started. A check that runs and fails is
+    /// not an error: it is reported with `"success": false`.
+    async fn invoke(
+        &self,
+        args: serde_json::Value,
+        _ctx: &mut AppContext,
+    ) -> anyhow::Result<serde_json::Value> {
+        let scope = match args.get("scope") {
+            None | Some(serde_json::Value::Null) => EvalScope::CheckOnly,
+            // Reject unknown values instead of silently running the cheapest scope.
+            Some(raw) => raw.as_str().and_then(EvalScope::parse).ok_or_else(|| {
+                anyhow::anyhow!(
+                    "invalid scope {raw}: expected \"check_only\", \"lib\" or \"full\""
+                )
+            })?,
+        };
+
+        let start = Instant::now();
+
+        // NOTE(review): each call blocks until `cargo` exits, inside an async fn.
+        // Consider moving them onto a blocking-friendly thread — confirm which
+        // runtime drives `invoke` first.
+        let check = run_cargo(CHECK_ARGS)?;
+        let clippy = run_cargo(CLIPPY_ARGS)?;
+        let fmt = run_cargo(FMT_ARGS)?;
+        let test_compile = scope.test_compile_args().map(run_cargo).transpose()?;
+
+        let overall_success = check.success
+            && clippy.success
+            && fmt.success
+            && test_compile.as_ref().is_none_or(|t| t.success);
+
+        let total_ms = millis_i64(start.elapsed());
+
+        Ok(json!({
+            "success": overall_success,
+            "scope": scope.as_str(),
+            "check": check.into_json(),
+            "clippy": clippy.into_json(),
+            "fmt": fmt.into_json(),
+            "test_compile": test_compile.map(CheckResult::into_json),
+            "total_duration_ms": total_ms,
+        }))
+    }
+}
+
+/// Evaluation scope selected by the caller; mirrors the schema's `enum`.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum EvalScope {
+    CheckOnly,
+    Lib,
+    Full,
+}
+
+impl EvalScope {
+    /// Maps a wire name to a scope; `None` for anything outside the schema.
+    fn parse(raw: &str) -> Option<Self> {
+        match raw {
+            "check_only" => Some(Self::CheckOnly),
+            "lib" => Some(Self::Lib),
+            "full" => Some(Self::Full),
+            _ => None,
+        }
+    }
+
+    /// Wire name echoed back in the report.
+    fn as_str(self) -> &'static str {
+        match self {
+            Self::CheckOnly => "check_only",
+            Self::Lib => "lib",
+            Self::Full => "full",
+        }
+    }
+
+    /// Arguments of the extra test-compilation step, if the scope has one.
+    fn test_compile_args(self) -> Option<&'static [&'static str]> {
+        match self {
+            Self::CheckOnly => None,
+            Self::Lib => Some(TEST_LIB_ARGS),
+            Self::Full => Some(TEST_ALL_ARGS),
+        }
+    }
+}
+
+/// Outcome of one `cargo` invocation.
+struct CheckResult {
+    success: bool,
+    duration_ms: i64,
+    output: String,
+}
+
+impl CheckResult {
+    /// Serializes the result, keeping only a bounded preview of the output.
+    fn into_json(self) -> serde_json::Value {
+        json!({
+            "success": self.success,
+            "duration_ms": self.duration_ms,
+            "output_preview": truncate_output(&self.output, OUTPUT_PREVIEW_LIMIT),
+        })
+    }
+}
+
+/// Runs `cargo <args>` and records its exit status, wall time and output.
+fn run_cargo(args: &[&str]) -> anyhow::Result<CheckResult> {
+    let start = Instant::now();
+    let (success, output) = run_command(Command::new("cargo").args(args))?;
+    Ok(CheckResult {
+        success,
+        duration_ms: millis_i64(start.elapsed()),
+        output,
+    })
+}
+
+/// Runs `cmd` to completion and returns its success flag plus stdout and
+/// stderr joined by a newline (whichever is non-empty when only one is).
+fn run_command(cmd: &mut Command) -> anyhow::Result<(bool, String)> {
+    let output = cmd
+        .output()
+        .with_context(|| format!("failed to run {:?}", cmd.get_program()))?;
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    let combined = match (stdout.is_empty(), stderr.is_empty()) {
+        (true, _) => stderr.into_owned(),
+        (false, true) => stdout.into_owned(),
+        (false, false) => format!("{stdout}\n{stderr}"),
+    };
+    Ok((output.status.success(), combined))
+}
+
+/// Converts an elapsed time to whole milliseconds, saturating at `i64::MAX`.
+fn millis_i64(elapsed: Duration) -> i64 {
+    i64::try_from(elapsed.as_millis()).unwrap_or(i64::MAX)
+}
+
+/// Returns `s` unchanged when it is at most `max_len` bytes long; otherwise
+/// keeps a prefix of at most `max_len` bytes and appends a truncation marker.
+///
+/// The cut is moved back to the nearest UTF-8 character boundary, so output
+/// containing multi-byte characters cannot trigger a slicing panic.
+fn truncate_output(s: &str, max_len: usize) -> String {
+    if s.len() <= max_len {
+        return s.to_string();
+    }
+    // `max_len < s.len()` here, and index 0 is always a boundary, so this ends.
+    let mut cut = max_len;
+    while !s.is_char_boundary(cut) {
+        cut -= 1;
+    }
+    let (head, tail) = s.split_at(cut);
+    format!("{head}\n...[truncated {} chars]", tail.chars().count())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_truncate_output() {
+        assert_eq!(truncate_output("hello", 10), "hello");
+        let long = "a".repeat(3000);
+        let truncated = truncate_output(&long, 2000);
+        assert!(truncated.contains("[truncated"));
+        assert!(truncated.len() < 2100);
+    }
+
+    #[test]
+    fn test_truncate_output_multibyte_boundary() {
+        // Each "é" is two bytes, so a limit of 3 lands inside the second one.
+        assert_eq!(truncate_output("ééé", 3), "é\n...[truncated 2 chars]");
+    }
+
+    #[test]
+    fn test_eval_scope_parse() {
+        assert_eq!(EvalScope::parse("check_only"), Some(EvalScope::CheckOnly));
+        assert_eq!(EvalScope::parse("lib"), Some(EvalScope::Lib));
+        assert_eq!(EvalScope::parse("full"), Some(EvalScope::Full));
+        assert_eq!(EvalScope::parse("Full"), None);
+    }
+
+    #[test]
+    fn test_evaluate_tool_name() {
+        let t = DevkitEvaluateTool;
+        assert_eq!(t.name(), "devkit_evaluate");
+    }
+}
diff --git a/src/mcp/tools/mod.rs b/src/mcp/tools/mod.rs
index 77586fc..264a7de 100644
--- a/src/mcp/tools/mod.rs
+++ b/src/mcp/tools/mod.rs
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: MIT
 // Copyright (c) 2026 juice094
 pub mod context;
+pub mod evaluate;
 pub mod known_limit;
 pub mod oplog;
 pub mod query;
@@ -28,6 +29,7 @@ pub use vault::*;
 pub use workflow::*;
 
 pub use code_analysis::*;
+pub use evaluate::*;
 pub use external::*;
 pub use knowledge::*;
 pub use search::*;