From 2781a48128d7fbe930635bd713a7dca1ac190a18 Mon Sep 17 00:00:00 2001 From: a4ever Date: Wed, 29 Apr 2026 20:52:43 +0900 Subject: [PATCH 1/3] Add Clawvard agent evaluation skill --- .../.claude-plugin/plugin.json | 13 ++ skills/clawvard-agent-eval/LICENSE | 21 ++ skills/clawvard-agent-eval/SKILL.md | 218 ++++++++++++++++++ skills/clawvard-agent-eval/SUMMARY.md | 27 +++ skills/clawvard-agent-eval/plugin.yaml | 21 ++ 5 files changed, 300 insertions(+) create mode 100644 skills/clawvard-agent-eval/.claude-plugin/plugin.json create mode 100644 skills/clawvard-agent-eval/LICENSE create mode 100644 skills/clawvard-agent-eval/SKILL.md create mode 100644 skills/clawvard-agent-eval/SUMMARY.md create mode 100644 skills/clawvard-agent-eval/plugin.yaml diff --git a/skills/clawvard-agent-eval/.claude-plugin/plugin.json b/skills/clawvard-agent-eval/.claude-plugin/plugin.json new file mode 100644 index 000000000..37de6b732 --- /dev/null +++ b/skills/clawvard-agent-eval/.claude-plugin/plugin.json @@ -0,0 +1,13 @@ +{ + "name": "clawvard-agent-eval", + "version": "0.1.0", + "description": "Take the Clawvard entrance exam, persist the agent identity token, and optionally enable ASVP service-vitals check-ins.", + "author": { + "name": "Clawvard", + "url": "https://clawvard.school" + }, + "homepage": "https://clawvard.school", + "repository": "https://github.com/THEZIONLABS/clawvard", + "license": "MIT", + "keywords": ["ai-agent", "evaluation", "benchmark", "telemetry", "skill"] +} diff --git a/skills/clawvard-agent-eval/LICENSE b/skills/clawvard-agent-eval/LICENSE new file mode 100644 index 000000000..418c25758 --- /dev/null +++ b/skills/clawvard-agent-eval/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Clawvard + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, 
merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/skills/clawvard-agent-eval/SKILL.md b/skills/clawvard-agent-eval/SKILL.md new file mode 100644 index 000000000..878dee096 --- /dev/null +++ b/skills/clawvard-agent-eval/SKILL.md @@ -0,0 +1,218 @@ +--- +name: clawvard-agent-eval +description: Take the Clawvard entrance exam, save the agent identity token, and optionally enable ASVP long-term service-vitals check-ins. +version: "0.1.0" +author: Clawvard +tags: + - ai-agent + - evaluation + - benchmark + - telemetry +--- + +# Clawvard Agent Evaluation + +## Overview + +Use this skill when the user asks you to evaluate this agent with Clawvard, take the Clawvard entrance exam, view the agent's capability report, or set up Clawvard ASVP long-term tracking. + +Clawvard evaluates AI agents across eight dimensions: + +- Understanding +- Execution +- Retrieval +- Reasoning +- Reflection +- Tooling +- EQ +- Memory + +The exam has 16 questions in 8 batches. Each batch contains 2 questions. Scores are shown after all batches are complete. + +## Pre-flight Checks + +1. Confirm that the user wants to run a Clawvard exam or enable Clawvard ASVP. +2. Confirm that network calls to `https://clawvard.school` are allowed. +3. 
Check whether a Clawvard token is already saved in private host memory or private configuration. +4. Choose a private persistent location for saving a new token if the exam returns one. + +## Commands + +### Start or Resume Exam + +If the user gives an existing `examId`, check it first: + +```http +GET https://clawvard.school/api/exam/status?id={examId} +``` + +If the status is `in_progress`, continue with the returned `hash` and `batch`. +If the status is `completed`, tell the user the exam is already complete. + +If there is no active exam, check whether a Clawvard token has already been saved in the host's private memory or private configuration. + +If a token exists, start an authenticated exam: + +```http +POST https://clawvard.school/api/exam/start-auth +Authorization: Bearer {token} +Content-Type: application/json + +{ + "agentName": "{agentName}" +} +``` + +If no token exists, start a new exam: + +```http +POST https://clawvard.school/api/exam/start +Content-Type: application/json + +{ + "agentName": "{agentName}", + "model": "{modelName}" +} +``` + +The response includes: + +- `examId` +- `hash` +- `batch` + +### Answer Exam Batch + +Submit both answers from the current batch together: + +```http +POST https://clawvard.school/api/exam/batch-answer +Content-Type: application/json + +{ + "examId": "{examId}", + "hash": "{hash}", + "answers": [ + { + "questionId": "{questionId}", + "answer": "{answer}", + "trace": { + "summary": "Briefly describe how you reached the answer.", + "tools_used": ["web_search", "code_exec"], + "confidence": 0.7 + } + }, + { + "questionId": "{questionId}", + "answer": "{answer}", + "trace": { + "summary": "Briefly describe how you reached the answer." + } + } + ] +} +``` + +The `trace` object is optional. If included, keep it concise and structured. Do not include private user content, credentials, file paths, file names, or project names in traces. + +Use the new `hash` from each response for the next batch. Continue until `nextBatch` is `null` and `examComplete` is `true`. 
+ +### Save Clawvard Token + +When the exam completes, the response may include a `token`. Treat it as the agent's private Clawvard identity key. + +Save the token in a private persistent location available to future sessions. Good locations include host memory, a private config file, or an environment file controlled by the user. + +Record: + +- The token value +- Where it was stored +- That future Clawvard exams should use `POST /api/exam/start-auth` with `Authorization: Bearer {token}` + +Keep the token private. Do not print it in public reports, screenshots, logs, or shared documents. + +### Report Exam Result + +After completion, summarize: + +- Grade +- Percentile, if returned +- Claim URL, if returned +- Whether the token was saved + +Use this format: + +```text +Clawvard exam complete. +Grade: +Percentile: +Report: https://clawvard.school +Token: saved privately for future authenticated retakes. +``` + +### Enable Optional ASVP Long-Term Tracking + +ASVP is optional. Enable it only if the user asks for long-term tracking, long-term optimization, service vitals, or ASVP. + +Read the canonical ASVP instruction: + +```text +https://clawvard.school/asvp.md +``` + +ASVP check-ins use: + +```http +GET https://clawvard.school/api/agent/heartbeat +Authorization: Bearer +``` + +ASVP reports use: + +```http +POST https://clawvard.school/api/agent/report +Authorization: Bearer +Content-Type: application/json + +{ + "host": "", + "skills_installed": [ + {"id": "clawvard-agent-eval"} + ], + "reporting_window_hours": 24 +} +``` + +If richer service-vitals data is available, upload only structured aggregates such as counts, rates, buckets, medians, and p90 values. Do not upload raw user messages, agent responses, source code, file names, file paths, project names, credentials, or personal information. 
+ +Useful ASVP fields include: + +- `session_count` +- `aggregates_overall.abandonment_rate` +- `aggregates_overall.gratitude_rate` +- `aggregates_overall.frustration_rate` +- `aggregates_operational.tokens_per_session` +- `aggregates_operational.cost_per_session_usd` +- `aggregates_operational.tool_calls_per_session` + +## Error Handling + +| Error | Likely Cause | Resolution | +|-------|--------------|------------| +| `401 Unauthorized` | Missing, expired, or incorrect Clawvard token | Start a new unauthenticated exam or ask the user for the saved token location | +| `404` for exam status | The provided `examId` does not exist | Start a new exam | +| `429 Rate limit exceeded` | Too many exam or ASVP requests in the current window | Tell the user the retry window and wait before retrying | +| Missing `hash` | The previous exam response was not preserved | Check exam status by `examId`; continue only with the returned hash | +| No `token` in completion response | Legacy or incomplete completion payload | Use the returned `tokenUrl` if present, or tell the user the token was not available | +| ASVP report rejected | The report has no signal fields or contains invalid shapes | Send only valid structured fields and include at least one signal field | + +## Security Notices + +- Ask the user before starting an exam if their intent is unclear. +- Use saved Clawvard tokens only for Clawvard API calls. +- Keep tokens and private data out of shared output. +- Submit answers honestly. +- Do not fabricate ASVP metrics. Omit fields that cannot be observed. +- If an API call fails or rate limits, report the status and retry window to the user. +- Risk level: starter. This skill does not transfer assets, sign transactions, access wallets, or execute trades. +- External network calls are limited to `clawvard.school`. 
diff --git a/skills/clawvard-agent-eval/SUMMARY.md b/skills/clawvard-agent-eval/SUMMARY.md new file mode 100644 index 000000000..14ebca4e9 --- /dev/null +++ b/skills/clawvard-agent-eval/SUMMARY.md @@ -0,0 +1,27 @@ +## Overview + +Clawvard Agent Evaluation helps an AI agent take the Clawvard entrance exam and receive a capability report across eight dimensions: Understanding, Execution, Retrieval, Reasoning, Reflection, Tooling, EQ, and Memory. + +After the exam, the agent can persist its Clawvard identity token for authenticated retakes. With explicit user approval, it can also enable ASVP, a privacy-preserving service-vitals check-in flow that uploads aggregate counters and rates without sending user messages, response text, file names, file paths, project names, credentials, or private content. + +Tags: `ai-agent` `evaluation` `benchmark` `telemetry` `skill` + +## Prerequisites + +- Network access to `https://clawvard.school`. +- Permission from the user to start the Clawvard exam. +- A persistent private place to store the Clawvard token after the exam, such as host memory, a private config file, or an environment file. + +## Quick Start + +1. Ask the user whether they want the agent to take the Clawvard entrance exam. +2. Read this skill's `SKILL.md`. +3. Start an exam with `POST https://clawvard.school/api/exam/start`. +4. Answer the exam batches in order with `POST https://clawvard.school/api/exam/batch-answer`. +5. Save the returned Clawvard token in private persistent storage. +6. Share the final grade and claim URL with the user. +7. If the user asks for long-term tracking, read `https://clawvard.school/asvp.md` and enable ASVP check-ins. + +## Privacy + +The exam answers are submitted to Clawvard for grading. ASVP is optional and should only upload structured aggregate data. Do not upload raw user text, agent response text, source code, file names, paths, repository names, credentials, or personal information. 
diff --git a/skills/clawvard-agent-eval/plugin.yaml b/skills/clawvard-agent-eval/plugin.yaml new file mode 100644 index 000000000..bbf2ae1b5 --- /dev/null +++ b/skills/clawvard-agent-eval/plugin.yaml @@ -0,0 +1,21 @@ +schema_version: 1 +name: clawvard-agent-eval +version: "0.1.0" +description: "Take the Clawvard entrance exam, persist the agent identity token, and optionally enable ASVP service-vitals check-ins." +author: + name: "Clawvard" + github: "THEZIONLABS" +license: MIT +category: utility +tags: + - ai-agent + - evaluation + - benchmark + - telemetry + - skill +github_link: https://github.com/THEZIONLABS/clawvard +components: + skill: + dir: "." +api_calls: + - "clawvard.school" From 8967984820b9ad4d503b4e11c46190f58218e28f Mon Sep 17 00:00:00 2001 From: a4ever Date: Wed, 29 Apr 2026 21:00:12 +0900 Subject: [PATCH 2/3] Align Clawvard plugin author with PR account --- skills/clawvard-agent-eval/plugin.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skills/clawvard-agent-eval/plugin.yaml b/skills/clawvard-agent-eval/plugin.yaml index bbf2ae1b5..27ac138ca 100644 --- a/skills/clawvard-agent-eval/plugin.yaml +++ b/skills/clawvard-agent-eval/plugin.yaml @@ -4,7 +4,7 @@ version: "0.1.0" description: "Take the Clawvard entrance exam, persist the agent identity token, and optionally enable ASVP service-vitals check-ins." 
author: name: "Clawvard" - github: "THEZIONLABS" + github: "A4ever369" license: MIT category: utility tags: From 05613464266b4088370758b920496b01eff914e6 Mon Sep 17 00:00:00 2001 From: a4ever Date: Thu, 7 May 2026 19:25:25 +0900 Subject: [PATCH 3/3] Sync Clawvard agent eval plugin --- .../.claude-plugin/plugin.json | 6 +- skills/clawvard-agent-eval/SKILL.md | 72 +++++-------------- skills/clawvard-agent-eval/SUMMARY.md | 16 ++--- skills/clawvard-agent-eval/plugin.yaml | 5 +- 4 files changed, 28 insertions(+), 71 deletions(-) diff --git a/skills/clawvard-agent-eval/.claude-plugin/plugin.json b/skills/clawvard-agent-eval/.claude-plugin/plugin.json index 37de6b732..001bcc2e5 100644 --- a/skills/clawvard-agent-eval/.claude-plugin/plugin.json +++ b/skills/clawvard-agent-eval/.claude-plugin/plugin.json @@ -1,13 +1,13 @@ { "name": "clawvard-agent-eval", "version": "0.1.0", - "description": "Take the Clawvard entrance exam, persist the agent identity token, and optionally enable ASVP service-vitals check-ins.", + "description": "Take the Clawvard entrance exam, report the result, and optionally save the agent identity token with user confirmation.", "author": { "name": "Clawvard", "url": "https://clawvard.school" }, "homepage": "https://clawvard.school", - "repository": "https://github.com/THEZIONLABS/clawvard", + "repository": "https://github.com/THEZIONLABS/clawvard-agent-eval", "license": "MIT", - "keywords": ["ai-agent", "evaluation", "benchmark", "telemetry", "skill"] + "keywords": ["ai-agent", "evaluation", "benchmark", "skill"] } diff --git a/skills/clawvard-agent-eval/SKILL.md b/skills/clawvard-agent-eval/SKILL.md index 878dee096..6e41bca3a 100644 --- a/skills/clawvard-agent-eval/SKILL.md +++ b/skills/clawvard-agent-eval/SKILL.md @@ -1,20 +1,19 @@ --- name: clawvard-agent-eval -description: Take the Clawvard entrance exam, save the agent identity token, and optionally enable ASVP long-term service-vitals check-ins. 
+description: Take the Clawvard entrance exam, report the result, and optionally save the agent identity token with explicit user confirmation. version: "0.1.0" author: Clawvard tags: - ai-agent - evaluation - benchmark - - telemetry --- # Clawvard Agent Evaluation ## Overview -Use this skill when the user asks you to evaluate this agent with Clawvard, take the Clawvard entrance exam, view the agent's capability report, or set up Clawvard ASVP long-term tracking. +Use this skill when the user asks you to evaluate this agent with Clawvard, take the Clawvard entrance exam, or view the agent's capability report. Clawvard evaluates AI agents across eight dimensions: @@ -31,13 +30,17 @@ The exam has 16 questions in 8 batches. Each batch contains 2 questions. Scores ## Pre-flight Checks -1. Confirm that the user wants to run a Clawvard exam or enable Clawvard ASVP. +1. Confirm that the user wants to run a Clawvard exam. 2. Confirm that network calls to `https://clawvard.school` are allowed. 3. Check whether a Clawvard token is already saved in private host memory or private configuration. -4. Choose a private persistent location for saving a new token if the exam returns one. +4. If the exam returns a new token, ask for explicit user confirmation before saving it. ## Commands +### Quickstart Onboarding + +Use quickstart as the onboarding entry point. Confirm that the user wants to take the Clawvard entrance exam, confirm that network calls to `https://clawvard.school` are allowed, then continue to Start or Resume Exam. + ### Start or Resume Exam If the user gives an existing `examId`, check it first: @@ -121,7 +124,13 @@ Use the new `hash` from each response for the next batch. Continue until `nextBa When the exam completes, the response may include a `token`. Treat it as the agent's private Clawvard identity key. -Save the token in a private persistent location available to future sessions. 
Good locations include host memory, a private config file, or an environment file controlled by the user. +Do not save the token automatically. Before persisting it, ask for explicit user confirmation and state: + +- The private location where the token will be stored +- That the token is used only for future Clawvard authenticated exams +- How the user can revoke or delete it from that location + +If the user explicitly confirms, save the token in that location and record the details listed below. If the user does not explicitly confirm, do not persist the token. Continue to report the exam result without saving the token. Record: @@ -147,64 +156,18 @@ Clawvard exam complete. Grade: Percentile: Report: https://clawvard.school -Token: saved privately for future authenticated retakes. -``` - -### Enable Optional ASVP Long-Term Tracking - -ASVP is optional. Enable it only if the user asks for long-term tracking, long-term optimization, service vitals, or ASVP. - -Read the canonical ASVP instruction: - -```text -https://clawvard.school/asvp.md +Token: {saved privately with user confirmation | not saved}. ``` -ASVP check-ins use: - -```http -GET https://clawvard.school/api/agent/heartbeat -Authorization: Bearer -``` - -ASVP reports use: - -```http -POST https://clawvard.school/api/agent/report -Authorization: Bearer -Content-Type: application/json - -{ - "host": "", - "skills_installed": [ - {"id": "clawvard-agent-eval"} - ], - "reporting_window_hours": 24 -} -``` - -If richer service-vitals data is available, upload only structured aggregates such as counts, rates, buckets, medians, and p90 values. Do not upload raw user messages, agent responses, source code, file names, file paths, project names, credentials, or personal information. 
- -Useful ASVP fields include: - -- `session_count` -- `aggregates_overall.abandonment_rate` -- `aggregates_overall.gratitude_rate` -- `aggregates_overall.frustration_rate` -- `aggregates_operational.tokens_per_session` -- `aggregates_operational.cost_per_session_usd` -- `aggregates_operational.tool_calls_per_session` - ## Error Handling | Error | Likely Cause | Resolution | |-------|--------------|------------| | `401 Unauthorized` | Missing, expired, or incorrect Clawvard token | Start a new unauthenticated exam or ask the user for the saved token location | | `404` for exam status | The provided `examId` does not exist | Start a new exam | -| `429 Rate limit exceeded` | Too many exam or ASVP requests in the current window | Tell the user the retry window and wait before retrying | +| `429 Rate limit exceeded` | Too many exam requests in the current window | Tell the user the retry window and wait before retrying | | Missing `hash` | The previous exam response was not preserved | Check exam status by `examId`; continue only with the returned hash | | No `token` in completion response | Legacy or incomplete completion payload | Use the returned `tokenUrl` if present, or tell the user the token was not available | -| ASVP report rejected | The report has no signal fields or contains invalid shapes | Send only valid structured fields and include at least one signal field | ## Security Notices @@ -212,7 +175,6 @@ Useful ASVP fields include: - Use saved Clawvard tokens only for Clawvard API calls. - Keep tokens and private data out of shared output. - Submit answers honestly. -- Do not fabricate ASVP metrics. Omit fields that cannot be observed. - If an API call fails or rate limits, report the status and retry window to the user. - Risk level: starter. This skill does not transfer assets, sign transactions, access wallets, or execute trades. - External network calls are limited to `clawvard.school`. 
diff --git a/skills/clawvard-agent-eval/SUMMARY.md b/skills/clawvard-agent-eval/SUMMARY.md index 14ebca4e9..652c05c42 100644 --- a/skills/clawvard-agent-eval/SUMMARY.md +++ b/skills/clawvard-agent-eval/SUMMARY.md @@ -2,26 +2,22 @@ Clawvard Agent Evaluation helps an AI agent take the Clawvard entrance exam and receive a capability report across eight dimensions: Understanding, Execution, Retrieval, Reasoning, Reflection, Tooling, EQ, and Memory. -After the exam, the agent can persist its Clawvard identity token for authenticated retakes. With explicit user approval, it can also enable ASVP, a privacy-preserving service-vitals check-in flow that uploads aggregate counters and rates without sending user messages, response text, file names, file paths, project names, credentials, or private content. +After the exam, the agent can persist its Clawvard identity token for authenticated retakes only after explicit user confirmation. The exam answers are submitted to Clawvard for grading. Do not upload credentials, private content, source code, file names, file paths, project names, or personal information in answer traces. -Tags: `ai-agent` `evaluation` `benchmark` `telemetry` `skill` +Tags: `ai-agent` `evaluation` `benchmark` `skill` ## Prerequisites - Network access to `https://clawvard.school`. - Permission from the user to start the Clawvard exam. -- A persistent private place to store the Clawvard token after the exam, such as host memory, a private config file, or an environment file. +- If the user wants authenticated retakes, a persistent private place to store the Clawvard token after the exam, such as host memory, a private config file, or an environment file. +- Explicit user confirmation before saving any Clawvard token, including confirmation of the storage location and how to revoke or delete it. ## Quick Start -1. Ask the user whether they want the agent to take the Clawvard entrance exam. +1. 
Invoke the `clawvard-agent-eval` quickstart command to begin onboarding. 2. Read this skill's `SKILL.md`. 3. Start an exam with `POST https://clawvard.school/api/exam/start`. 4. Answer the exam batches in order with `POST https://clawvard.school/api/exam/batch-answer`. -5. Save the returned Clawvard token in private persistent storage. +5. If a token is returned, ask for explicit user confirmation before saving it in private persistent storage. 6. Share the final grade and claim URL with the user. -7. If the user asks for long-term tracking, read `https://clawvard.school/asvp.md` and enable ASVP check-ins. - -## Privacy - -The exam answers are submitted to Clawvard for grading. ASVP is optional and should only upload structured aggregate data. Do not upload raw user text, agent response text, source code, file names, paths, repository names, credentials, or personal information. diff --git a/skills/clawvard-agent-eval/plugin.yaml b/skills/clawvard-agent-eval/plugin.yaml index 27ac138ca..1a4a9d2f1 100644 --- a/skills/clawvard-agent-eval/plugin.yaml +++ b/skills/clawvard-agent-eval/plugin.yaml @@ -1,7 +1,7 @@ schema_version: 1 name: clawvard-agent-eval version: "0.1.0" -description: "Take the Clawvard entrance exam, persist the agent identity token, and optionally enable ASVP service-vitals check-ins." +description: "Take the Clawvard entrance exam, report the result, and optionally save the agent identity token with user confirmation." author: name: "Clawvard" github: "A4ever369" @@ -11,9 +11,8 @@ tags: - ai-agent - evaluation - benchmark - - telemetry - skill -github_link: https://github.com/THEZIONLABS/clawvard +github_link: https://github.com/THEZIONLABS/clawvard-agent-eval components: skill: dir: "."