Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
278 changes: 278 additions & 0 deletions docs/samples/ab-report-otto.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
<!doctype html>
<!-- The body start tag and the html/body end tags are omitted; the HTML parser infers them. -->
<html lang="en">
<head>
<meta charset="utf-8">
<!-- Without a viewport declaration phones lay the page out at desktop width,
     so the max-width:760px rule at the end of the stylesheet would never apply. -->
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>A/B report — io.pilot.otto</title>
<style>
/* Palette: --van and --pil colour the Vanilla and Pilot column labels,
   --ok and --bad colour the exit-status badges. */
:root {
  --ink: #0b0b0a;
  --dim: #6b6b63;
  --line: #e6e4da;
  --bg: #faf9f5;
  --ok: #1a7f37;
  --bad: #cf222e;
  --van: #8250df;
  --pil: #0969da;
}

* { box-sizing: border-box; }

body {
  font: 14px/1.5 -apple-system, Inter, system-ui, sans-serif;
  color: var(--ink);
  background: var(--bg);
  max-width: 1080px;
  margin: 0 auto;
  padding: 32px 24px;
}

h1 { font-weight: 600; margin: 0 0 4px; }
h3 { margin: 0 0 2px; }
.sub { color: var(--dim); margin: 0 0 24px; }

/* Summary table */
table {
  border-collapse: collapse;
  width: 100%;
  margin: 12px 0 28px;
  background: #fff;
  border: 1px solid var(--line);
  border-radius: 8px;
  overflow: hidden;
}
th, td {
  padding: 8px 12px;
  text-align: left;
  border-bottom: 1px solid var(--line);
  font-size: 13px;
}
th { background: #f3f1ea; font-weight: 600; }
/* Numeric columns: header cells carry class "r" too, so they must be
   right-aligned together with the data cells below them. */
th.r, td.r { text-align: right; }
td.r { font-variant-numeric: tabular-nums; }

/* White cards: one per command pair, plus the help comparison panel. */
.pair, .help {
  background: #fff;
  border: 1px solid var(--line);
  border-radius: 10px;
  padding: 18px;
  margin: 0 0 18px;
}
.note { color: var(--dim); margin: 0 0 12px; font-size: 13px; }

/* Two equal side-by-side columns. */
.grid, .cols2 {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 14px;
}

/* Column label above each terminal block. */
.h {
  font-weight: 600;
  font-size: 12px;
  text-transform: uppercase;
  letter-spacing: .04em;
  margin-bottom: 6px;
}
.h.vanilla { color: var(--van); }
.h.pilot { color: var(--pil); }

/* Terminal block, top to bottom: command line, status strip, output.
   The corner radii of .cmd (top) and pre (bottom) join the three into one box. */
.cmd {
  font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
  font-size: 12px;
  background: #0b0b0a;
  color: #e8e6df;
  padding: 8px 10px;
  border-radius: 6px 6px 0 0;
  white-space: pre-wrap;
  word-break: break-all;
}
.meta {
  display: flex;
  gap: 10px;
  align-items: center;
  background: #1c1c1a;
  padding: 5px 10px;
}
.badge {
  font-size: 11px;
  font-weight: 600;
  padding: 1px 7px;
  border-radius: 99px;
  color: #fff;
}
.badge.ok { background: var(--ok); }
.badge.bad { background: var(--bad); }
.t { color: #b9b7ad; font-size: 12px; font-variant-numeric: tabular-nums; }
pre {
  margin: 0;
  background: #111110;
  color: #d6d4cb;
  padding: 10px;
  border-radius: 0 0 6px 6px;
  font: 12px/1.45 ui-monospace, Menlo, monospace;
  white-space: pre-wrap;
  word-break: break-word;
  max-height: 340px; /* long output scrolls inside the block */
  overflow: auto;
}

.dim { color: #8a887e; }
.delta { margin-top: 10px; color: var(--dim); font-size: 13px; }

/* Narrow screens: stack the two columns. */
@media (max-width: 760px) {
  .grid, .cols2 { grid-template-columns: 1fr; }
}
</style>
</head>
<!-- Report header: names the app under test and the script that generated this page. -->
<h1>Vanilla vs Pilot — A/B report</h1>
<p class="sub">App <b>io.pilot.otto</b> · delivered from the Pilot R2 artifact registry · generated by scripts/ab_report.py</p>

<h3>Summary</h3>
<!-- One row per benchmarked command. The overhead column is the Pilot time minus
     the Vanilla time. NOTE(review): "Match" presumably reports whether the two
     outputs agree; confirm what the warning sign means against scripts/ab_report.py. -->
<table>
  <thead>
    <tr>
      <th scope="col">Command</th>
      <th class="r" scope="col">Vanilla (ms)</th>
      <th class="r" scope="col">Pilot (ms)</th>
      <th class="r" scope="col">Δ overhead</th>
      <th scope="col">Match</th>
    </tr>
  </thead>
  <tbody>
    <tr><td>Version</td><td class="r">240</td><td class="r">695</td><td class="r">+455</td><td>✓</td></tr>
    <tr><td>Relay status</td><td class="r">124</td><td class="r">379</td><td class="r">+255</td><td>✓</td></tr>
    <tr><td>Controller client status</td><td class="r">125</td><td class="r">481</td><td class="r">+356</td><td>✓</td></tr>
    <tr><td>Discover node commands</td><td class="r">134</td><td class="r">1098</td><td class="r">+964</td><td>⚠</td></tr>
  </tbody>
</table>

<!-- Heading for the help comparison; the dimmed suffix records the method called and its measured time. -->
<h3>Adapter-generated help <span class="dim" style="font-weight:400">— otto.help (local, no backend), 353 ms</span></h3>
<div class="help"><div class="cols2">
<div><div class="h pilot">Pilot · otto.help (generated by the adapter)</div><pre>{
&quot;app&quot;: &quot;io.pilot.otto&quot;,
&quot;version&quot;: &quot;0.20.0&quot;,
&quot;description&quot;: &quot;Drive real Chrome tabs from an agent: extract page content as markdown or HTML, run site commands (Reddit, LinkedIn, Hacker News, Google), screenshot pages, and inspect relay and node status \u2014 over a relay to a browser extension, no headless farm. Plus a passthrough exec for any otto subcommand.&quot;,
&quot;duration_classes&quot;: {
&quot;fast&quot;: &quot;&lt;~1s \u2014 status or cheap call&quot;,
&quot;med&quot;: &quot;~1-5s \u2014 moderate work&quot;,
&quot;slow&quot;: &quot;~5-30s \u2014 heavy / multi-step&quot;
},
&quot;methods&quot;: [
{
&quot;method&quot;: &quot;otto.exec&quot;,
&quot;kind&quot;: &quot;utility&quot;,
&quot;summary&quot;: &quot;Run any otto subcommand. Payload is {\&quot;args\&quot;:[...]} \u2014 the verbatim otto argv. Use this for the full CLI surface beyond the curated methods (config, client register/login/remove, pair, listener unsubscribe, extension update, agent install, site-filtered `commands list --site`, filtered `logs list`, etc.). Add \&quot;--json\&quot; where the command supports it. Example args: [\&quot;commands\&quot;,\&quot;list\&quot;,\&quot;--site\&quot;,\&quot;reddit.com\&quot;,\&quot;--json\&quot;]. Note: interactive/streaming subcommands (setup, settings, logs follow, listener subscribe-network, test with stream/wait flags, mcp serve, start --attached) are not suitable over one-shot IPC.&quot;,
&quot;params&quot;: {
&quot;args&quot;: &quot;array (required) \u2014 verbatim otto argv, e.g. [\&quot;status\&quot;,\&quot;--nodes\&quot;,\&quot;--json\&quot;]&quot;
},
&quot;duration&quot;: &quot;med&quot;
},
{
&quot;method&quot;: &quot;otto.status&quot;,
&quot;kind&quot;: &quot;utility&quot;,
&quot;summary&quot;: &quot;Relay daemon status as JSON: running pid, port, uptime, log path, and the list of currently connected browser node IDs. The right preflight before any page command \u2014 an empty node list means no Chrome extension node is paired/online.&quot;,
&quot;duration&quot;: &quot;fast&quot;
},
{
&quot;method&quot;: &quot;otto.commands&quot;,
&quot;kind&quot;: &quot;utility&quot;,
&quot;summary&quot;: &quot;List the automation commands the connected node(s) expose, as JSON. Use to learn which site commands are available before calling otto.test. For a single site, use otto.exec with [\&quot;commands\&quot;,\&quot;list\&quot;,\&quot;--site\&quot;,\&quot;&lt;domain&gt;\&quot;,\&quot;--json\&quot;].&quot;,
&quot;duration&quot;: &quot;med&quot;
},
{
&quot;method&quot;: &quot;otto.extract&quot;,
&quot;kind&quot;: &quot;utility&quot;,
&quot;summary&quot;: &quot;Extract the readable content of a web page through a live paired browser tab, as JSON markdown. Opens a temporary tab on the node, extracts, and closes it. Requires a running relay and a paired Chrome node.&quot;,
&quot;params&quot;: {
&quot;url&quot;: &quot;string (required) \u2014 page URL to extract from&quot;
},
&quot;duration&quot;: &quot;slow&quot;
},
{
&quot;method&quot;: &quot;otto.extract.format&quot;,
&quot;kind&quot;: &quot;utility&quot;,
&quot;summary&quot;: &quot;Extract page content in a chosen format: markdown, distilled_html, clean_html, raw_html, or text. Returns JSON. Like otto.extract but lets you pick the output representation.&quot;,
&quot;params&quot;: {
&quot;format&quot;: &quot;string (required) \u2014 markdown | distilled_html | clean_html | raw_html | text&quot;,
&quot;url&quot;: &quot;string (required) \u2014 page URL to extract from&quot;
},
&quot;duration&quot;: &quot;slow&quot;
},
{
&quot;method&quot;: &quot;otto.screenshot&quot;,
&quot;kind&quot;: &quot;utility&quot;,
&quot;summary&quot;: &quot;Capture a screenshot of a page through a live browser tab; returns JSON (base64 PNG in the envelope, no local Preview window). Requires a running relay and a paired Chrome node.&quot;,
&quot;params&quot;: {
&quot;url&quot;: &quot;string (required) \u2014 page URL to screenshot&quot;
},
&quot;duration&quot;: &quot;slow&quot;
},
{
&quot;method&quot;: &quot;otto.test&quot;,
&quot;kind&quot;: &quot;utility&quot;,
&quot;summary&quot;: &quot;Run a registered site command on a browser node (opens a tab, runs the command, returns JSON). Provide site (e.g. reddit.com), command id (e.g. getPosts), and a JSON payload string (use {} for none, e.g. {\&quot;limit\&quot;:10}). One-shot; requires relay + paired node.&quot;,
&quot;params&quot;: {
&quot;command&quot;: &quot;string (required) \u2014 command id, e.g. getPosts&quot;,
&quot;payload&quot;: &quot;string (required) \u2014 command input JSON object string; use {} if none&quot;,
&quot;site&quot;: &quot;string (required) \u2014 website domain, e.g. reddit.com&quot;
},
&quot;duration&quot;: &quot;slow&quot;
},
{
&quot;method&quot;: &quot;otto.cmd&quot;,
&quot;kind&quot;: &quot;utility&quot;,
&quot;summary&quot;: &quot;Send a single raw action to the target browser node and return the full JSON envelope. Provide the action name and a JSON payload string (use {} for none). Use for low-level primitives (e.g. primitive.tab.open) not covered by a curated method.&quot;,
&quot;params&quot;: {
&quot;action&quot;: &quot;string (required) \u2014 action name, e.g. command.list or primitive.tab.open&quot;,
&quot;payload&quot;: &quot;string (required) \u2014 JSON object string for the action payload; use {} if none&quot;
},
&quot;duration&quot;: &quot;med&quot;
},
{
&quot;method&quot;: &quot;otto.logs&quot;,
&quot;kind&quot;: &quot;utility&quot;,
&quot;summary&quot;: &quot;Read recent relay logs as JSON. For filtered queries (level/source/since/request-id) use otto.exec with [\&quot;logs\&quot;,\&quot;list\&quot;,...,\&quot;--json\&quot;].&quot;,
&quot;duration&quot;: &quot;fast&quot;
},
{
&quot;method&quot;: &quot;otto.logs.status&quot;,
&quot;kind&quot;: &quot;utility&quot;,
&quot;summary&quot;: &quot;Relay log storage status as JSON (retention, counts, sizes). Check whether logging is healthy and how much history is available.&quot;,
&quot;duration&quot;: &quot;fast&quot;
},
{
&quot;method&quot;: &quot;otto.client.status&quot;,
&quot;kind&quot;: &quot;utility&quot;,
&quot;summary&quot;: &quot;Local controller client identity state and where its secret resolves from (env vs keychain), as JSON. Verify the controller is registered and logged in before running page commands.&quot;,
&quot;duration&quot;: &quot;fast&quot;
},
{
&quot;method&quot;: &quot;otto.authcode&quot;,
&quot;kind&quot;: &quot;utility&quot;,
&quot;summary&quot;: &quot;List pending pairing codes from the relay as JSON. See codes awaiting approval when bringing a new Chrome extension node online (approve with `otto pair &lt;code&gt;` via otto.exec).&quot;,
&quot;duration&quot;: &quot;fast&quot;
},
{
&quot;method&quot;: &quot;otto.extension.info&quot;,
&quot;kind&quot;: &quot;utility&quot;,
&quot;summary&quot;: &quot;Installed Chrome extension artifact metadata (version, unpacked path, checksum) and configured relay URLs, as JSON. Confirm which extension build the host has staged for the Load-unpacked handoff.&quot;,
&quot;duration&quot;: &quot;fast&quot;
},
{
&quot;method&quot;: &quot;otto.agent.status&quot;,
&quot;kind&quot;: &quot;utility&quot;,
&quot;summary&quot;: &quot;Which agent frameworks (claude, codex, cursor, vscode, \u2026) currently have the Otto MCP server registered, as JSON.&quot;,
&quot;duration&quot;: &quot;fast&quot;
},
{
&quot;method&quot;: &quot;otto.help&quot;,
&quot;kind&quot;: &quot;meta&quot;,
&quot;summary&quot;: &quot;This document \u2014 every method with params, kind, and duration class.&quot;,
&quot;duration&quot;: &quot;fast&quot;,
&quot;typical_roundtrip&quot;: &quot;instant (local, no backend call)&quot;
}
]
}</pre></div>
<div><div class="h vanilla">Vanilla · otto --help (133 ms)</div><pre>Usage: otto [options] [command]

Automate web workflows on real browser tabs without hosting a browser farm.

Options:
-V, --version output the version number
-h, --help display help for command

Commands:
start [options] Start relay (daemon by default, or attached
for development)
stop Stop relay daemon if it is running
restart [options] Restart relay daemon
status [options] Show relay daemon status
relay:start [options] Start relay in attached foreground mode
(legacy alias)
config [options] Configure relay and defaults
setup [options] Guided controller setup including relay
daemon readiness, extension acquisition, and
Chrome handoff instructions
extension Manage extension artifact installation for
setup
settings Edit controller-global settings stored in
~/.otto/config.json
client Manage independently registered controller
clients
authcode List pending auth codes
pair &lt;code&gt; Approve pairing code and store controller
tokens
revoke Revoke stored refresh token from relay and
clear local controller auth
cmd [options] Send a command to a target node
test [options] &lt;site&gt; &lt;command&gt; Run a website command for local developer
testing
commands Discover available commands from target node
logs Read relay logs
listener Manage long-lived node listeners
extract-content [options] [url] Extract page content with one command
(markdown, distilled_html, clean_html,
raw_html, or text)
screenshot [options] &lt;url&gt; Capture a screenshot of a page and open it
locally
mcp Model Context Protocol server operations
agent Manage agent framework integrations
help [command] display help for command</pre></div>
</div></div>

<h3>Per-command detail</h3>

<!-- Version: the vanilla binary and the Pilot otto.exec passthrough both print 0.20.0. -->
<section class="pair">
  <h3>Version</h3>
  <p class="note">enumerated → otto --version</p>
  <div class="grid">
    <div class="col">
      <div class="h vanilla">Vanilla CLI</div>
      <div class="cmd">otto-darwin-arm64 --version</div>
      <div class="meta">
        <span class="badge ok">exit 0</span>
        <span class="t">240 ms</span>
      </div>
      <pre>0.20.0</pre>
    </div>
    <div class="col">
      <div class="h pilot">Pilot app store</div>
      <div class="cmd">pilotctl appstore call io.pilot.otto otto.exec '{"args": ["--version"]}'</div>
      <div class="meta">
        <span class="badge ok">exit 0</span>
        <span class="t">695 ms</span>
      </div>
      <pre>0.20.0</pre>
    </div>
  </div>
  <div class="delta">adapter overhead: <b>+455 ms</b> (vanilla 240 ms · pilot 695 ms)</div>
</section>
<!-- Relay status: both sides exit 0 and print the same JSON document.
     Text inside each pre is kept flush left because pre preserves whitespace. -->
<section class="pair">
  <h3>Relay status</h3>
  <p class="note">otto status --nodes --json (relay not running here)</p>
  <div class="grid">
    <div class="col">
      <div class="h vanilla">Vanilla CLI</div>
      <div class="cmd">otto-darwin-arm64 status --nodes --json</div>
      <div class="meta">
        <span class="badge ok">exit 0</span>
        <span class="t">124 ms</span>
      </div>
      <pre>{
"running": false,
"daemon": null,
"suggestedCommand": "otto start",
"logsFollowCommand": null,
"nodes": []
}</pre>
    </div>
    <div class="col">
      <div class="h pilot">Pilot app store</div>
      <div class="cmd">pilotctl appstore call io.pilot.otto otto.status '{}'</div>
      <div class="meta">
        <span class="badge ok">exit 0</span>
        <span class="t">379 ms</span>
      </div>
      <pre>{
"running": false,
"daemon": null,
"suggestedCommand": "otto start",
"logsFollowCommand": null,
"nodes": []
}</pre>
    </div>
  </div>
  <div class="delta">adapter overhead: <b>+255 ms</b> (vanilla 124 ms · pilot 379 ms)</div>
</section>
<!-- Controller client status: both sides exit 0 and print the same JSON document.
     Text inside each pre is kept flush left because pre preserves whitespace. -->
<section class="pair">
  <h3>Controller client status</h3>
  <p class="note">otto client status (always JSON)</p>
  <div class="grid">
    <div class="col">
      <div class="h vanilla">Vanilla CLI</div>
      <div class="cmd">otto-darwin-arm64 client status</div>
      <div class="meta">
        <span class="badge ok">exit 0</span>
        <span class="t">125 ms</span>
      </div>
      <pre>{
"relayUrl": "ws://127.0.0.1:8787?role=controller",
"relayHttpUrl": "http://127.0.0.1:8787",
"controllerClientId": null,
"controllerName": null,
"controllerDescription": null,
"hasAccessToken": false,
"hasRefreshToken": false,
"secretSource": "missing"
}</pre>
    </div>
    <div class="col">
      <div class="h pilot">Pilot app store</div>
      <div class="cmd">pilotctl appstore call io.pilot.otto otto.client.status '{}'</div>
      <div class="meta">
        <span class="badge ok">exit 0</span>
        <span class="t">481 ms</span>
      </div>
      <pre>{
"relayUrl": "ws://127.0.0.1:8787?role=controller",
"relayHttpUrl": "http://127.0.0.1:8787",
"controllerClientId": null,
"controllerName": null,
"controllerDescription": null,
"hasAccessToken": false,
"hasRefreshToken": false,
"secretSource": "missing"
}</pre>
    </div>
  </div>
  <div class="delta">adapter overhead: <b>+356 ms</b> (vanilla 125 ms · pilot 481 ms)</div>
</section>
<!-- Discover node commands: both sides exit 1 with the same stderr message.
     The line break right after each pre start tag is dropped by the HTML parser. -->
<section class="pair">
  <h3>Discover node commands</h3>
  <p class="note">otto commands list --json (needs a paired node)</p>
  <div class="grid">
    <div class="col">
      <div class="h vanilla">Vanilla CLI</div>
      <div class="cmd">otto-darwin-arm64 commands list --json</div>
      <div class="meta">
        <span class="badge bad">exit 1</span>
        <span class="t">134 ms</span>
      </div>
      <pre>
<span class="dim">── stderr ──</span>
Missing targetNodeId. Set with `otto config --node-id` or pass --node-id</pre>
    </div>
    <div class="col">
      <div class="h pilot">Pilot app store</div>
      <div class="cmd">pilotctl appstore call io.pilot.otto otto.commands '{}'</div>
      <div class="meta">
        <span class="badge bad">exit 1</span>
        <span class="t">1098 ms</span>
      </div>
      <pre>
<span class="dim">── stderr ──</span>
Missing targetNodeId. Set with `otto config --node-id` or pass --node-id</pre>
    </div>
  </div>
  <div class="delta">adapter overhead: <b>+964 ms</b> (vanilla 134 ms · pilot 1098 ms)</div>
</section>
4 changes: 4 additions & 0 deletions docs/samples/otto-ab-cases.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[{"label":"Version","note":"enumerated → otto --version","vanilla":["--version"],"method":"otto.exec","payload":{"args":["--version"]}},
{"label":"Relay status","note":"otto status --nodes --json (relay not running here)","vanilla":["status","--nodes","--json"],"method":"otto.status","payload":{}},
{"label":"Controller client status","note":"otto client status (always JSON)","vanilla":["client","status"],"method":"otto.client.status","payload":{}},
{"label":"Discover node commands","note":"otto commands list --json (needs a paired node)","vanilla":["commands","list","--json"],"method":"otto.commands","payload":{}}]
Loading
Loading