diff --git a/config.toml b/config.toml
index 2752ff5264..19fd3bf8aa 100644
--- a/config.toml
+++ b/config.toml
@@ -12,6 +12,23 @@ contentDir = "content/"
# ignoreErrors = ["error-remote-getcsv"]
+# Stop Hugo's filesystem watcher from opening fds for build artifacts
+# and language-package directories that ship inside use-case demos but
+# are never referenced from a page as a resource. Without these the
+# Rust `target/` and PHP `vendor/` trees alone account for ~8,500 of
+# the files under `content/`, which pushes the watcher past macOS's
+# default `kern.maxfilesperproc` ceiling on `hugo serve`.
+ignoreFiles = [
+ "/vendor/", # composer / bundler deps
+ "/node_modules/", # npm deps
+ "/target/", # rust + maven build output
+ "/bin/", # .NET build output
+ "/obj/", # .NET intermediate output
+ "/__pycache__/", # Python bytecode cache
+ "/models/", # Hugot model cache
+ "/\\.transformers-cache/", # TransformersPHP cache
+]
+
[related]
[[related.indices]]
name = 'group'
diff --git a/content/develop/use-cases/_index.md b/content/develop/use-cases/_index.md
index 085389f8eb..c5f31764a7 100644
--- a/content/develop/use-cases/_index.md
+++ b/content/develop/use-cases/_index.md
@@ -29,3 +29,4 @@ This section provides practical examples and reference implementations for commo
* [Recommendation engine]({{< relref "/develop/use-cases/recommendation-engine" >}}) - Serve personalized recommendations under tight latency budgets by combining vector similarity with structured filters in a single Redis call
* [Feature store]({{< relref "/develop/use-cases/feature-store" >}}) - Serve pre-computed ML features on the request path with mixed batch-and-streaming freshness using per-field TTL
* [Semantic cache]({{< relref "/develop/use-cases/semantic-cache" >}}) - Reuse LLM responses for semantically similar queries to cut token costs and skip multi-second model calls on near-duplicate prompts
+* [Agent memory]({{< relref "/develop/use-cases/agent-memory" >}}) - Give AI agents persistent memory that spans sessions and tasks — working memory per thread, long-term semantic recall, and a time-ordered event log on one Redis instance
diff --git a/content/develop/use-cases/agent-memory/_index.md b/content/develop/use-cases/agent-memory/_index.md
new file mode 100644
index 0000000000..e793f1d4ea
--- /dev/null
+++ b/content/develop/use-cases/agent-memory/_index.md
@@ -0,0 +1,77 @@
+---
+categories:
+- docs
+- develop
+- stack
+- oss
+- rs
+- rc
+description: Give AI agents persistent memory that spans sessions and tasks — working memory per thread, long-term semantic recall, and a time-ordered event log — on a single Redis instance, with sub-millisecond reads on the agent loop's hot path.
+hideListLinks: true
+linkTitle: Agent memory
+title: Redis as agent memory
+weight: 8
+---
+
+## When to use Redis as agent memory
+
+Use Redis as the memory layer for an AI agent when each reasoning step needs to recall both *what just happened in this session* and *what the agent has learned over time* under a strict per-step latency budget — without standing up a separate vector database, message broker, and session store for each tier.
+
+## Why the problem is hard
+
+LLMs are stateless. Every API call starts from zero unless the application supplies the relevant context. Without a memory layer, agents re-derive information through extra LLM calls, lose personalization between sessions, and cannot coordinate state in multi-agent deployments. Some of the obvious workarounds have real drawbacks:
+
+- **A standalone vector database** can index long-term semantic memories, but doesn't cover working session state or an ordered action log, and putting a separate service on the agent's hot path adds latency that compounds across multi-step reasoning loops.
+- **In-process or app-server session storage** keeps working memory close to the agent, but disappears on process restart and can't be shared across multi-agent or load-balanced deployments — exactly the topology most production agents end up in.
+- **Stuffing everything into the LLM context window** shifts the cost of memory onto every API call, hits the model's context limit on long-running sessions, and reliably degrades reasoning quality as the context grows.
+
+The core difficulty is that an agent needs *several kinds* of memory at once — short-lived working state per thread, durable semantic recall by meaning, and an audit trail of recent actions — each with its own retention rule and access pattern. Mapping all three onto a single primitive (only a vector index, only a key-value store, only an append log) forces compromises that show up as either lost context or extra LLM calls. Memory must also stay bounded; without deduplication, summarization, and background consolidation, stale context piles up and degrades downstream accuracy.
+
+This pattern is distinct from generic [session storage]({{< relref "/develop/use-cases/session-store" >}}) (spans a single user session, no semantic recall), from [semantic caching]({{< relref "/develop/use-cases/semantic-cache" >}}) (deduplicates LLM calls, not accumulated agent knowledge), and from RAG retrieval against an external document corpus (static reference material, not the agent's own experience).
+
+## What you can expect from a Redis solution
+
+You can:
+
+- Persist and resume agent sessions by thread ID across restarts and across load-balanced workers.
+- Recall long-term memories by semantic similarity instead of exact key, scoped per user, namespace, or memory kind.
+- Prevent memory bloat by deduplicating near-identical memories at write time with the same vector index that powers recall.
+- Run semantic caching, RAG retrieval, and agent memory together on a single Redis deployment, sharing the same vector index infrastructure.
+- Keep each step in the agent reasoning loop under budget — Redis reads and writes are sub-millisecond, so the memory layer doesn't dominate per-step latency.
+
+## How Redis supports the solution
+
+In practice, each tier of agent memory maps onto a Redis primitive that's already in the cluster. **Working memory** for an active session is a [Hash]({{< relref "/develop/data-types/hashes" >}}) at a deterministic key such as `agent:session:{thread_id}`, holding the running scratchpad, current goal, and recent turns — written with [`HSET`]({{< relref "/commands/hset" >}}) and read in one round trip with [`HGETALL`]({{< relref "/commands/hgetall" >}}). **Long-term memory** — both episodic ("what happened in past sessions") and semantic ("what the agent has learned about this user or domain") — lives as [JSON]({{< relref "/develop/data-types/json" >}}) documents that carry an embedding vector, indexed by [Redis Search]({{< relref "/develop/ai/search-and-query" >}}) on an [HNSW vector field]({{< relref "/develop/ai/search-and-query/vectors" >}}) together with tag fields (user, namespace, kind, source thread). The agent recalls memories with one [`FT.SEARCH`]({{< relref "/commands/ft.search" >}}) call that combines vector similarity with metadata filtering, and the same similarity check runs at write time to deduplicate near-identical memories before they enter the store. **A time-ordered event log** of the agent's recent actions and observations is a [Stream]({{< relref "/develop/data-types/streams" >}}) appended with [`XADD`]({{< relref "/commands/xadd" >}}), replayed with [`XREVRANGE`]({{< relref "/commands/xrevrange" >}}), and bounded with [`XTRIM`]({{< relref "/commands/xtrim" >}}).
+
+Redis provides the following features that make it a good fit for agent memory:
+
+- [Hashes]({{< relref "/develop/data-types/hashes" >}}) hold per-session working memory under one key, so loading or persisting a thread's state takes a single round trip.
+- [JSON]({{< relref "/develop/data-types/json" >}}) documents store each long-term memory together with its embedding vector and metadata, so a similarity search returns everything the agent needs without a second lookup.
+- [Redis Search]({{< relref "/develop/ai/search-and-query" >}}) with [HNSW vector indexes]({{< relref "/develop/ai/search-and-query/vectors" >}}) recalls memories by meaning in sub-millisecond time, and the same [`FT.SEARCH`]({{< relref "/commands/ft.search" >}}) call applies TAG and NUMERIC filters so user, namespace, and kind scoping happen inside the query rather than in application code.
+- [Streams]({{< relref "/develop/data-types/streams" >}}) keep an ordered log of agent actions and observations, [`XTRIM`]({{< relref "/commands/xtrim" >}}) bounds retention without manual cleanup, and consumer groups let downstream workers — summarizers, consolidators — replay the log without losing position.
+- [`EXPIRE`]({{< relref "/commands/expire" >}}) automates memory decay per tier — short TTLs on working memory, longer on episodic long-term memories, no TTL on semantic ones — so stale context falls off without a separate cleanup job. (The event log is bounded separately, by [`XADD MAXLEN`]({{< relref "/commands/xadd" >}}) on the Stream, not by `EXPIRE`.)
+- Sub-millisecond reads and writes from memory keep each turn of the agent loop under budget, and a single Redis instance can carry working memory, long-term recall, the event log, semantic caching, and RAG retrieval at zero marginal infrastructure cost.
+
+## Ecosystem
+
+The following libraries, frameworks, and managed services build on Redis for agent memory:
+
+- **Python**: [RedisVL]({{< relref "/develop/ai/redisvl" >}}) provides vector-index, session-manager, and semantic-memory helpers you can compose into an agent memory layer.
+- **Frameworks**: [LangChain]({{< relref "/integrate/langchain-redis" >}}) supports Redis as a chat history and memory backend, and [LangGraph & Redis](https://redis.io/blog/langgraph-redis-build-smarter-ai-agents-with-memory-persistence/) ships a Redis checkpointer for persisting graph state across runs.
+- **AWS**: [Amazon Bedrock]({{< relref "/integrate/amazon-bedrock" >}}) agent runtimes integrate with Redis for memory persistence and vector search.
+- **Any language**: standard Redis client libraries cover the pattern below for custom agent loops.
+- **Managed**: [Redis Agent Memory Server]({{< relref "/develop/ai/context-engine/agent-memory" >}}) is a managed agent memory service with REST and MCP interfaces, working and long-term memory tiers, deduplication, summarization, and background consolidation — useful when you'd rather not build and operate the pattern below yourself.
+
+## Code examples to build your own Redis agent memory
+
+The following guides show how to build a small Redis-backed agent memory layer using only standard Redis commands — working memory in a hash per thread, long-term memory as JSON documents with a vector index, an event log in a stream, and per-tier TTLs for decay. Each guide includes a runnable interactive demo where you can send turns, watch working memory update, see semantic recall against past memories, and inspect the event log.
+
+* [redis-py (Python)]({{< relref "/develop/use-cases/agent-memory/redis-py" >}})
+* [node-redis (Node.js)]({{< relref "/develop/use-cases/agent-memory/nodejs" >}})
+* [NRedisStack (C#)]({{< relref "/develop/use-cases/agent-memory/dotnet" >}})
+* [redis-rs (Rust)]({{< relref "/develop/use-cases/agent-memory/rust" >}})
+* [go-redis (Go)]({{< relref "/develop/use-cases/agent-memory/go" >}})
+* [Jedis (Java)]({{< relref "/develop/use-cases/agent-memory/java-jedis" >}})
+* [Lettuce (Java)]({{< relref "/develop/use-cases/agent-memory/java-lettuce" >}})
+* [Predis (PHP)]({{< relref "/develop/use-cases/agent-memory/php" >}})
+* [redis-rb (Ruby)]({{< relref "/develop/use-cases/agent-memory/ruby" >}})
diff --git a/content/develop/use-cases/agent-memory/dotnet/.gitignore b/content/develop/use-cases/agent-memory/dotnet/.gitignore
new file mode 100644
index 0000000000..8f5917312f
--- /dev/null
+++ b/content/develop/use-cases/agent-memory/dotnet/.gitignore
@@ -0,0 +1,7 @@
+bin/
+obj/
+model_cache/
+*.user
+*.suo
+.vs/
+.idea/
diff --git a/content/develop/use-cases/agent-memory/dotnet/AgentEventLog.cs b/content/develop/use-cases/agent-memory/dotnet/AgentEventLog.cs
new file mode 100644
index 0000000000..2826deef40
--- /dev/null
+++ b/content/develop/use-cases/agent-memory/dotnet/AgentEventLog.cs
@@ -0,0 +1,119 @@
+using System.Globalization;
+using StackExchange.Redis;
+
+namespace AgentMemoryDemo;
+
+/// <summary>
+/// Append-only event log for an agent thread, backed by a Redis
+/// Stream.
+/// </summary>
+/// <remarks>
+/// <para>Each thread gets a stream at <c>agent:events:{threadId}</c>.
+/// Every action the agent takes (a user turn arriving, a memory being
+/// recalled, a memory being written, a tool being called) is one
+/// <c>XADD</c> to that stream. Replay with <c>XREVRANGE</c> for the
+/// most recent N events; every append also passes <c>MAXLEN ~</c>
+/// so the log stays cheap regardless of how long the thread has been
+/// running.</para>
+///
+/// <para>The stream is independent of the session hash and the
+/// long-term memory store: it answers the "what just happened"
+/// question without competing with either of those for indexing or
+/// memory budget. Consumer groups (not used in this demo) would let
+/// downstream workers — summarisers, consolidators, audit pipelines —
+/// replay the log without losing position.</para>
+/// </remarks>
+public sealed class AgentEventLog
+{
+ /// <summary>
+ /// Approximate cap on stream length. <c>MAXLEN ~</c> lets Redis
+ /// trim in whole-node units instead of exactly-N units, which is
+ /// much cheaper at the cost of overshooting the bound by up to a
+ /// node's worth.
+ /// </summary>
+ public const int DefaultMaxLen = 1000;
+
+ private readonly IDatabase _db;
+ public string KeyPrefix { get; }
+ public int MaxLen { get; }
+
+ public AgentEventLog(
+ IDatabase db,
+ string keyPrefix = "agent:events:",
+ int maxLen = DefaultMaxLen)
+ {
+ _db = db;
+ KeyPrefix = keyPrefix;
+ MaxLen = maxLen;
+ }
+
+ public string StreamKey(string threadId) => KeyPrefix + threadId;
+
+ /// <summary>
+ /// Append one event (fields <c>action</c>, <c>detail</c>, <c>ts</c>) and return its stream id.
+ /// </summary>
+ /// <remarks>
+ /// <c>MAXLEN ~ N</c> keeps the stream bounded with near-zero
+ /// overhead; an exact bound (<c>MAXLEN N</c> without the tilde)
+ /// forces a scan and is rarely worth the cost.
+ /// </remarks>
+ public string Record(string threadId, string action, string detail = "")
+ {
+ var fields = new NameValueEntry[]
+ {
+ new("action", action),
+ new("detail", detail),
+ new("ts", UnixSeconds().ToString("F6", CultureInfo.InvariantCulture)),
+ };
+ // StreamAdd's `useApproximateMaxLength: true` issues
+ // `MAXLEN ~ N` (N = MaxLen) rather than the exact form.
+ RedisValue id = _db.StreamAdd(
+ StreamKey(threadId),
+ fields,
+ messageId: null,
+ maxLength: MaxLen,
+ useApproximateMaxLength: true);
+ return (string)id!;
+ }
+
+ /// <summary>Return up to <paramref name="count"/> of the most recent events, newest first.</summary>
+ /// <remarks>
+ /// StackExchange.Redis swaps the <c>minId</c> / <c>maxId</c>
+ /// arguments when it issues <c>XREVRANGE</c> under
+ /// <see cref="Order.Descending"/>, so the caller still passes
+ /// "low, high" in natural order (<c>-</c> / <c>+</c>). Passing
+ /// them the other way around — <c>+</c> / <c>-</c> — would issue
+ /// <c>XREVRANGE key - +</c>, which Redis interprets as an empty
+ /// range and returns nothing.
+ /// </remarks>
+ public List Recent(string threadId, int count = 20)
+ {
+ var entries = _db.StreamRange(
+ StreamKey(threadId), "-", "+", count: count, messageOrder: Order.Descending);
+ var out_ = new List(entries.Length);
+ foreach (var entry in entries)
+ {
+ var fields = entry.Values.ToDictionary(v => (string)v.Name!, v => (string)v.Value!);
+ out_.Add(new AgentEvent(
+ EventId: (string)entry.Id!,
+ ThreadId: threadId,
+ Action: fields.GetValueOrDefault("action") ?? "",
+ Detail: fields.GetValueOrDefault("detail") ?? "",
+ Ts: ParseDouble(fields.GetValueOrDefault("ts"), 0)));
+ }
+ return out_;
+ }
+
+ /// <summary>Current stream length.</summary>
+ public long Length(string threadId) => _db.StreamLength(StreamKey(threadId));
+
+ /// <summary>Drop the entire stream for a thread.</summary>
+ public bool Clear(string threadId) => _db.KeyDelete(StreamKey(threadId));
+
+ private static double UnixSeconds()
+ => DateTimeOffset.UtcNow.ToUnixTimeMilliseconds() / 1000.0;
+
+ private static double ParseDouble(string? value, double fallback)
+ => double.TryParse(value, NumberStyles.Float, CultureInfo.InvariantCulture, out var d)
+ ? d : fallback;
+}
diff --git a/content/develop/use-cases/agent-memory/dotnet/AgentMemoryDemo.csproj b/content/develop/use-cases/agent-memory/dotnet/AgentMemoryDemo.csproj
new file mode 100644
index 0000000000..74d7378315
--- /dev/null
+++ b/content/develop/use-cases/agent-memory/dotnet/AgentMemoryDemo.csproj
@@ -0,0 +1,40 @@
+
+
+
+ Exe
+ net8.0
+ AgentMemoryDemo
+ AgentMemoryDemo
+ enable
+ enable
+ latest
+ false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ PreserveNewest
+
+
+
+
diff --git a/content/develop/use-cases/agent-memory/dotnet/AgentSession.cs b/content/develop/use-cases/agent-memory/dotnet/AgentSession.cs
new file mode 100644
index 0000000000..1b5bd6f595
--- /dev/null
+++ b/content/develop/use-cases/agent-memory/dotnet/AgentSession.cs
@@ -0,0 +1,305 @@
+using System.Globalization;
+using System.Text.Json;
+using StackExchange.Redis;
+
+namespace AgentMemoryDemo;
+
+/// <summary>
+/// Working-memory store for an agent session, backed by a Redis Hash.
+/// </summary>
+/// <remarks>
+/// <para>Each session is one Hash document at
+/// <c>agent:session:{threadId}</c>. The hash holds the running
+/// scratchpad, the current goal, a rolling window of recent turns
+/// (serialised as a JSON list to fit in one field), and a few audit
+/// fields. One <c>HGETALL</c> returns the whole session in a single
+/// round trip on every step of the agent loop.</para>
+///
+/// <para>Every write refreshes the key's TTL with <c>EXPIRE</c>, so
+/// idle sessions fall off without a separate cleanup job and active
+/// sessions stay alive as long as the agent keeps touching them. A
+/// separate long-term memory store (see LongTermMemory) is what survives beyond a
+/// session's TTL.</para>
+///
+/// <para>The turn window is bounded to <see cref="MaxTurns"/> in
+/// application code; the hash itself doesn't grow, so the working set
+/// per thread stays constant regardless of how long the agent has
+/// been running.</para>
+/// </remarks>
+public sealed class AgentSession
+{
+ // How many recent turns to keep inline on the session hash. Older
+ // turns flow through the event log (AgentEventLog) and the
+ // long-term memory store (LongTermMemory).
+ public const int DefaultMaxTurns = 20;
+
+ private readonly IDatabase _db;
+ public string KeyPrefix { get; }
+ public long DefaultTtlSeconds { get; }
+ public int MaxTurns { get; }
+
+ public AgentSession(
+ IDatabase db,
+ string keyPrefix = "agent:session:",
+ long defaultTtlSeconds = 3600,
+ int maxTurns = DefaultMaxTurns)
+ {
+ _db = db;
+ KeyPrefix = keyPrefix;
+ DefaultTtlSeconds = defaultTtlSeconds;
+ MaxTurns = maxTurns;
+ }
+
+ public string SessionKey(string threadId) => KeyPrefix + threadId;
+
+ public string NewThreadId() => Guid.NewGuid().ToString("N").Substring(0, 12);
+
+ /// <summary>
+ /// Create a fresh working memory for a thread. Overwrites any
+ /// existing session at the same key. The agent normally calls
+ /// this once per thread at the first turn and relies on
+ /// the update methods (such as <see cref="AppendTurn"/>) for subsequent
+ /// steps.
+ /// </summary>
+ public SessionState Start(
+ string threadId,
+ string user = "default",
+ string agentName = "default",
+ string goal = "",
+ long? ttlSeconds = null)
+ {
+ long ttl = ttlSeconds ?? DefaultTtlSeconds;
+ double now = UnixSeconds();
+ var state = new SessionState(
+ ThreadId: threadId,
+ User: user,
+ Agent: agentName,
+ Goal: goal,
+ Scratchpad: "",
+ TurnCount: 0,
+ CreatedTs: now,
+ LastActiveTs: now,
+ RecentTurns: Array.Empty(),
+ TtlSeconds: ttl);
+ Write(state, ttl);
+ return state;
+ }
+
+ /// <summary>
+ /// Return the session state, or <c>null</c> if the hash is missing or empty (for example after it expired).
+ /// </summary>
+ public SessionState? Load(string threadId)
+ {
+ string key = SessionKey(threadId);
+ var raw = _db.HashGetAll(key);
+ if (raw is null || raw.Length == 0) return null;
+ var fields = raw.ToDictionary(e => (string)e.Name!, e => (string)e.Value!);
+ TimeSpan? ttl = _db.KeyTimeToLive(key);
+ long ttlSeconds = ttl is { TotalSeconds: > 0 } v ? (long)v.TotalSeconds : 0L;
+ string turnsBlob = fields.GetValueOrDefault("recent_turns") ?? "[]";
+ var turns = TryDeserializeTurns(turnsBlob);
+ return new SessionState(
+ ThreadId: threadId,
+ User: fields.GetValueOrDefault("user") ?? "default",
+ Agent: fields.GetValueOrDefault("agent") ?? "default",
+ Goal: fields.GetValueOrDefault("goal") ?? "",
+ Scratchpad: fields.GetValueOrDefault("scratchpad") ?? "",
+ TurnCount: ParseLong(fields.GetValueOrDefault("turn_count"), 0),
+ CreatedTs: ParseDouble(fields.GetValueOrDefault("created_ts"), 0),
+ LastActiveTs: ParseDouble(fields.GetValueOrDefault("last_active_ts"), 0),
+ RecentTurns: turns,
+ TtlSeconds: ttlSeconds);
+ }
+
+ /// <summary>
+ /// Append a turn, bound the rolling window, refresh the TTL.
+ /// </summary>
+ /// <remarks>
+ /// <para><paramref name="user"/> and <paramref name="agentName"/>
+ /// are only consulted when the session does not yet exist — they
+ /// seed the auto-created session so the working-memory hash
+ /// matches the user the caller is operating against. On an
+ /// existing session they're ignored; the original <c>Start</c>
+ /// values stand.</para>
+ ///
+ /// <para>Read-modify-write here is last-writer-wins on the turn
+ /// list if two concurrent turns reach the same thread; the demo
+ /// never triggers that race in practice (one browser, one turn at
+ /// a time) but a multi-worker agent that shares a thread id would
+ /// wrap this in <c>WATCH</c> / <c>MULTI</c> / <c>EXEC</c> or a
+ /// Lua script that does the append atomically server-side.</para>
+ /// </remarks>
+ public SessionState AppendTurn(
+ string threadId,
+ string role,
+ string content,
+ string? user = null,
+ string? agentName = null,
+ long? ttlSeconds = null)
+ {
+ var state = Load(threadId)
+ ?? Start(
+ threadId,
+ user: user ?? "default",
+ agentName: agentName ?? "default",
+ ttlSeconds: ttlSeconds);
+
+ var newTurns = state.RecentTurns.ToList();
+ newTurns.Add(new SessionTurn(Role: role, Content: content, Ts: UnixSeconds()));
+ if (newTurns.Count > MaxTurns)
+ {
+ newTurns = newTurns.GetRange(newTurns.Count - MaxTurns, MaxTurns);
+ }
+
+ long ttl = ttlSeconds ?? DefaultTtlSeconds;
+ var next = state with
+ {
+ TurnCount = state.TurnCount + 1,
+ LastActiveTs = UnixSeconds(),
+ RecentTurns = newTurns,
+ TtlSeconds = ttl,
+ };
+ Write(next, ttl);
+ return next;
+ }
+
+ /// <summary>
+ /// Update the agent's running scratchpad and refresh the TTL.
+ /// Returns <c>null</c> when the session does not exist.
+ /// </summary>
+ public SessionState? SetScratchpad(string threadId, string text, long? ttlSeconds = null)
+ {
+ var state = Load(threadId);
+ if (state is null) return null;
+ long ttl = ttlSeconds ?? DefaultTtlSeconds;
+ var next = state with
+ {
+ Scratchpad = text,
+ LastActiveTs = UnixSeconds(),
+ TtlSeconds = ttl,
+ };
+ Write(next, ttl);
+ return next;
+ }
+
+ /// <summary>
+ /// Update the goal field without touching turns or the scratchpad.
+ /// Creates the session if it doesn't exist yet — setting a goal
+ /// on a fresh thread is a sensible first step in the agent loop,
+ /// so this method covers both the "rename the goal mid-session"
+ /// and the "start a thread with this goal" cases.
+ /// </summary>
+ public SessionState SetGoal(
+ string threadId,
+ string text,
+ string? user = null,
+ string? agentName = null,
+ long? ttlSeconds = null)
+ {
+ var state = Load(threadId);
+ if (state is null)
+ {
+ return Start(
+ threadId,
+ user: user ?? "default",
+ agentName: agentName ?? "default",
+ goal: text,
+ ttlSeconds: ttlSeconds);
+ }
+ long ttl = ttlSeconds ?? DefaultTtlSeconds;
+ var next = state with
+ {
+ Goal = text,
+ LastActiveTs = UnixSeconds(),
+ TtlSeconds = ttl,
+ };
+ Write(next, ttl);
+ return next;
+ }
+
+ /// <summary>Drop the session immediately. Returns true if it existed.</summary>
+ public bool Delete(string threadId) => _db.KeyDelete(SessionKey(threadId));
+
+ /// <summary>Return up to <paramref name="limit"/> active thread ids (for the demo's thread switcher).</summary>
+ public List ListThreads(int limit = 100)
+ {
+ var out_ = new List();
+ // SCAN via the server-set option; this stays incremental even
+ // on a database with many session keys.
+ var server = _db.Multiplexer.GetServer(_db.Multiplexer.GetEndPoints().First()); // NOTE(review): only the first endpoint is scanned — confirm for multi-node deployments
+ foreach (var key in server.Keys(database: _db.Database, pattern: KeyPrefix + "*", pageSize: 200))
+ {
+ string raw = (string)key!;
+ out_.Add(raw.StartsWith(KeyPrefix) ? raw.Substring(KeyPrefix.Length) : raw); // NOTE(review): StartsWith(string) is culture-sensitive; StringComparison.Ordinal is the usual choice for keys
+ if (out_.Count >= limit) break;
+ }
+ return out_;
+ }
+
+ private void Write(SessionState state, long ttl)
+ {
+ string key = SessionKey(state.ThreadId);
+ var entries = new HashEntry[]
+ {
+ new("thread_id", state.ThreadId),
+ new("user", state.User),
+ new("agent", state.Agent),
+ new("goal", state.Goal),
+ new("scratchpad", state.Scratchpad),
+ new("turn_count", state.TurnCount.ToString(CultureInfo.InvariantCulture)),
+ new("created_ts", state.CreatedTs.ToString("F6", CultureInfo.InvariantCulture)),
+ new("last_active_ts", state.LastActiveTs.ToString("F6", CultureInfo.InvariantCulture)),
+ new("recent_turns", JsonSerializer.Serialize(state.RecentTurns.Select(t => new
+ {
+ role = t.Role,
+ content = t.Content,
+ ts = t.Ts,
+ }))),
+ };
+
+ // MULTI/EXEC so HSET and EXPIRE either both apply or neither
+ // does. A connection drop between the two writes would
+ // otherwise leave the session without a TTL. We check the
+ // return value of Execute() — there's no WATCH on this
+ // transaction so a false here means the server rejected the
+ // batch (out of memory, OOM script kill, etc.); surface it
+ // rather than letting the in-memory state drift from Redis.
+ var tx = _db.CreateTransaction();
+ _ = tx.HashSetAsync(key, entries);
+ _ = tx.KeyExpireAsync(key, TimeSpan.FromSeconds(ttl));
+ if (!tx.Execute())
+ {
+ throw new RedisServerException("session write MULTI/EXEC was discarded");
+ }
+ }
+
+ private static IReadOnlyList TryDeserializeTurns(string blob)
+ {
+ try
+ {
+ using var doc = JsonDocument.Parse(blob);
+ var result = new List(doc.RootElement.GetArrayLength());
+ foreach (var el in doc.RootElement.EnumerateArray())
+ {
+ string role = el.TryGetProperty("role", out var r) ? r.GetString() ?? "" : "";
+ string content = el.TryGetProperty("content", out var c) ? c.GetString() ?? "" : "";
+ double ts = el.TryGetProperty("ts", out var t) && t.TryGetDouble(out var d) ? d : 0.0;
+ result.Add(new SessionTurn(role, content, ts));
+ }
+ return result;
+ }
+ catch (JsonException) // NOTE(review): covers malformed JSON only; a well-formed blob that is not an array of objects would presumably raise InvalidOperationException instead — confirm intended
+ {
+ return Array.Empty();
+ }
+ }
+
+ private static double UnixSeconds()
+ => DateTimeOffset.UtcNow.ToUnixTimeMilliseconds() / 1000.0;
+
+ private static double ParseDouble(string? value, double fallback)
+ => double.TryParse(value, NumberStyles.Float, CultureInfo.InvariantCulture, out var d) ? d : fallback;
+
+ private static long ParseLong(string? value, long fallback)
+ => long.TryParse(value, NumberStyles.Integer, CultureInfo.InvariantCulture, out var l) ? l : fallback;
+}
diff --git a/content/develop/use-cases/agent-memory/dotnet/LocalEmbedder.cs b/content/develop/use-cases/agent-memory/dotnet/LocalEmbedder.cs
new file mode 100644
index 0000000000..9778bccdb3
--- /dev/null
+++ b/content/develop/use-cases/agent-memory/dotnet/LocalEmbedder.cs
@@ -0,0 +1,276 @@
+using System.Buffers.Binary;
+using System.Net.Http;
+using Microsoft.ML.OnnxRuntime;
+using Microsoft.ML.OnnxRuntime.Tensors;
+using Microsoft.ML.Tokenizers;
+
+namespace AgentMemoryDemo;
+
+///
+/// Local text-embedding helper backed by ONNX Runtime + a Bert
+/// WordPiece tokenizer.
+///
+///
+/// This is a thin wrapper around the
+/// sentence-transformers/all-MiniLM-L6-v2 model loaded as an
+/// ONNX export from the Xenova/all-MiniLM-L6-v2 Hugging Face
+/// mirror: a 384-dimensional encoder that runs in-process on CPU
+/// through ONNX Runtime, needs no API key, and produces vectors
+/// numerically very close to the equivalent Python and Node ports
+/// (close enough that paraphrase distances differ only at the second
+/// or third decimal place).
+///
+/// The class downloads model.onnx and the
+/// vocab.txt WordPiece dictionary into a local cache directory
+/// on the first call; every later run is offline. Vectors are mean-
+/// pooled over the token positions (weighted by the attention mask)
+/// and then L2-normalised explicitly so a Redis Search index declared
+/// with DISTANCE_METRIC COSINE returns scores that are
+/// directly comparable across entries.
+///
+public sealed class LocalEmbedder : IDisposable
+{
+ public const string DefaultModelName = "sentence-transformers/all-MiniLM-L6-v2";
+ public const int DefaultVectorDim = 384;
+
+ // The Xenova mirror is the Node demo's source; the ONNX export
+ // and vocab there match the original sentence-transformers
+ // checkpoint and give us a single dependency-free download URL.
+ private const string ModelUrl =
+ "https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx";
+ private const string VocabUrl =
+ "https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/vocab.txt";
+
+ private readonly InferenceSession _session;
+ private readonly BertTokenizer _tokenizer;
+
+ public string ModelName { get; }
+ public int Dim { get; }
+
+ private LocalEmbedder( // private constructor: CreateAsync builds the session and tokenizer, then calls this
+ string modelName,
+ InferenceSession session,
+ BertTokenizer tokenizer,
+ int dim)
+ {
+ ModelName = modelName;
+ _session = session;
+ _tokenizer = tokenizer;
+ Dim = dim;
+ }
+
+ /// <summary>
+ /// Load the default model. Blocks while ONNX Runtime initialises
+ /// and the model + tokenizer files are downloaded on the first
+ /// run. The single <see cref="InferenceSession"/> is shared
+ /// across handler threads — ONNX Runtime documents
+ /// <c>InferenceSession.Run</c> as thread-safe.
+ /// </summary>
+ /// <param name="cacheDir">
+ /// Directory the model and tokenizer files are cached in. Created
+ /// if it doesn't exist. Defaults to <c>model_cache</c> under
+ /// <c>AppContext.BaseDirectory</c>, so a fresh checkout doesn't re-download on
+ /// every <c>dotnet run</c>.
+ /// </param>
+ public static async Task CreateAsync(string? cacheDir = null)
+ {
+ cacheDir ??= Path.Combine(AppContext.BaseDirectory, "model_cache");
+ Directory.CreateDirectory(cacheDir);
+
+ string modelPath = Path.Combine(cacheDir, "model.onnx");
+ string vocabPath = Path.Combine(cacheDir, "vocab.txt");
+
+ await DownloadIfMissingAsync(ModelUrl, modelPath);
+ await DownloadIfMissingAsync(VocabUrl, vocabPath);
+
+ // The Xenova / sentence-transformers MiniLM tokenizer config
+ // says lower_case=true, do_basic_tokenize=true,
+ // tokenize_chinese_chars=true; surface those flags here so
+ // the tokens match the ones produced by the Python /
+ // Node.js sibling demos.
+ var options = new BertOptions
+ {
+ LowerCaseBeforeTokenization = true,
+ ApplyBasicTokenization = true,
+ IndividuallyTokenizeCjk = true,
+ };
+ var tokenizer = BertTokenizer.Create(vocabPath, options);
+
+ // One session per process; ONNX Runtime explicitly documents
+ // it as thread-safe for inference, so we can share it across
+ // every HttpListener handler thread without further
+ // synchronisation.
+ var session = new InferenceSession(modelPath);
+
+ // Probe the output shape once so Dim reports the vector length the
+ // loaded model actually produces. NOTE(review): the probed length is not
+ // compared with DefaultVectorDim here, so a mismatch is not rejected in this method.
+ var probe = EncodeInternal(session, tokenizer, "dimension probe");
+ return new LocalEmbedder(DefaultModelName, session, tokenizer, probe.Length);
+ }
+
+ private static async Task DownloadIfMissingAsync(string url, string path)
+ {
+ if (File.Exists(path)) return;
+ Console.WriteLine($"Downloading {url}");
+ using var http = new HttpClient
+ {
+ Timeout = TimeSpan.FromMinutes(5),
+ };
+ using var stream = await http.GetStreamAsync(url);
+ // Stream into "<path>.part" and rename only after the copy finishes,
+ // so an interrupted download (e.g. Ctrl-C) never leaves a half-written
+ // file at `path` that the File.Exists check above would accept next run.
+ string tmp = path + ".part";
+ using (var file = File.Create(tmp))
+ {
+ await stream.CopyToAsync(file);
+ }
+ File.Move(tmp, path, overwrite: true);
+ }
+
+ /// <summary>
+ /// Encode a single string. Returns a <c>float[]</c> of length
+ /// <see cref="Dim"/>.
+ /// </summary>
+ public float[] EncodeOne(string text) => EncodeInternal(_session, _tokenizer, text);
+
+ /// <summary>
+ /// Encode several strings sequentially and return one vector per
+ /// input, in input order. Throws <see cref="InvalidOperationException"/>
+ /// when the number of vectors produced differs from the number of inputs.
+ /// </summary>
+ public List EncodeMany(IReadOnlyList texts)
+ {
+ var results = new List(texts.Count);
+ foreach (var text in texts)
+ {
+ results.Add(EncodeInternal(_session, _tokenizer, text));
+ }
+ if (results.Count != texts.Count)
+ {
+ // Belt-and-braces. The loop above guarantees one vector
+ // per input on the happy path, but surfacing this as an
+ // explicit check matches the contract the seed loader
+ // relies on and avoids an index-out-of-range later if a
+ // future refactor batches into a single Run() call.
+ throw new InvalidOperationException(
+ $"embedder produced {results.Count} vectors for {texts.Count} inputs");
+ }
+ return results;
+ }
+
+    /// <summary>Tokenize, run the model once, and return the mean-pooled, L2-normalised embedding.</summary>
+    private static float[] EncodeInternal(
+        InferenceSession session, BertTokenizer tokenizer, string text)
+    {
+        // sentence-transformers truncates all-MiniLM-L6-v2 input to 256
+        // word pieces, and the model only has 512 position embeddings, so
+        // longer input must be cut here or Run() fails on it.
+        const int MaxTokens = 256;
+
+        // EncodeToIds adds the [CLS] / [SEP] sentinels the MiniLM export
+        // expects; considerPreTokenization splits on whitespace and
+        // punctuation before WordPiece (do_basic_tokenize=true upstream).
+        var ids = tokenizer
+            .EncodeToIds(text, addSpecialTokens: true, considerPreTokenization: true)
+            .ToArray();
+        if (ids.Length > MaxTokens)
+        {
+            // Keep the trailing [SEP] so the truncated input is still well formed.
+            int sep = ids[^1];
+            Array.Resize(ref ids, MaxTokens);
+            ids[^1] = sep;
+        }
+        int seqLen = ids.Length;
+
+        // Single unpadded sequence: every position is attended to (mask = 1)
+        // and belongs to segment 0 (token_type_ids stay at their default 0).
+        var idsLong = Array.ConvertAll(ids, id => (long)id);
+        var mask = new long[seqLen];
+        Array.Fill(mask, 1L);
+        var tokenType = new long[seqLen];
+
+        var inputIds = new DenseTensor<long>(idsLong, new[] { 1, seqLen });
+        var attentionMask = new DenseTensor<long>(mask, new[] { 1, seqLen });
+        var tokenTypes = new DenseTensor<long>(tokenType, new[] { 1, seqLen });
+
+        var inputs = new List<NamedOnnxValue>
+        {
+            NamedOnnxValue.CreateFromTensor("input_ids", inputIds),
+            NamedOnnxValue.CreateFromTensor("attention_mask", attentionMask),
+            NamedOnnxValue.CreateFromTensor("token_type_ids", tokenTypes),
+        };
+
+        using var results = session.Run(inputs);
+        // The export's first output is last_hidden_state, shaped
+        // [batch, seq, dim]; pick it by position rather than by name so a
+        // future re-export that renames it keeps working.
+        var output = results[0].AsTensor<float>();
+        int dim = output.Dimensions[2];
+        var pooled = new float[dim];
+
+        // Attention-masked mean pooling, the standard sentence-transformers
+        // recipe. The mask is all 1s because we never pad, but the masked
+        // sum stays correct under a future batched implementation.
+        double maskTotal = 0;
+        for (int s = 0; s < seqLen; s++)
+        {
+            double w = mask[s];
+            maskTotal += w;
+            for (int d = 0; d < dim; d++)
+            {
+                pooled[d] += (float)(output[0, s, d] * w);
+            }
+        }
+        if (maskTotal > 0)
+        {
+            float inv = (float)(1.0 / maskTotal);
+            for (int d = 0; d < dim; d++) pooled[d] *= inv;
+        }
+
+        // L2-normalise explicitly: the ONNX export omits the step, and doing
+        // it here keeps cosine distances comparable across the sibling demos.
+        double sq = 0;
+        foreach (var v in pooled) sq += (double)v * v;
+        if (sq > 0)
+        {
+            float inv = (float)(1.0 / Math.Sqrt(sq));
+            for (int d = 0; d < dim; d++) pooled[d] *= inv;
+        }
+        return pooled;
+    }
+
+    /// <summary>
+    /// Serialise a <c>float[]</c> into the byte layout Redis Search
+    /// expects for a <c>FLOAT32</c> vector field: raw little-endian
+    /// float32 values, no header, no padding. The Python, Node, Go, and
+    /// Java ports write the same encoding.
+    /// </summary>
+    /// <remarks>
+    /// Each element goes through
+    /// <see cref="BinaryPrimitives.WriteSingleLittleEndian"/> rather than
+    /// a host-endian memory copy, so the bytes stay little-endian even on
+    /// a hypothetical big-endian .NET host. Every supported .NET runtime
+    /// today is little-endian; <c>Program.Main</c> also asserts
+    /// <see cref="BitConverter.IsLittleEndian"/> at startup to document
+    /// the assumption.
+    /// </remarks>
+    /// <returns>A new array holding four bytes per element.</returns>
+    public static byte[] ToBytes(float[] vector)
+    {
+        var packed = new byte[vector.Length * sizeof(float)];
+        int offset = 0;
+        foreach (float component in vector)
+        {
+            BinaryPrimitives.WriteSingleLittleEndian(packed.AsSpan(offset), component);
+            offset += sizeof(float);
+        }
+        return packed;
+    }
+
+    /// <summary>
+    /// Releases the ONNX Runtime inference session held by this embedder.
+    /// </summary>
+    public void Dispose() => _session.Dispose();
+}
diff --git a/content/develop/use-cases/agent-memory/dotnet/LongTermMemory.cs b/content/develop/use-cases/agent-memory/dotnet/LongTermMemory.cs
new file mode 100644
index 0000000000..161864efc1
--- /dev/null
+++ b/content/develop/use-cases/agent-memory/dotnet/LongTermMemory.cs
@@ -0,0 +1,498 @@
+using System.Globalization;
+using System.Text.Json;
+using NRedisStack;
+using NRedisStack.RedisStackCommands;
+using NRedisStack.Search;
+using NRedisStack.Search.Literals.Enums;
+using StackExchange.Redis;
+
+namespace AgentMemoryDemo;
+
+///
+/// Long-term memory store for an agent, backed by Redis JSON and
+/// Search.
+///
+///
+/// Each memory lives as one JSON document at
+/// agent:mem:<id>. The document holds the memory text,
+/// its embedding vector, and a small metadata block — user,
+/// namespace, kind, source thread, timestamps — that lets the recall
+/// query scope results without falling back to application-side
+/// filtering.
+///
+/// A single Redis Search index covers the embedding plus every
+/// metadata field, so one FT.SEARCH call performs approximate-
+/// nearest-neighbour over the in-scope subset and returns the top-k
+/// memories ranked by cosine distance. The same KNN check runs at
+/// write time to deduplicate near-identical memories before
+/// they enter the store, which keeps the index from filling with
+/// paraphrases of the same fact as the agent reasons over similar
+/// topics across sessions.
+///
+/// Memories carry one of two kinds: episodic snapshots
+/// from a specific thread, written with a medium TTL so old session
+/// detail decays naturally; semantic distilled facts and
+/// preferences the agent should carry forward indefinitely, written
+/// with no TTL by default. The split is enforced as a TAG on the
+/// index, so the recall query can ask for one kind or both with a
+/// filter — no separate keyspaces.
+///
+public sealed class LongTermMemory
+{
+    public const int VectorDimDefault = 384;
+
+    /// <summary>
+    /// How close (cosine distance) a candidate must be to an existing
+    /// memory to count as a duplicate at write time. Smaller =
+    /// stricter. <c>0.20</c> is calibrated to the
+    /// <c>sentence-transformers/all-MiniLM-L6-v2</c> embedding model
+    /// used in the demo, where a paraphrase of an existing memory
+    /// lands in the 0.10 – 0.20 range and a distinct memory lands
+    /// above 0.50.
+    /// </summary>
+    public const double DefaultDedupThreshold = 0.20;
+
+    /// <summary>
+    /// How close (cosine distance) a candidate must be to count as a
+    /// relevant recall result. Larger than the dedup threshold so the
+    /// agent gets a wider net at read time than at write time.
+    /// </summary>
+    public const double DefaultRecallThreshold = 0.55;
+
+    /// <summary>
+    /// TTL tiers, in seconds. <c>null</c> means "no TTL" — the memory
+    /// persists until explicitly deleted or evicted under memory
+    /// pressure.
+    /// </summary>
+    public static readonly IReadOnlyDictionary<string, long?> DefaultTtlByKind =
+        new Dictionary<string, long?>
+        {
+            ["episodic"] = 7L * 24 * 3600,
+            ["semantic"] = null,
+        };
+
+    // Characters Redis Search treats as syntax inside a TAG value;
+    // any of them in a user-supplied filter must be backslash-escaped
+    // or the surrounding `{...}` block won't parse correctly.
+    private static readonly HashSet<char> TagSpecial = new(
+        "\\,.<>{}[]\"':;!@#$%^&*()-+=~| ");
+
+    private readonly IDatabase _db;
+    private readonly ISearchCommands _ft;
+    private readonly IJsonCommands _json;
+    private readonly IReadOnlyDictionary<string, long?> _ttlByKind;
+
+    public string IndexName { get; }
+    public string KeyPrefix { get; }
+    public int VectorDim { get; }
+    public double DedupThreshold { get; }
+    public double RecallThreshold { get; }
+
+    public LongTermMemory( // binds the store to one database, index name, and key prefix
+        IDatabase db,
+        string indexName = "agentmem:idx",
+        string keyPrefix = "agent:mem:",
+        int vectorDim = VectorDimDefault,
+        double dedupThreshold = DefaultDedupThreshold,
+        double recallThreshold = DefaultRecallThreshold,
+        IReadOnlyDictionary<string, long?>? ttlByKind = null)
+    {
+        _db = db ?? throw new ArgumentNullException(nameof(db));
+        _ft = _db.FT();
+        _json = _db.JSON();
+        IndexName = indexName;
+        KeyPrefix = keyPrefix;
+        VectorDim = vectorDim;
+        DedupThreshold = dedupThreshold;
+        RecallThreshold = recallThreshold;
+        _ttlByKind = ttlByKind ?? DefaultTtlByKind; // null => built-in TTL tiers
+    }
+
+ public string MemoryKey(string memoryId) => KeyPrefix + memoryId; // full Redis key for a memory id
+
+ // ------------------------------------------------------------------
+ // Index management
+ // ------------------------------------------------------------------
+
+    /// <summary>
+    /// Create the Redis Search index if it doesn't already exist.
+    /// </summary>
+    /// <remarks>
+    /// The index is declared on the JSON document type with alias
+    /// names on each path; the same <c>FT.SEARCH</c> filter clause
+    /// works here as on a HASH-backed index, and the field paths
+    /// (<c>$.user</c>, <c>$.embedding</c>, ...) only show up in
+    /// <c>FT.CREATE</c>.
+    /// </remarks>
+    public void CreateIndex()
+    {
+        var schema = new Schema()
+            .AddTextField(new FieldName("$.text", "text"))
+            .AddTagField(new FieldName("$.user", "user"))
+            .AddTagField(new FieldName("$.namespace", "namespace"))
+            .AddTagField(new FieldName("$.kind", "kind"))
+            .AddTagField(new FieldName("$.source_thread", "source_thread"))
+            .AddNumericField(new FieldName("$.created_ts", "created_ts"), sortable: true)
+            .AddNumericField(new FieldName("$.hit_count", "hit_count"), sortable: true)
+            .AddVectorField(
+                new FieldName("$.embedding", "embedding"),
+                Schema.VectorField.VectorAlgo.HNSW,
+                new Dictionary<string, object>
+                {
+                    ["TYPE"] = "FLOAT32",
+                    ["DIM"] = VectorDim,
+                    ["DISTANCE_METRIC"] = "COSINE",
+                });
+        try
+        {
+            _ft.Create(
+                IndexName,
+                new FTCreateParams()
+                    .On(IndexDataType.JSON)
+                    .Prefix(KeyPrefix),
+                schema);
+        }
+        catch (RedisServerException ex)
+            when (ex.Message.Contains("Index already exists", StringComparison.OrdinalIgnoreCase))
+        {
+            // Idempotent: an existing index is left untouched.
+        }
+    }
+
+    /// <summary>Drop the search index; optionally delete the indexed JSON docs too.</summary>
+    public void DropIndex(bool deleteDocuments = false)
+    {
+        try
+        {
+            _ft.DropIndex(IndexName, deleteDocuments);
+        }
+        catch (RedisServerException ex) when (IsMissingIndex(ex.Message ?? ""))
+        {
+            // Nothing to drop: the index is already gone.
+        }
+
+        // Either phrasing of the server's "index not found" error counts.
+        static bool IsMissingIndex(string msg)
+            => msg.Contains("no such index", StringComparison.OrdinalIgnoreCase)
+            || msg.Contains("unknown index name", StringComparison.OrdinalIgnoreCase);
+    }
+
+ // ------------------------------------------------------------------
+ // Write
+ // ------------------------------------------------------------------
+
+    /// <summary>
+    /// Write a new memory, deduplicating against existing entries.
+    /// </summary>
+    /// <remarks>
+    /// <para>Runs one in-scope <c>KNN(1)</c> against the index first.
+    /// If the nearest existing memory is within
+    /// <see cref="DedupThreshold"/>, the new memory is skipped (its
+    /// content is already represented) and the existing memory's
+    /// <c>hit_count</c> is bumped via <c>JSON.NUMINCRBY</c>. Otherwise
+    /// a fresh JSON document is written under a new id with a TTL
+    /// derived from the memory's kind.</para>
+    ///
+    /// <para>The KNN-then-write sequence is <b>not atomic</b>; two workers
+    /// that remember the same fact at the same time can both miss
+    /// each other's in-flight write and insert duplicate memories.
+    /// See the walkthrough's "Concurrency caveats" section for the
+    /// production fix (periodic background consolidator that merges
+    /// near-duplicates).</para>
+    /// </remarks>
+    public WriteResult Remember(
+        string text,
+        float[] embedding,
+        string user = "default",
+        string @namespace = "default",
+        string kind = "episodic",
+        string sourceThread = "",
+        long? ttlSeconds = null)
+    {
+        if (embedding is null) throw new ArgumentNullException(nameof(embedding));
+        if (embedding.Length != VectorDim)
+        {
+            throw new ArgumentException(
+                $"embedding length is {embedding.Length}; index expects {VectorDim}",
+                nameof(embedding));
+        }
+
+        var nearest = Nearest(embedding, user, @namespace, kind, k: 1);
+        double? nearestDistance = nearest.Count > 0 ? nearest[0].Distance : null;
+        if (nearest.Count > 0
+            && nearest[0].Distance is double d
+            && d <= DedupThreshold)
+        {
+            BumpHitCount(nearest[0].Id);
+            return new WriteResult(
+                Id: nearest[0].Id, Deduped: true, ExistingDistance: nearestDistance);
+        }
+
+        string id = Guid.NewGuid().ToString("N").Substring(0, 12);
+        string key = MemoryKey(id);
+        double now = UnixSeconds();
+
+        // Build the JSON doc as a Dictionary so
+        // System.Text.Json serialises the float[] embedding as a
+        // bare JSON array — the encoding RediSearch expects when
+        // indexing a JSON path as a vector field.
+        var doc = new Dictionary<string, object>
+        {
+            ["id"] = id,
+            ["user"] = user,
+            ["namespace"] = @namespace,
+            ["kind"] = kind,
+            ["source_thread"] = sourceThread,
+            ["text"] = text,
+            ["embedding"] = embedding,
+            ["created_ts"] = now,
+            ["hit_count"] = 0,
+        };
+        long? ttl = ttlSeconds ?? ResolveTtl(kind);
+
+        // MULTI/EXEC so JSON.SET and EXPIRE either both apply or
+        // neither does. A connection drop between two separate writes
+        // could otherwise leave an `episodic` doc without its expiry.
+        //
+        // NRedisStack exposes its JSON helpers only on `IDatabase`,
+        // not on `ITransaction`, so the transactional path issues the
+        // raw `JSON.SET` command; the body is JSON text either way.
+        string docJson = JsonSerializer.Serialize(doc);
+        var tx = _db.CreateTransaction();
+        var setTask = tx.ExecuteAsync("JSON.SET", key, "$", docJson);
+        var expireTask = ttl is long t
+            ? tx.KeyExpireAsync(key, TimeSpan.FromSeconds(t))
+            : null;
+        if (!tx.Execute())
+        {
+            throw new RedisServerException("remember MULTI/EXEC was discarded");
+        }
+        // EXEC returning true only means the transaction ran; a command that
+        // failed inside it surfaces through its own task, so observe both.
+        _db.Wait(setTask);
+        if (expireTask is not null) _db.Wait(expireTask);
+        return new WriteResult(Id: id, Deduped: false, ExistingDistance: nearestDistance);
+    }
+
+ // ------------------------------------------------------------------
+ // Recall
+ // ------------------------------------------------------------------
+
+    /// <summary>
+    /// Return the top-k in-scope memories ranked by similarity.
+    /// Memories beyond <paramref name="distanceThreshold"/> (or the
+    /// instance default) are dropped — the index always returns
+    /// <i>something</i> for KNN, so a recall result on an unrelated
+    /// query would otherwise be a confidently-wrong false positive.
+    /// </summary>
+    public List<MemoryRecord> Recall(
+        float[] queryEmbedding,
+        string user = "default",
+        string? @namespace = "default",
+        string? kind = null,
+        int k = 5,
+        double? distanceThreshold = null)
+    {
+        double threshold = distanceThreshold ?? RecallThreshold;
+        var candidates = Nearest(queryEmbedding, user, @namespace, kind, k);
+        return candidates
+            .Where(c => c.Distance is double d && d <= threshold)
+            .ToList();
+    }
+
+ // ------------------------------------------------------------------
+ // Admin / inspection
+ // ------------------------------------------------------------------
+
+    /// <summary>Snapshot of the index's document and indexing-failure counters.</summary>
+    /// <returns>A zeroed snapshot when <c>FT.INFO</c> raises a server error.</returns>
+    public IndexSnapshot IndexInfo()
+    {
+        try
+        {
+            var stats = _ft.Info(IndexName);
+            return new IndexSnapshot(NumDocs: stats.NumDocs, IndexingFailures: stats.HashIndexingFailures);
+        }
+        catch (RedisServerException)
+        {
+            return new IndexSnapshot(0, 0);
+        }
+    }
+
+    public List<MemoryRecord> ListMemories( // newest-first listing of in-scope memories
+        string? user = "default",
+        string? @namespace = "default",
+        string? kind = null,
+        int limit = 100)
+    {
+        // Match `Recall`'s defaults so listing and KNN recall agree
+        // on which memories are in scope for the same caller inputs.
+        // Pass `null` (or `""`) on either argument to opt out of the
+        // TAG filter and list across every scope.
+        string filterClause = BuildFilterClause(user, @namespace, kind);
+        var query = new Query(filterClause)
+            .ReturnFields(
+                "user", "namespace", "kind", "source_thread",
+                "text", "created_ts", "hit_count")
+            .Limit(0, limit)
+            .SetSortBy("created_ts", ascending: false)
+            .Dialect(2);
+        SearchResult result;
+        try
+        {
+            result = _ft.Search(IndexName, query);
+        }
+        catch (RedisServerException)
+        {
+            return new List<MemoryRecord>(); // server-side search error => empty listing
+        }
+        var out_ = new List<MemoryRecord>(result.Documents.Count);
+        foreach (var doc in result.Documents)
+        {
+            string memoryId = StripPrefix(doc.Id ?? "");
+            var props = doc.GetProperties().ToDictionary(p => p.Key, p => p.Value);
+            TimeSpan? ttl = _db.KeyTimeToLive(MemoryKey(memoryId));
+            long? ttlSeconds = ttl is { TotalSeconds: > 0 } v ? (long)v.TotalSeconds : null;
+            out_.Add(BuildRecord(memoryId, props, ttlSeconds, distance: null));
+        }
+        return out_;
+    }
+
+ public bool DeleteMemory(string memoryId) => _db.KeyDelete(MemoryKey(memoryId)); // result of KeyDelete on the memory's key
+
+    /// <summary>
+    /// Drop the index together with every memory document, then recreate an
+    /// empty index. Returns the document count the index reported just
+    /// before the drop. In production the equivalent is <c>FLUSHDB</c> on a
+    /// dedicated memory database, or letting TTLs and eviction age entries out.
+    /// </summary>
+    public long Clear()
+    {
+        long removed = IndexInfo().NumDocs;
+        DropIndex(deleteDocuments: true);
+        CreateIndex();
+        return removed;
+    }
+
+ // ------------------------------------------------------------------
+ // Internals
+ // ------------------------------------------------------------------
+
+    private List<MemoryRecord> Nearest( // filtered KNN(k), nearest first
+        float[] embedding, string? user, string? @namespace, string? kind, int k)
+    {
+        if (embedding.Length != VectorDim)
+        {
+            throw new ArgumentException(
+                $"embedding length is {embedding.Length}; index expects {VectorDim}",
+                nameof(embedding));
+        }
+        string filterClause = BuildFilterClause(user, @namespace, kind);
+        string knnQuery = $"{filterClause}=>[KNN {k} @embedding $vec AS distance]";
+        byte[] vecBytes = LocalEmbedder.ToBytes(embedding);
+
+        var query = new Query(knnQuery)
+            .ReturnFields(
+                "user", "namespace", "kind", "source_thread",
+                "text", "created_ts", "hit_count", "distance")
+            .SetSortBy("distance", ascending: true)
+            .Limit(0, k)
+            .AddParam("vec", vecBytes)
+            .Dialect(2);
+        var result = _ft.Search(IndexName, query);
+        if (result.Documents is null || result.Documents.Count == 0)
+        {
+            return new List<MemoryRecord>();
+        }
+        var out_ = new List<MemoryRecord>(result.Documents.Count);
+        foreach (var doc in result.Documents)
+        {
+            // `doc.Id` is the full Redis key (e.g. `agent:mem:abc123`); strip
+            // the prefix so callers only see the opaque memory id.
+            string memoryId = StripPrefix(doc.Id ?? "");
+            var props = doc.GetProperties().ToDictionary(p => p.Key, p => p.Value);
+            // No usable distance => null, never 0.0 (callers would read that as an exact match).
+            double? distance = props.TryGetValue("distance", out var raw)
+                && raw.TryParse(out double parsed) ? parsed : null;
+            TimeSpan? ttl = _db.KeyTimeToLive(MemoryKey(memoryId));
+            long? ttlSeconds = ttl is { TotalSeconds: > 0 } v ? (long)v.TotalSeconds : null;
+            out_.Add(BuildRecord(memoryId, props, ttlSeconds, distance));
+        }
+        return out_;
+    }
+
+ private void BumpHitCount(string memoryId) // best-effort +1 on the doc's $.hit_count
+ {
+ try
+ {
+ _json.NumIncrby(MemoryKey(memoryId), "$.hit_count", 1);
+ }
+ catch (RedisServerException)
+ {
+ // The doc may have expired between the KNN lookup and this
+ // increment — fine, we just lose the hit count update.
+ }
+ }
+
+    private string StripPrefix(string rawKey) // ordinal match: Redis keys are opaque identifiers, not linguistic text
+        => rawKey.StartsWith(KeyPrefix, StringComparison.Ordinal) ? rawKey.Substring(KeyPrefix.Length) : rawKey;
+
+    private long? ResolveTtl(string kind) // kinds missing from the table get no TTL (null)
+        => _ttlByKind.GetValueOrDefault(kind);
+
+    private static MemoryRecord BuildRecord( // maps one search hit's returned fields onto a MemoryRecord
+        string memoryId,
+        Dictionary<string, RedisValue> props,
+        long? ttlSeconds,
+        double? distance)
+        => new(
+            Id: memoryId,
+            User: ToStringSafe(props, "user"),
+            Namespace: ToStringSafe(props, "namespace"),
+            Kind: ToStringSafe(props, "kind"),
+            SourceThread: ToStringSafe(props, "source_thread"),
+            Text: ToStringSafe(props, "text"),
+            CreatedTs: ParseDouble(props.GetValueOrDefault("created_ts"), 0),
+            HitCount: (long)ParseDouble(props.GetValueOrDefault("hit_count"), 0),
+            Distance: distance,
+            TtlSeconds: ttlSeconds);
+
+    internal static string EscapeTagValue(string value)
+    {
+        // Null and empty inputs both collapse to the empty string.
+        if (string.IsNullOrEmpty(value))
+        {
+            return "";
+        }
+        // Prefix each character RediSearch treats as TAG syntax with a backslash.
+        return string.Concat(
+            value.Select(ch => TagSpecial.Contains(ch) ? "\\" + ch : ch.ToString()));
+    }
+
+    internal static string BuildFilterClause(string? user, string? @namespace, string? kind)
+    {
+        var clauses = new List<string>(3); // one TAG clause per non-empty argument
+        if (!string.IsNullOrEmpty(user))
+            clauses.Add($"@user:{{{EscapeTagValue(user!)}}}");
+        if (!string.IsNullOrEmpty(@namespace))
+            clauses.Add($"@namespace:{{{EscapeTagValue(@namespace!)}}}");
+        if (!string.IsNullOrEmpty(kind))
+            clauses.Add($"@kind:{{{EscapeTagValue(kind!)}}}");
+        return clauses.Count == 0
+            ? "(*)"
+            : "(" + string.Join(" ", clauses) + ")";
+    }
+
+    private static string ToStringSafe(Dictionary<string, RedisValue> props, string key) // missing or null field => ""
+        => props.GetValueOrDefault(key).ToString() ?? "";
+
+    /// <summary>
+    /// Read a Redis value as a double; null, empty, or unparsable
+    /// values yield <paramref name="fallback"/>.
+    /// </summary>
+    private static double ParseDouble(RedisValue value, double fallback)
+        => !value.IsNullOrEmpty && value.TryParse(out double number) ? number : fallback;
+
+ private static double UnixSeconds() // fractional seconds since the Unix epoch (UTC), millisecond resolution
+ => DateTimeOffset.UtcNow.ToUnixTimeMilliseconds() / 1000.0;
+}
diff --git a/content/develop/use-cases/agent-memory/dotnet/Program.cs b/content/develop/use-cases/agent-memory/dotnet/Program.cs
new file mode 100644
index 0000000000..050f7b3315
--- /dev/null
+++ b/content/develop/use-cases/agent-memory/dotnet/Program.cs
@@ -0,0 +1,759 @@
+using System.Diagnostics;
+using System.Globalization;
+using System.Net;
+using System.Text;
+using System.Text.Json;
+using System.Web;
+using StackExchange.Redis;
+
+namespace AgentMemoryDemo;
+
+///
+/// Redis agent-memory demo server (.NET 8 + NRedisStack + ONNX
+/// Runtime).
+///
+///
+/// Run this and visit http://localhost:8093 to drive a
+/// small agent-memory demo backed by Redis Hashes, JSON, Search, and
+/// Streams. The UI lets you type a turn, watch working memory update,
+/// see semantically similar long-term memories recalled, watch the
+/// write-time deduplication skip near-duplicates, and inspect the
+/// per-thread event log.
+///
+/// The server holds a single , one
+/// , one , and
+/// one for the lifetime of the process.
+/// The first run downloads the embedding model into
+/// ./model_cache; everything after is local.
+///
+public static class Program
+{
+ private const string StackLabel =
+ "NRedisStack + ONNX Runtime + .NET HttpListener";
+
+ // 1 MiB cap on POST bodies so a runaway client (or a `curl
+ // --data-binary @big-file` by mistake) can't accumulate
+ // unbounded memory before the handler runs. The demo's largest
+ // legitimate body is a few hundred bytes of form-encoded query
+ // fields; 1 MiB is a generous ceiling.
+ private const int MaxBodyBytes = 1 * 1024 * 1024;
+
+ public static int Main(string[] argv) // exit code: 0 normal shutdown, 1 startup failure, 2 bad arguments
+ {
+ // The embedding is stored inside JSON documents as a JSON
+ // array, so host endianness doesn't matter there, but the
+ // *query* vector is sent to Redis as raw little-endian
+ // float32 bytes via the $vec param. LocalEmbedder.ToBytes
+ // writes little-endian explicitly through BinaryPrimitives,
+ // so even a big-endian host would produce correct bytes;
+ // this assertion only documents that every supported
+ // runtime today is little-endian. Debug.Assert is compiled
+ // out of Release builds, so it is not a runtime guard
+ // there.
+ Debug.Assert(BitConverter.IsLittleEndian,
+ "this demo assumes a little-endian host");
+
+ Args args;
+ try
+ {
+ args = Args.Parse(argv);
+ }
+ catch (ArgumentException ex)
+ {
+ Console.Error.WriteLine($"Error: {ex.Message}");
+ PrintHelp();
+ return 2;
+ }
+
+ // Resources held for the lifetime of the process. Declared
+ // here so the try/finally below disposes them on every exit
+ // path, including the early `return 1` branches that follow
+ // a successful Redis connect or model load.
+ ConnectionMultiplexer? mux = null;
+ LocalEmbedder? embedder = null;
+ HttpListener? listener = null;
+ try
+ {
+ try
+ {
+ mux = ConnectionMultiplexer.Connect(new ConfigurationOptions
+ {
+ EndPoints = { { args.RedisHost, args.RedisPort } },
+ AbortOnConnectFail = false,
+ ConnectTimeout = 2000,
+ SyncTimeout = 5000,
+ });
+ mux.GetDatabase().Ping();
+ }
+ catch (Exception ex)
+ {
+ Console.Error.WriteLine(
+ $"Error: cannot reach Redis at {args.RedisHost}:{args.RedisPort}");
+ Console.Error.WriteLine($" ({ex.Message})");
+ return 1;
+ }
+
+ var db = mux.GetDatabase();
+ var session = new AgentSession(
+ db,
+ keyPrefix: args.SessionKeyPrefix,
+ defaultTtlSeconds: args.SessionTtlSeconds);
+ var memory = new LongTermMemory(
+ db,
+ indexName: args.MemIndexName,
+ keyPrefix: args.MemKeyPrefix,
+ dedupThreshold: args.DedupThreshold,
+ recallThreshold: args.RecallThreshold);
+ memory.CreateIndex();
+ var events = new AgentEventLog(db, keyPrefix: args.EventKeyPrefix);
+
+ Console.WriteLine(
+ "Loading embedding model (first run downloads ~90 MB of ONNX weights)...");
+ try
+ {
+ embedder = LocalEmbedder.CreateAsync().GetAwaiter().GetResult();
+ }
+ catch (Exception ex)
+ {
+ Console.Error.WriteLine($"Error loading embedder: {ex.Message}");
+ return 1;
+ }
+
+ var demo = new AgentMemoryDemo(session, memory, events, embedder);
+
+ if (args.ResetOnStart)
+ {
+ Console.WriteLine(
+ $"Dropping any existing memories under '{args.MemKeyPrefix}*' and " +
+ "re-seeding from the sample memory list (pass --no-reset to keep).");
+ int seeded = demo.SeedAll("default", "default");
+ Console.WriteLine($"Seeded {seeded} memories.");
+ }
+
+ // Load index.html once and substitute the template tokens so
+ // the docs panel shows the actual values in use rather than
+ // the default copies. The file ships next to the binary via
+ // the entry in the .csproj.
+ string htmlPath = Path.Combine(AppContext.BaseDirectory, "index.html");
+ if (!File.Exists(htmlPath))
+ {
+ Console.Error.WriteLine(
+ $"index.html not found next to the binary at {htmlPath}.");
+ return 1;
+ }
+ string rawHtml = File.ReadAllText(htmlPath);
+ string htmlPage = rawHtml
+ .Replace("__SESSION_PREFIX__", args.SessionKeyPrefix)
+ .Replace("__MEM_PREFIX__", args.MemKeyPrefix)
+ .Replace("__MEM_INDEX__", args.MemIndexName)
+ .Replace("__EVENT_PREFIX__", args.EventKeyPrefix);
+
+ listener = new HttpListener();
+ // HttpListener prefixes need a trailing slash; '+' wildcard
+ // would require admin rights on macOS/Linux, so we bind to
+ // the literal host string. 127.0.0.1 keeps the demo off the
+ // network by default.
+ string prefix = $"http://{args.Host}:{args.Port}/";
+ listener.Prefixes.Add(prefix);
+ try
+ {
+ listener.Start();
+ }
+ catch (Exception ex)
+ {
+ Console.Error.WriteLine($"Failed to bind {prefix}: {ex.Message}");
+ return 1;
+ }
+
+ Console.WriteLine(
+ $"Redis agent memory demo listening on http://{args.Host}:{args.Port}");
+ Console.WriteLine(
+ $"Using Redis at {args.RedisHost}:{args.RedisPort}"
+ + $" with memory index '{args.MemIndexName}'");
+
+ var cts = new CancellationTokenSource();
+ Console.CancelKeyPress += (_, e) =>
+ {
+ e.Cancel = true;
+ Console.WriteLine("\nShutting down...");
+ cts.Cancel();
+ try { listener.Stop(); } catch { /* best-effort */ }
+ };
+
+ // One handler thread per request out of the ThreadPool. The
+ // ONNX session, the Redis multiplexer, the session helper,
+ // the memory helper, and the event log are all thread-safe;
+ // `currentThreadId` on the demo is mutable shared state
+ // but reads and writes race only in the corner case the
+ // walkthrough's Concurrency caveats section documents.
+ while (!cts.IsCancellationRequested)
+ {
+ HttpListenerContext ctx;
+ try
+ {
+ ctx = listener.GetContext();
+ }
+ catch (HttpListenerException) { break; }
+ catch (ObjectDisposedException) { break; }
+ ThreadPool.QueueUserWorkItem(_ =>
+ {
+ try
+ {
+ HandleRequest(ctx, demo, session, memory, events, embedder, htmlPage);
+ }
+ catch (Exception ex)
+ {
+ Console.Error.WriteLine(
+ $"[demo] handler error: {ex.GetType().Name}: {ex.Message}");
+ TrySendError(ctx, ex);
+ }
+ });
+ }
+
+ return 0;
+ }
+ finally
+ {
+ try { listener?.Close(); } catch { /* best-effort */ }
+ embedder?.Dispose();
+ mux?.Dispose();
+ }
+ }
+
+ // ------------------------------------------------------------------
+ // Request dispatch
+ // ------------------------------------------------------------------
+
+ private static void HandleRequest( // routes one request by method and path, then writes the response
+ HttpListenerContext ctx,
+ AgentMemoryDemo demo,
+ AgentSession session,
+ LongTermMemory memory,
+ AgentEventLog events,
+ LocalEmbedder embedder,
+ string htmlPage)
+ {
+ var req = ctx.Request;
+ string path = req.Url?.AbsolutePath ?? "/"; // a missing URL is treated as the root path
+
+ if (string.Equals(req.HttpMethod, "GET", StringComparison.OrdinalIgnoreCase))
+ {
+ if (path == "/" || path == "/index.html")
+ {
+ SendHtml(ctx, htmlPage);
+ return;
+ }
+ if (path == "/state")
+ {
+ var qs = HttpUtility.ParseQueryString(req.Url?.Query ?? "");
+ string user = OrDefault(qs["user"], demo.DefaultUser);
+ string @namespace = OrDefault(qs["namespace"], demo.DefaultNamespace);
+ SendJson(ctx, BuildState(demo, session, memory, events, embedder, user, @namespace));
+ return;
+ }
+ SendJson(ctx, new { error = "not found" }, 404);
+ return;
+ }
+
+ if (string.Equals(req.HttpMethod, "POST", StringComparison.OrdinalIgnoreCase))
+ {
+ string body = ReadBody(req);
+ var form = HttpUtility.ParseQueryString(body); // body is parsed as form-encoded key=value pairs
+
+ if (path == "/turn")
+ {
+ string text = (form["text"] ?? "").Trim();
+ if (text.Length == 0)
+ {
+ SendJson(ctx, new { error = "text is required" }, 400);
+ return;
+ }
+ double threshold = ClampThreshold(form["threshold"], memory.RecallThreshold);
+ var payload = demo.HandleTurn(
+ text: text,
+ user: OrDefault(form["user"], "default"),
+ @namespace: OrDefault(form["namespace"], "default"),
+ kind: OrDefault(form["kind"], "episodic"),
+ role: OrDefault(form["role"], "user"),
+ threshold: threshold,
+ action: OrDefault(form["action"], "turn"));
+ SendJson(ctx, payload);
+ return;
+ }
+
+ if (path == "/new_thread")
+ {
+ string threadId = demo.NewThread(
+ user: OrDefault(form["user"], "default"),
+ @namespace: OrDefault(form["namespace"], "default"));
+ SendJson(ctx, new { thread_id = threadId });
+ return;
+ }
+
+ if (path == "/reset")
+ {
+ int seeded = demo.SeedAll(
+ user: OrDefault(form["user"], "default"),
+ @namespace: OrDefault(form["namespace"], "default"));
+ SendJson(ctx, new { seeded });
+ return;
+ }
+
+ if (path == "/drop_memory")
+ {
+ string memoryId = (form["memory_id"] ?? "").Trim();
+ if (memoryId.Length == 0)
+ {
+ SendJson(ctx, new { error = "memory_id is required" }, 400);
+ return;
+ }
+ bool deleted = memory.DeleteMemory(memoryId);
+ SendJson(ctx, new { deleted, memory_id = memoryId });
+ return;
+ }
+
+ SendJson(ctx, new { error = "not found" }, 404);
+ return;
+ }
+
+ SendJson(ctx, new { error = "method not allowed" }, 405); // any verb other than GET or POST
+ }
+
+ private static object BuildState(
+ AgentMemoryDemo demo,
+ AgentSession session,
+ LongTermMemory memory,
+ AgentEventLog events,
+ LocalEmbedder embedder,
+ string user,
+ string @namespace)
+ {
+ var info = memory.IndexInfo();
+ string threadId = demo.CurrentThreadId;
+ var state = session.Load(threadId);
+ var memories = memory.ListMemories(user: user, @namespace: @namespace, limit: 200);
+ var recentEvents = events.Recent(threadId, count: 20);
+ return new
+ {
+ index = new
+ {
+ num_docs = info.NumDocs,
+ indexing_failures = info.IndexingFailures,
+ index_name = memory.IndexName,
+ model = embedder.ModelName,
+ session_ttl_seconds = session.DefaultTtlSeconds,
+ dedup_threshold = memory.DedupThreshold,
+ default_recall_threshold = memory.RecallThreshold,
+ stack_label = StackLabel,
+ },
+ thread_id = threadId,
+ session = state is null ? null : SerializeSession(state),
+ memories = memories.Select(SerializeMemory).ToArray(),
+ events = recentEvents.Select(SerializeEvent).ToArray(),
+ // `recalled` is populated by /turn; on plain /state reads
+ // the UI keeps showing the last turn's result, which is
+ // the useful behaviour for an "agent" panel.
+ recalled = Array.Empty