redis · andy-stark-redis · Jun 10, 2026 · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026
diff --git a/config.toml b/config.toml
@@ -12,6 +12,23 @@ contentDir = "content/"
 
 # ignoreErrors = ["error-remote-getcsv"]
 
+# Stop Hugo's filesystem watcher from opening fds for build artifacts
+# and language-package directories that ship inside use-case demos but
+# are never referenced from a page as a resource. Without these the
+# Rust `target/` and PHP `vendor/` trees alone account for ~8,500 of
+# the files under `content/`, which pushes the watcher past macOS's
+# default `kern.maxfilesperproc` ceiling on `hugo serve`.
+ignoreFiles = [
+  "/vendor/",            # composer / bundler deps
+  "/node_modules/",      # npm deps
+  "/target/",            # rust + maven build output
+  "/bin/",               # .NET build output
+  "/obj/",               # .NET intermediate output
+  "/__pycache__/",       # Python bytecode cache
+  "/models/",            # Hugot model cache
+  "/\\.transformers-cache/",  # TransformersPHP cache
+]
+
 [related]
 [[related.indices]]
   name = 'group'

diff --git a/content/develop/use-cases/_index.md b/content/develop/use-cases/_index.md
@@ -29,3 +29,4 @@ This section provides practical examples and reference implementations for commo
 * [Recommendation engine]({{< relref "/develop/use-cases/recommendation-engine" >}}) - Serve personalized recommendations under tight latency budgets by combining vector similarity with structured filters in a single Redis call
 * [Feature store]({{< relref "/develop/use-cases/feature-store" >}}) - Serve pre-computed ML features on the request path with mixed batch-and-streaming freshness using per-field TTL
 * [Semantic cache]({{< relref "/develop/use-cases/semantic-cache" >}}) - Reuse LLM responses for semantically similar queries to cut token costs and skip multi-second model calls on near-duplicate prompts
+* [Agent memory]({{< relref "/develop/use-cases/agent-memory" >}}) - Give AI agents persistent memory that spans sessions and tasks — working memory per thread, long-term semantic recall, and a time-ordered event log on one Redis instance
diff --git a/content/develop/use-cases/agent-memory/_index.md b/content/develop/use-cases/agent-memory/_index.md
@@ -0,0 +1,77 @@
+---
+categories:
+- docs
+- develop
+- stack
+- oss
+- rs
+- rc
+description: Give AI agents persistent memory that spans sessions and tasks — working memory per thread, long-term semantic recall, and a time-ordered event log — on a single Redis instance, with sub-millisecond reads on the agent loop's hot path.
+hideListLinks: true
+linkTitle: Agent memory
+title: Redis as agent memory
+weight: 8
+---
+
+## When to use Redis as agent memory
+
+Use Redis as the memory layer for an AI agent when each reasoning step needs to recall both *what just happened in this session* and *what the agent has learned over time* under a strict per-step latency budget — without standing up a separate vector database, message broker, and session store for each tier.
+
+## Why the problem is hard
+
+LLMs are stateless. Every API call starts from zero unless the application supplies the relevant context. Without a memory layer, agents re-derive information through extra LLM calls, lose personalization between sessions, and cannot coordinate state in multi-agent deployments. Some of the obvious workarounds have real drawbacks:
+
+-   **A standalone vector database** can index long-term semantic memories, but doesn't cover working session state or an ordered action log, and putting a separate service on the agent's hot path adds latency that compounds across multi-step reasoning loops.
+-   **In-process or app-server session storage** keeps working memory close to the agent, but disappears on process restart and can't be shared across multi-agent or load-balanced deployments — exactly the topology most production agents end up in.
+-   **Stuffing everything into the LLM context window** shifts the cost of memory onto every API call, hits the model's context limit on long-running sessions, and reliably degrades reasoning quality as the context grows.
+
+The core difficulty is that an agent needs *several kinds* of memory at once — short-lived working state per thread, durable semantic recall by meaning, and an audit trail of recent actions — each with its own retention rule and access pattern. Mapping all three onto a single primitive (only a vector index, only a key-value store, only an append log) forces compromises that show up as either lost context or extra LLM calls. Memory must also stay bounded; without deduplication, summarization, and background consolidation, stale context piles up and degrades downstream accuracy.
+
+This pattern is distinct from generic [session storage]({{< relref "/develop/use-cases/session-store" >}}) (spans a single user session, no semantic recall), from [semantic caching]({{< relref "/develop/use-cases/semantic-cache" >}}) (deduplicates LLM calls, not accumulated agent knowledge), and from RAG retrieval against an external document corpus (static reference material, not the agent's own experience).
+
+## What you can expect from a Redis solution
+
+You can:
+
+-   Persist and resume agent sessions by thread ID across restarts and across load-balanced workers.
+-   Recall long-term memories by semantic similarity instead of exact key, scoped per user, namespace, or memory kind.
+-   Prevent memory bloat by deduplicating near-identical memories at write time with the same vector index that powers recall.
+-   Run semantic caching, RAG retrieval, and agent memory together on a single Redis deployment, sharing the same vector index infrastructure.
+-   Keep each step in the agent reasoning loop under budget — Redis reads and writes are sub-millisecond, so the memory layer doesn't dominate per-step latency.
+
+## How Redis supports the solution
+
+In practice, each tier of agent memory maps onto a Redis primitive that's already in the cluster. **Working memory** for an active session is a [Hash]({{< relref "/develop/data-types/hashes" >}}) at a deterministic key such as `agent:session:{thread_id}`, holding the running scratchpad, current goal, and recent turns — written with [`HSET`]({{< relref "/commands/hset" >}}) and read in one round trip with [`HGETALL`]({{< relref "/commands/hgetall" >}}). **Long-term memory** — both episodic ("what happened in past sessions") and semantic ("what the agent has learned about this user or domain") — lives as [JSON]({{< relref "/develop/data-types/json" >}}) documents that carry an embedding vector, indexed by [Redis Search]({{< relref "/develop/ai/search-and-query" >}}) on an [HNSW vector field]({{< relref "/develop/ai/search-and-query/vectors" >}}) together with tag fields (user, namespace, kind, source thread). The agent recalls memories with one [`FT.SEARCH`]({{< relref "/commands/ft.search" >}}) call that combines vector similarity with metadata filtering, and the same similarity check runs at write time to deduplicate near-identical memories before they enter the store. **A time-ordered event log** of the agent's recent actions and observations is a [Stream]({{< relref "/develop/data-types/streams" >}}) appended with [`XADD`]({{< relref "/commands/xadd" >}}), replayed with [`XREVRANGE`]({{< relref "/commands/xrevrange" >}}), and bounded with [`XTRIM`]({{< relref "/commands/xtrim" >}}).
+
+Redis provides the following features that make it a good fit for agent memory:
+
+-   [Hashes]({{< relref "/develop/data-types/hashes" >}}) hold per-session working memory under one key, so loading or persisting a thread's state takes a single round trip.
+-   [JSON]({{< relref "/develop/data-types/json" >}}) documents store each long-term memory together with its embedding vector and metadata, so a similarity search returns everything the agent needs without a second lookup.
+-   [Redis Search]({{< relref "/develop/ai/search-and-query" >}}) with [HNSW vector indexes]({{< relref "/develop/ai/search-and-query/vectors" >}}) recalls memories by meaning in sub-millisecond time, and the same [`FT.SEARCH`]({{< relref "/commands/ft.search" >}}) call applies TAG and NUMERIC filters so user, namespace, and kind scoping happen inside the query rather than in application code.
+-   [Streams]({{< relref "/develop/data-types/streams" >}}) keep an ordered log of agent actions and observations, [`XTRIM`]({{< relref "/commands/xtrim" >}}) bounds retention without manual cleanup, and consumer groups let downstream workers — summarizers, consolidators — replay the log without losing position.
+-   [`EXPIRE`]({{< relref "/commands/expire" >}}) automates memory decay per tier — short TTLs on working memory, longer on episodic long-term memories, no TTL on semantic ones — so stale context falls off without a separate cleanup job. (The event log is bounded separately, by [`XADD MAXLEN`]({{< relref "/commands/xadd" >}}) on the Stream, not by `EXPIRE`.)
+-   Sub-millisecond reads and writes from memory keep each turn of the agent loop under budget, and a single Redis instance can carry working memory, long-term recall, the event log, semantic caching, and RAG retrieval at zero marginal infrastructure cost.
+
+## Ecosystem
+
+The following libraries, frameworks, and managed services build on Redis for agent memory:
+
+-   **Python**: [RedisVL]({{< relref "/develop/ai/redisvl" >}}) provides vector-index, session-manager, and semantic-memory helpers you can compose into an agent memory layer.
+-   **Frameworks**: [LangChain]({{< relref "/integrate/langchain-redis" >}}) supports Redis as a chat history and memory backend, and [LangGraph & Redis](https://redis.io/blog/langgraph-redis-build-smarter-ai-agents-with-memory-persistence/) ships a Redis checkpointer for persisting graph state across runs.
+-   **AWS**: [Amazon Bedrock]({{< relref "/integrate/amazon-bedrock" >}}) agent runtimes integrate with Redis for memory persistence and vector search.
+-   **Any language**: standard Redis client libraries cover the pattern below for custom agent loops.
+-   **Managed**: [Redis Agent Memory Server]({{< relref "/develop/ai/context-engine/agent-memory" >}}) is a managed agent memory service with REST and MCP interfaces, working and long-term memory tiers, deduplication, summarization, and background consolidation — useful when you'd rather not build and operate the pattern below yourself.
+
+## Code examples to build your own Redis agent memory
+
+The following guides show how to build a small Redis-backed agent memory layer using only standard Redis commands — working memory in a hash per thread, long-term memory as JSON documents with a vector index, an event log in a stream, and per-tier TTLs for decay. Each guide includes a runnable interactive demo where you can send turns, watch working memory update, see semantic recall against past memories, and inspect the event log.
+
+* [redis-py (Python)]({{< relref "/develop/use-cases/agent-memory/redis-py" >}})
+* [node-redis (Node.js)]({{< relref "/develop/use-cases/agent-memory/nodejs" >}})
+* [NRedisStack (C#)]({{< relref "/develop/use-cases/agent-memory/dotnet" >}})
+* [redis-rs (Rust)]({{< relref "/develop/use-cases/agent-memory/rust" >}})
+* [go-redis (Go)]({{< relref "/develop/use-cases/agent-memory/go" >}})
+* [Jedis (Java)]({{< relref "/develop/use-cases/agent-memory/java-jedis" >}})
+* [Lettuce (Java)]({{< relref "/develop/use-cases/agent-memory/java-lettuce" >}})
+* [Predis (PHP)]({{< relref "/develop/use-cases/agent-memory/php" >}})
+* [redis-rb (Ruby)]({{< relref "/develop/use-cases/agent-memory/ruby" >}})
diff --git a/content/develop/use-cases/agent-memory/dotnet/.gitignore b/content/develop/use-cases/agent-memory/dotnet/.gitignore
@@ -0,0 +1,7 @@
+bin/
+obj/
+model_cache/
+*.user
+*.suo
+.vs/
+.idea/
diff --git a/content/develop/use-cases/agent-memory/dotnet/AgentEventLog.cs b/content/develop/use-cases/agent-memory/dotnet/AgentEventLog.cs
@@ -0,0 +1,119 @@
+using System.Globalization;
+using StackExchange.Redis;
+
+namespace AgentMemoryDemo;
+
+/// <summary>
+/// Append-only event log for an agent thread, backed by a Redis
+/// Stream.
+/// </summary>
+/// <remarks>
+/// <para>Each thread gets a stream at <c>agent:events:{threadId}</c>.
+/// Every action the agent takes (a user turn arriving, a memory being
+/// recalled, a memory being written, a tool being called) is one
+/// <c>XADD</c> to that stream. Replay with <c>XREVRANGE</c> for the
+/// most recent N events; retention is bounded by the <c>MAXLEN ~</c>
+/// option passed on every <c>XADD</c>, so the log stays cheap
+/// regardless of how long the thread has been running.</para>
+///
+/// <para>The stream is independent of the session hash and the
+/// long-term memory store: it answers the "what just happened"
+/// question without competing with either of those for indexing or
+/// memory budget. Consumer groups (not used in this demo) would let
+/// downstream workers — summarisers, consolidators, audit pipelines —
+/// replay the log without losing position.</para>
+/// </remarks>
+public sealed class AgentEventLog
+{
+    /// <summary>
+    /// Approximate cap on stream length. <c>MAXLEN ~</c> lets Redis
+    /// trim in whole-node units instead of exactly-N units, which is
+    /// much cheaper at the cost of overshooting the bound by up to a
+    /// node's worth.
+    /// </summary>
+    public const int DefaultMaxLen = 1000;
+
+    private readonly IDatabase _db;
+
+    /// <summary>Prefix prepended to a thread id to form its stream key.</summary>
+    public string KeyPrefix { get; }
+
+    /// <summary>Approximate maximum number of entries kept per stream.</summary>
+    public int MaxLen { get; }
+
+    /// <summary>Create an event log over the given Redis database.</summary>
+    /// <param name="db">Database handle used for every stream command.</param>
+    /// <param name="keyPrefix">Prefix for per-thread stream keys.</param>
+    /// <param name="maxLen">Approximate per-stream entry cap; must be positive.</param>
+    /// <exception cref="ArgumentNullException">
+    /// <paramref name="db"/> or <paramref name="keyPrefix"/> is null.
+    /// </exception>
+    /// <exception cref="ArgumentOutOfRangeException">
+    /// <paramref name="maxLen"/> is zero or negative.
+    /// </exception>
+    public AgentEventLog(
+        IDatabase db,
+        string keyPrefix = "agent:events:",
+        int maxLen = DefaultMaxLen)
+    {
+        // Fail at construction instead of on the first command: a
+        // null handle or a non-positive cap can never describe a
+        // usable log.
+        ArgumentNullException.ThrowIfNull(db);
+        ArgumentNullException.ThrowIfNull(keyPrefix);
+        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(maxLen);
+
+        _db = db;
+        KeyPrefix = keyPrefix;
+        MaxLen = maxLen;
+    }
+
+    /// <summary>Build the stream key for a thread.</summary>
+    public string StreamKey(string threadId) => KeyPrefix + threadId;
+
+    /// <summary>
+    /// Append one event and return its stream id.
+    /// </summary>
+    /// <remarks>
+    /// <c>MAXLEN ~ N</c> keeps the stream bounded with near-zero
+    /// overhead; an exact bound (<c>MAXLEN N</c> without the tilde)
+    /// makes Redis trim to exactly N entries, which costs more and
+    /// is rarely worth it.
+    /// </remarks>
+    /// <param name="threadId">Thread whose stream receives the event.</param>
+    /// <param name="action">Short label stored in the <c>action</c> field.</param>
+    /// <param name="detail">Free-form text stored in the <c>detail</c> field.</param>
+    /// <returns>The id Redis assigned to the new entry.</returns>
+    public string Record(string threadId, string action, string detail = "")
+    {
+        var fields = new NameValueEntry[]
+        {
+            new("action", action),
+            new("detail", detail),
+            new("ts", UnixSeconds().ToString("F6", CultureInfo.InvariantCulture)),
+        };
+        // StreamAdd's `useApproximateMaxLength: true` issues
+        // `MAXLEN ~ N` rather than the exact form.
+        RedisValue id = _db.StreamAdd(
+            StreamKey(threadId),
+            fields,
+            messageId: null,
+            maxLength: MaxLen,
+            useApproximateMaxLength: true);
+        return (string)id!;
+    }
+
+    /// <summary>Return the most recent events, newest first.</summary>
+    /// <remarks>
+    /// <para>StackExchange.Redis swaps the <c>minId</c> / <c>maxId</c>
+    /// arguments when it issues <c>XREVRANGE</c> under
+    /// <see cref="Order.Descending"/>, so the caller still passes
+    /// "low, high" in natural order (<c>-</c> / <c>+</c>). Passing
+    /// them the other way around — <c>+</c> / <c>-</c> — would issue
+    /// <c>XREVRANGE key - +</c>, which Redis interprets as an empty
+    /// range and returns nothing.</para>
+    ///
+    /// <para>Fields missing from an entry fall back to an empty
+    /// string (or <c>0</c> for <c>ts</c>). If an entry repeats a
+    /// field name, the last occurrence wins.</para>
+    /// </remarks>
+    /// <param name="threadId">Thread whose stream is read.</param>
+    /// <param name="count">Maximum number of events to return.</param>
+    /// <returns>
+    /// Up to <paramref name="count"/> events; an empty list when
+    /// <paramref name="count"/> is zero or negative.
+    /// </returns>
+    public List<AgentEvent> Recent(string threadId, int count = 20)
+    {
+        // A non-positive count can never select an event, so answer
+        // without a round trip.
+        if (count <= 0)
+        {
+            return new List<AgentEvent>();
+        }
+
+        var entries = _db.StreamRange(
+            StreamKey(threadId), "-", "+", count: count, messageOrder: Order.Descending);
+        var events = new List<AgentEvent>(entries.Length);
+        foreach (var entry in entries)
+        {
+            string action = "";
+            string detail = "";
+            string? ts = null;
+
+            // Pick out the known fields directly. Building a
+            // dictionary would throw on an entry that repeats a
+            // field name, which Redis streams permit.
+            foreach (var field in entry.Values)
+            {
+                switch ((string?)field.Name)
+                {
+                    case "action":
+                        action = (string?)field.Value ?? "";
+                        break;
+                    case "detail":
+                        detail = (string?)field.Value ?? "";
+                        break;
+                    case "ts":
+                        ts = (string?)field.Value;
+                        break;
+                }
+            }
+
+            events.Add(new AgentEvent(
+                EventId: (string)entry.Id!,
+                ThreadId: threadId,
+                Action: action,
+                Detail: detail,
+                Ts: ParseDouble(ts, 0)));
+        }
+        return events;
+    }
+
+    /// <summary>Current stream length.</summary>
+    public long Length(string threadId) => _db.StreamLength(StreamKey(threadId));
+
+    /// <summary>Drop the entire stream for a thread.</summary>
+    public bool Clear(string threadId) => _db.KeyDelete(StreamKey(threadId));
+
+    /// <summary>
+    /// Current UTC time as fractional Unix seconds, at millisecond
+    /// resolution.
+    /// </summary>
+    private static double UnixSeconds()
+        => DateTimeOffset.UtcNow.ToUnixTimeMilliseconds() / 1000.0;
+
+    /// <summary>
+    /// Parse a culture-invariant floating-point string, returning
+    /// <paramref name="fallback"/> when the text is null or malformed.
+    /// </summary>
+    private static double ParseDouble(string? value, double fallback)
+        => double.TryParse(value, NumberStyles.Float, CultureInfo.InvariantCulture, out var d)
+            ? d : fallback;
+}
diff --git a/content/develop/use-cases/agent-memory/dotnet/AgentMemoryDemo.csproj b/content/develop/use-cases/agent-memory/dotnet/AgentMemoryDemo.csproj
@@ -0,0 +1,40 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net8.0</TargetFramework>
+    <RootNamespace>AgentMemoryDemo</RootNamespace>
+    <AssemblyName>AgentMemoryDemo</AssemblyName>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+    <LangVersion>latest</LangVersion>
+    <InvariantGlobalization>false</InvariantGlobalization>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <!-- Redis client. NRedisStack adds Search and JSON helpers on
+         top of StackExchange.Redis, which it pulls in transitively
+         and which supplies the Streams API; this demo uses all
+         three subsystems. -->
+    <PackageReference Include="NRedisStack" Version="1.4.0" />
+
+    <!-- ONNX Runtime to run the sentence-transformers MiniLM model
+         locally. The CPU EP works on macOS/Linux/Windows without
+         native install steps. -->
+    <PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.20.1" />
+
+    <!-- BertTokenizer reads the standard MiniLM vocab.txt; this is
+         the shortest path to producing the exact input_ids the ONNX
+         export expects. -->
+    <PackageReference Include="Microsoft.ML.Tokenizers" Version="1.0.2" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <!-- Ship index.html next to the binary so the HttpListener can
+         load it from disk at startup; the demo expects the file to
+         sit beside AgentMemoryDemo.dll. -->
+    <None Include="index.html">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>
+
+</Project>