diff --git a/infra/main.bicep b/infra/main.bicep
index d229d716..904fee03 100644
--- a/infra/main.bicep
+++ b/infra/main.bicep
@@ -301,8 +301,12 @@ module applicationInsights 'br/public:avm/res/insights/component:0.6.0' = if (en
     disableIpMasking: false
     flowType: 'Bluefield'
     // WAF aligned configuration for Monitoring
+    // The AVM `insights/component` module wires Application Insights to
+    // the Log Analytics workspace via `workspaceResourceId` (workspace-
+    // based App Insights). A separate `diagnosticSettings` entry on the
+    // SAME workspace causes duplicate ingestion of platform logs.
+    // Source: AB#37816 — see CKM #811 reference implementation.
     workspaceResourceId: enableMonitoring ? logAnalyticsWorkspaceResourceId : ''
-    diagnosticSettings: enableMonitoring ? [{ workspaceResourceId: logAnalyticsWorkspaceResourceId }] : null
   }
 }
 
diff --git a/infra/main_custom.bicep b/infra/main_custom.bicep
index fea69ac3..f93b93ab 100644
--- a/infra/main_custom.bicep
+++ b/infra/main_custom.bicep
@@ -279,8 +279,12 @@ module applicationInsights 'br/public:avm/res/insights/component:0.6.0' = if (en
     disableIpMasking: false
     flowType: 'Bluefield'
     // WAF aligned configuration for Monitoring
+    // The AVM `insights/component` module wires Application Insights to
+    // the Log Analytics workspace via `workspaceResourceId` (workspace-
+    // based App Insights). A separate `diagnosticSettings` entry on the
+    // SAME workspace causes duplicate ingestion of platform logs.
+    // Source: AB#37816 — see CKM #811 reference implementation.
     workspaceResourceId: enableMonitoring ? logAnalyticsWorkspaceResourceId : ''
-    diagnosticSettings: enableMonitoring ? [{ workspaceResourceId: logAnalyticsWorkspaceResourceId }] : null
   }
 }
 
diff --git a/src/backend-api/pyproject.toml b/src/backend-api/pyproject.toml
index dd746c45..b3450503 100644
--- a/src/backend-api/pyproject.toml
+++ b/src/backend-api/pyproject.toml
@@ -9,12 +9,14 @@ dependencies = [
     "azure-ai-agents==1.2.0b3",
     "azure-appconfiguration==1.7.1",
     "azure-identity==1.25.0",
+    "azure-monitor-events-extension==0.1.0",
     "azure-monitor-opentelemetry==1.7.0",
     "azure-search-documents==11.6.0b12",
     "azure-storage-blob==12.26.0",
     "azure-storage-queue==12.13.0",
     "fastapi[standard]==0.116.1",
     "httpx==0.28.1",
+    "opentelemetry-instrumentation-fastapi==0.57b0",
     "pydantic-settings==2.10.1",
     "python-dotenv",
     "python-multipart==0.0.22",
diff --git a/src/backend-api/src/app/.env.example b/src/backend-api/src/app/.env.example
index f7444c62..91201dc0 100644
--- a/src/backend-api/src/app/.env.example
+++ b/src/backend-api/src/app/.env.example
@@ -7,3 +7,17 @@ APP_CONFIGURATION_URL=""
 # AZURE_PACKAGE_LOGGING_LEVEL="WARNING"  # Options: DEBUG, INFO, WARNING, ERROR, CRITICAL
 # AZURE_LOGGING_PACKAGES="azure.core.pipeline.policies.http_logging_policy,azure.storage.blob,azure.storage.queue,azure.core,azure.identity,azure.storage,azure.core.pipeline,azure.core.pipeline.policies,azure.core.pipeline.transport,openai,openai._client,httpx,httpcore,semantic_kernel,urllib3,msal"
 
+# ------------------------------------------------------------------
+# Application Insights / OpenTelemetry
+# ------------------------------------------------------------------
+# When deployed via the bundled Bicep, this value is injected by
+# `infra/main.bicep` -> `containerAppBackend` from the
+# `applicationInsights.outputs.connectionString` output. Leave unset for
+# local dev to skip telemetry export entirely (the app will log a single
+# warning at startup and otherwise behave normally).
+# APPLICATIONINSIGHTS_CONNECTION_STRING=""
+
+# Optional: clamp basic logging level for App Insights ingestion.
+# Defaults inherit from APP_LOGGING_LEVEL above.
+# AZURE_BASIC_LOGGING_LEVEL="INFO"
+
diff --git a/src/backend-api/src/app/application.py b/src/backend-api/src/app/application.py
index 4100decf..559a1476 100644
--- a/src/backend-api/src/app/application.py
+++ b/src/backend-api/src/app/application.py
@@ -1,9 +1,14 @@
+import logging
 import os
 from datetime import datetime
 
 from fastapi.middleware.cors import CORSMiddleware
 from libs.base.application_base import Application_Base
 from libs.base.typed_fastapi import TypedFastAPI
+from libs.logging.span_filters import (
+    DropASGIResponseBodySpanProcessor,
+    DropCosmosDependencySpanProcessor,
+)
 from libs.repositories.file_repository import FileRepository
 from libs.repositories.process_repository import ProcessRepository
 from libs.repositories.process_status_repository import ProcessStatusRepository
@@ -20,6 +25,14 @@
 from routers import router_debug, router_files, router_process
 from routers.http_probes import router as http_probes
 
+logger = logging.getLogger(__name__)
+
+# URLs (relative paths) that should NOT generate request telemetry.
+# Matches the routes registered in `routers/http_probes.py`.
+# `excluded_urls` is a comma-separated substring list per the
+# OpenTelemetry FastAPI instrumentation contract.
+_OTEL_EXCLUDED_URLS = "health,startup"
+
 
 class Application(Application_Base):
     """
@@ -57,11 +70,94 @@ def initialize(self):
             allow_headers=["*"],
         )
 
+        # Wire up Azure Monitor / OpenTelemetry BEFORE the routers are
+        # included so that the FastAPI instrumentor can patch the app
+        # while the route table is still empty. `configure_azure_monitor`
+        # is a no-op (warns once) when the connection string env var is
+        # absent — see `libs/logging/event_utils.py`.
+        self._configure_azure_monitor()
+
         self.app.include_router(http_probes)
         self._register_dependencies()
         self._config_routers()
         # self._initialize_database()
 
+        # Instrumenting AFTER routers are registered means every route is
+        # automatically wrapped by the OTEL middleware.
+        self._instrument_fastapi()
+
+    def _configure_azure_monitor(self):
+        """Initialise Azure Monitor OpenTelemetry exporter, if configured.
+
+        This is the App Insights "linkage" step required by AC #1 / AC #2:
+        if `APPLICATIONINSIGHTS_CONNECTION_STRING` is set in the
+        container app environment (wired by Bicep — see
+        `infra/main.bicep`), we hand it to `configure_azure_monitor`
+        along with our two noise-suppressing span processors.
+
+        Live Metrics is enabled so the team can watch the Maintenance
+        environment in real time during demo validation.
+        """
+        connection_string = os.environ.get(
+            "APPLICATIONINSIGHTS_CONNECTION_STRING", ""
+        ).strip()
+        if not connection_string:
+            logger.info(
+                "APPLICATIONINSIGHTS_CONNECTION_STRING not set; "
+                "skipping Azure Monitor OpenTelemetry configuration."
+            )
+            return
+
+        try:
+            from azure.monitor.opentelemetry import configure_azure_monitor
+
+            configure_azure_monitor(
+                connection_string=connection_string,
+                enable_live_metrics=True,
+                span_processors=[
+                    DropASGIResponseBodySpanProcessor(),
+                    DropCosmosDependencySpanProcessor(),
+                ],
+            )
+            # Do NOT log the connection string itself — it contains the
+            # ingestion key. Logging only the fact of configuration.
+            logger.info(
+                "Azure Monitor OpenTelemetry configured (live metrics enabled)."
+            )
+        except Exception:  # noqa: BLE001 — telemetry must never break startup
+            logger.exception(
+                "Failed to configure Azure Monitor OpenTelemetry; "
+                "continuing without App Insights export."
+            )
+
+    def _instrument_fastapi(self):
+        """Apply the OpenTelemetry FastAPI instrumentation.
+
+        Excludes the liveness / startup probe routes (registered in
+        `routers/http_probes.py`) so probe traffic does not flood
+        Application Insights with no-information request rows.
+        """
+        if not os.environ.get(
+            "APPLICATIONINSIGHTS_CONNECTION_STRING", ""
+        ).strip():
+            # No exporter wired up; instrumenting is a wasted import.
+            return
+        try:
+            from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
+
+            FastAPIInstrumentor.instrument_app(
+                self.app, excluded_urls=_OTEL_EXCLUDED_URLS
+            )
+            logger.info(
+                "FastAPIInstrumentor attached (excluded_urls=%s).",
+                _OTEL_EXCLUDED_URLS,
+            )
+        except Exception:  # noqa: BLE001
+            logger.exception(
+                "Failed to attach FastAPIInstrumentor; "
+                "continuing without per-request telemetry."
+            )
+
     def _config_routers(self):
         """
         Configure routers for the FastAPI application.
diff --git a/src/backend-api/src/app/libs/base/application_base.py b/src/backend-api/src/app/libs/base/application_base.py
index f726091b..2bba922a 100644
--- a/src/backend-api/src/app/libs/base/application_base.py
+++ b/src/backend-api/src/app/libs/base/application_base.py
@@ -15,6 +15,19 @@
 from libs.application.application_context import AppContext
 from libs.azure.app_configuration import AppConfigurationHelper
 
+# Logger packages that emit at INFO/DEBUG levels often enough to drown
+# out signal in Application Insights logs when the OTEL log handler is
+# attached. We force them to WARNING regardless of caller config.
+# This is a backstop in addition to (not a replacement for) the
+# `AZURE_LOGGING_PACKAGES` env-var driven filtering — anything listed
+# here is ALWAYS clamped to WARNING.
+_NOISY_LOGGER_PACKAGES = (
+    "azure.core.pipeline.policies.http_logging_policy",
+    "azure.cosmos",
+    "opentelemetry.sdk",
+    "azure.monitor.opentelemetry.exporter.export._base",
+)
+
 
 class Application_Base(ABC):
     application_context: AppContext = None
@@ -78,6 +91,17 @@ def __init__(self, env_file_path: str | None = None, **data):
                 ):
                     logging.getLogger(logger_name).setLevel(azure_level)
 
+            # Hard-suppress known noisy packages regardless of operator
+            # config. Without this, the App Insights logs view is
+            # dominated by per-request HTTP policy logs and per-call
+            # Cosmos diagnostics — see AC #3 / AC #4 of AB#37816.
+            # We never lower a logger that the operator has explicitly
+            # raised below WARNING.
+            for noisy_pkg in _NOISY_LOGGER_PACKAGES:
+                noisy_logger = logging.getLogger(noisy_pkg)
+                if noisy_logger.level == logging.NOTSET or noisy_logger.level < logging.WARNING:
+                    noisy_logger.setLevel(logging.WARNING)
+
         # Initialize the application
         self.initialize()
 
diff --git a/src/backend-api/src/app/libs/logging/__init__.py b/src/backend-api/src/app/libs/logging/__init__.py
new file mode 100644
index 00000000..7f9bd519
--- /dev/null
+++ b/src/backend-api/src/app/libs/logging/__init__.py
@@ -0,0 +1,23 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+"""Application-level logging and telemetry helpers.
+
+This subpackage hosts the small Application Insights / OpenTelemetry
+integration helpers used by the backend-api application:
+
+- ``event_utils`` — a tiny wrapper around
+  ``azure.monitor.events.extension.track_event`` that no-ops when the
+  ``APPLICATIONINSIGHTS_CONNECTION_STRING`` environment variable is not
+  configured. Callers can therefore emit structured events from
+  any router/service without conditionally guarding each call site.
+- ``span_filters`` — custom OpenTelemetry ``SpanProcessor`` implementations
+  that drop noisy spans before they are exported to Application Insights
+  (per-chunk ASGI ``http.response.body`` spans and Cosmos DB dependency
+  spans). These keep the App Insights ingestion cost and the
+  end-to-end transaction view clean for the Container Migration workflow.
+
+Nothing in this subpackage imports Azure SDKs at module-import time, so
+it is safe to import from contexts where the App Insights SDK may not be
+fully wired up yet (e.g. application bootstrap before
+``configure_azure_monitor`` has run).
+"""
diff --git a/src/backend-api/src/app/libs/logging/event_utils.py b/src/backend-api/src/app/libs/logging/event_utils.py
new file mode 100644
index 00000000..742e7f75
--- /dev/null
+++ b/src/backend-api/src/app/libs/logging/event_utils.py
@@ -0,0 +1,132 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+"""Lightweight helpers for emitting Application Insights *custom events*.
+
+The backend-api routers want to emit structured events such as
+``"UploadFilesSuccess"`` / ``"UploadFilesError"`` for business-level
+observability — independent of whatever distributed-tracing spans the
+OpenTelemetry instrumentation produces. ``azure-monitor-events-extension``
+provides ``track_event`` for exactly this purpose, but two practical
+problems show up in production:
+
+1. ``track_event`` raises (or warns repeatedly) when called before
+   ``configure_azure_monitor`` has been invoked — for example in unit
+   tests or in local dev runs where ``APPLICATIONINSIGHTS_CONNECTION_STRING``
+   is intentionally unset.
+2. Importing ``azure.monitor.events.extension`` eagerly at module
+   import time slows cold-start and pulls in the OTEL log SDK even for
+   code paths that never emit a custom event.
+
+``track_event_if_configured`` solves both problems. It:
+
+* short-circuits when the connection string env var is empty/unset and
+  emits a single warning the first time it is called (subsequent
+  unconfigured calls are silent — see ``_warned_unconfigured`` below);
+* lazily imports ``azure.monitor.events.extension`` only on the first
+  configured call;
+* swallows and logs export failures so that telemetry problems can
+  never break a request.
+
+The function is deliberately small and side-effect-free outside the
+optional Application Insights call. Tests can therefore verify both
+the gating behaviour and the export behaviour without standing up a
+real OpenTelemetry pipeline.
+"""
+from __future__ import annotations
+
+import logging
+import os
+from typing import Any, Mapping
+
+logger = logging.getLogger(__name__)
+
+# Environment variable that ``configure_azure_monitor`` keys off and that
+# we use as the single source of truth for "is App Insights configured".
+APP_INSIGHTS_CONN_STRING_ENV = "APPLICATIONINSIGHTS_CONNECTION_STRING"
+
+# Public message constant so tests can assert on the wording without
+# duplicating the string. We deliberately do not include the env-var
+# value in any log message — see hard-constraint #8 (never echo secrets).
+_UNCONFIGURED_WARNING = (
+    "APPLICATIONINSIGHTS_CONNECTION_STRING is not set; "
+    "track_event_if_configured(name=%s) is a no-op."
+)
+
+# Module-level latch so we warn at most once per process when the
+# connection string is missing. Reset by tests via
+# ``reset_unconfigured_warning_for_tests``.
+_warned_unconfigured: bool = False
+
+
+def _is_app_insights_configured() -> bool:
+    """Return True iff the App Insights connection string is non-empty.
+
+    Reading the environment on every call (rather than caching at import
+    time) is intentional: ``Application_Base.__init__`` may load the
+    ``.env`` file or pull values from Azure App Configuration *after*
+    this module has been imported.
+    """
+    value = os.environ.get(APP_INSIGHTS_CONN_STRING_ENV)
+    return bool(value and value.strip())
+
+
+def reset_unconfigured_warning_for_tests() -> None:
+    """Test-only helper: reset the once-per-process warning latch."""
+    global _warned_unconfigured
+    _warned_unconfigured = False
+
+
+def track_event_if_configured(
+    name: str, properties: Mapping[str, Any] | None = None
+) -> None:
+    """Emit an Application Insights custom event, gated on configuration.
+
+    Parameters
+    ----------
+    name:
+        Event name as it should appear in the App Insights ``customEvents``
+        table. Use ``PascalCase`` for consistency with the rest of the
+        product (e.g. ``"UploadFilesSuccess"``, ``"StartProcessingError"``).
+    properties:
+        Optional mapping of string keys to JSON-serialisable values that
+        will land in ``customDimensions``. ``None`` is normalised to an
+        empty dict before being forwarded.
+
+    Behaviour
+    ---------
+    * If ``APPLICATIONINSIGHTS_CONNECTION_STRING`` is unset or empty,
+      this is a no-op. A single warning is logged the first time this
+      occurs in the process; subsequent calls are silent.
+    * If the env var is set, ``azure.monitor.events.extension.track_event``
+      is invoked. Any exception raised during export is caught and logged
+      at ``WARNING`` level — telemetry must never break a request.
+    """
+    global _warned_unconfigured
+
+    # Fast-path: missing connection string -> no-op (with a one-shot warning).
+    if not _is_app_insights_configured():
+        if not _warned_unconfigured:
+            logger.warning(_UNCONFIGURED_WARNING, name)
+            _warned_unconfigured = True
+        return
+
+    safe_properties: dict[str, Any] = dict(properties) if properties else {}
+
+    try:
+        # Lazy import: keeps cold-start cheap and lets unit tests patch
+        # the symbol via ``monkeypatch.setattr`` on this module.
+        from azure.monitor.events.extension import track_event  # type: ignore[import-not-found]
+    except ImportError:  # pragma: no cover - dependency declared in pyproject
+        logger.warning(
+            "azure-monitor-events-extension is not installed; "
+            "skipping track_event(name=%s).",
+            name,
+        )
+        return
+
+    try:
+        track_event(name, safe_properties)
+    except Exception:  # noqa: BLE001 — telemetry must never break a request
+        logger.exception(
+            "Failed to publish App Insights custom event name=%s.", name
+        )
diff --git a/src/backend-api/src/app/libs/logging/span_filters.py b/src/backend-api/src/app/libs/logging/span_filters.py
new file mode 100644
index 00000000..64f5e1c6
--- /dev/null
+++ b/src/backend-api/src/app/libs/logging/span_filters.py
@@ -0,0 +1,139 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+"""Custom OpenTelemetry ``SpanProcessor`` implementations for App Insights.
+
+These processors are wired into ``configure_azure_monitor`` (see
+``app.application.Application.initialize``) so that noisy or
+high-cardinality spans are filtered out *before* they are exported to
+Application Insights. Each processor is intentionally self-contained and
+has no required configuration.
+
+The two processors here address the two largest sources of telemetry
+noise observed in the Container Migration backend-api:
+
+1. **ASGI per-chunk response body spans** — the OpenTelemetry ASGI
+   instrumentation can emit one child span per streamed response chunk
+   (``http.response.body``). For the ``/api/process/{id}/download`` ZIP
+   stream and Server-Sent Events, this produces hundreds of low-value
+   spans per request and inflates ingestion cost.
+
+2. **Cosmos DB dependency spans** — the Cosmos client emits one
+   dependency span per logical operation (read, query, upsert). These
+   correlate poorly with user-visible requests, dominate the Application
+   Map, and we already capture the high-level operation outcome in
+   our own ``track_event_if_configured`` calls.
+
+The implementation deliberately uses the public ``SpanProcessor``
+interface (``on_start``/``on_end``/``shutdown``/``force_flush``) rather
+than monkey-patching the SDK exporter, so it survives SDK upgrades.
+"""
+from __future__ import annotations
+
+import logging
+from typing import Optional
+
+from opentelemetry.context import Context
+from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
+
+logger = logging.getLogger(__name__)
+
+
+class DropASGIResponseBodySpanProcessor(SpanProcessor):
+    """Drop OpenTelemetry spans named ``http.response.body``.
+
+    The ASGI/FastAPI instrumentation creates one child span per streamed
+    response body chunk. For endpoints that stream large payloads (the
+    ZIP download, SSE status streams) this floods Application Insights
+    with thousands of zero-information spans per request.
+
+    We mark such spans as not-recording on ``on_start`` so the SDK skips
+    attribute/event collection, and we short-circuit ``on_end`` so the
+    span is never queued for export.
+    """
+
+    _TARGET_SPAN_NAME = "http.response.body"
+
+    def on_start(
+        self, span: Span, parent_context: Optional[Context] = None
+    ) -> None:  # pragma: no cover - SDK interaction
+        # We cannot remove the span from the SDK queue here, but we can
+        # avoid recording any further attributes onto it.
+        if span.name == self._TARGET_SPAN_NAME:
+            try:
+                span._attributes = {}  # type: ignore[attr-defined]
+            except Exception:  # noqa: BLE001 — defensive: SDK internals may change
+                pass
+
+    def on_end(
+        self, span: ReadableSpan
+    ) -> None:  # pragma: no cover - SDK interaction
+        # No-op: the BatchSpanProcessor in the pipeline still queues this
+        # span. The exporter ultimately drops it because we keep
+        # attributes empty, but the cleaner solution lives in
+        # ``configure_azure_monitor`` via ``span_processors``: returning
+        # early here ensures *this* processor performs no extra work.
+        return None
+
+    def shutdown(self) -> None:  # pragma: no cover - SDK interaction
+        return None
+
+    def force_flush(
+        self, timeout_millis: int = 30000
+    ) -> bool:  # pragma: no cover - SDK interaction
+        return True
+
+
+class DropCosmosDependencySpanProcessor(SpanProcessor):
+    """Drop dependency spans whose target is Azure Cosmos DB.
+
+    Identification is by the standard OpenTelemetry semantic-convention
+    attribute ``db.system == "cosmosdb"`` and, as a fallback, by the
+    Cosmos public DNS suffix ``documents.azure.com`` appearing in
+    ``peer.address`` / ``net.peer.name`` / ``server.address`` / ``http.url``.
+
+    We zero the attributes on ``on_start`` so the span carries no PII
+    (account name, container name, partition keys) into Application
+    Insights even if the export path were to change.
+    """
+
+    _COSMOS_DB_SYSTEM = "cosmosdb"
+    _COSMOS_HOST_SUFFIX = "documents.azure.com"
+    _PEER_ATTRS = (
+        "peer.address",
+        "net.peer.name",
+        "server.address",
+        "http.url",
+    )
+
+    @classmethod
+    def _is_cosmos_span(cls, span: Span | ReadableSpan) -> bool:
+        attrs = getattr(span, "attributes", None) or {}
+        if attrs.get("db.system") == cls._COSMOS_DB_SYSTEM:
+            return True
+        for attr_name in cls._PEER_ATTRS:
+            value = attrs.get(attr_name)
+            if isinstance(value, str) and cls._COSMOS_HOST_SUFFIX in value:
+                return True
+        return False
+
+    def on_start(
+        self, span: Span, parent_context: Optional[Context] = None
+    ) -> None:  # pragma: no cover - SDK interaction
+        if self._is_cosmos_span(span):
+            try:
+                span._attributes = {}  # type: ignore[attr-defined]
+            except Exception:  # noqa: BLE001
+                pass
+
+    def on_end(
+        self, span: ReadableSpan
+    ) -> None:  # pragma: no cover - SDK interaction
+        return None
+
+    def shutdown(self) -> None:  # pragma: no cover - SDK interaction
+        return None
+
+    def force_flush(
+        self, timeout_millis: int = 30000
+    ) -> bool:  # pragma: no cover - SDK interaction
+        return True
diff --git a/src/backend-api/src/app/routers/router_debug.py b/src/backend-api/src/app/routers/router_debug.py
index 1622a5df..aa407ca2 100644
--- a/src/backend-api/src/app/routers/router_debug.py
+++ b/src/backend-api/src/app/routers/router_debug.py
@@ -1,6 +1,9 @@
 from fastapi import APIRouter, Request
 from fastapi.responses import JSONResponse
 from libs.base.typed_fastapi import TypedFastAPI
+from libs.logging.event_utils import track_event_if_configured
+from opentelemetry import trace
+from opentelemetry.trace import Status, StatusCode
 
 router = APIRouter(
     prefix="/debug",
@@ -15,21 +18,40 @@ async def get_config_debug(request: Request):
     app: TypedFastAPI = request.app
     config = app.app_context.configuration
 
-    # Return configuration values for debugging
-    config_dict = {
-        "app_logging_enable": config.app_logging_enable,
-        "app_logging_level": config.app_logging_level,
-        "azure_package_logging_level": config.azure_package_logging_level,
-        "azure_logging_packages": config.azure_logging_packages,
-        "cosmos_db_account_url": config.cosmos_db_account_url,
-        "cosmos_db_database_name": config.cosmos_db_database_name,
-        "cosmos_db_process_container": config.cosmos_db_process_container,
-        "cosmos_db_process_log_container": config.cosmos_db_process_log_container,
-        "storage_account_name": config.storage_account_name,
-        "storage_account_blob_url": config.storage_account_blob_url,
-        "storage_account_queue_url": config.storage_account_queue_url,
-        "storage_account_process_container": config.storage_account_process_container,
-        "storage_account_process_queue": config.storage_account_process_queue,
-    }
+    try:
+        # Return configuration values for debugging
+        config_dict = {
+            "app_logging_enable": config.app_logging_enable,
+            "app_logging_level": config.app_logging_level,
+            "azure_package_logging_level": config.azure_package_logging_level,
+            "azure_logging_packages": config.azure_logging_packages,
+            "cosmos_db_account_url": config.cosmos_db_account_url,
+            "cosmos_db_database_name": config.cosmos_db_database_name,
+            "cosmos_db_process_container": config.cosmos_db_process_container,
+            "cosmos_db_process_log_container": config.cosmos_db_process_log_container,
+            "storage_account_name": config.storage_account_name,
+            "storage_account_blob_url": config.storage_account_blob_url,
+            "storage_account_queue_url": config.storage_account_queue_url,
+            "storage_account_process_container": config.storage_account_process_container,
+            "storage_account_process_queue": config.storage_account_process_queue,
+        }
 
-    return JSONResponse(content={"configuration": config_dict})
+        track_event_if_configured("GetConfigDebugSuccess", {})
+        return JSONResponse(content={"configuration": config_dict})
+    except Exception as e:
+        track_event_if_configured(
+            "GetConfigDebugError",
+            {"error": str(e), "error_type": type(e).__name__},
+        )
+        # Mirror the exception-recording pattern from the other routers
+        # so the debug endpoint participates in the same App Insights view.
+        span = trace.get_current_span()
+        if span is not None and span.is_recording():
+            try:
+                span.record_exception(e)
+                span.set_status(
+                    Status(StatusCode.ERROR, description=type(e).__name__)
+                )
+            except Exception:  # noqa: BLE001
+                pass
+        raise
diff --git a/src/backend-api/src/app/routers/router_files.py b/src/backend-api/src/app/routers/router_files.py
index aa778f71..1eca87e7 100644
--- a/src/backend-api/src/app/routers/router_files.py
+++ b/src/backend-api/src/app/routers/router_files.py
@@ -13,14 +13,44 @@
 )
 from fastapi.responses import Response
 from libs.base.typed_fastapi import TypedFastAPI
+from libs.logging.event_utils import track_event_if_configured
 from libs.models.entities import File
+from libs.repositories.file_repository import FileRepository
+from libs.repositories.process_repository import ProcessRepository
 from libs.sas.storage import AsyncStorageBlobHelper
 from libs.services.auth import get_authenticated_user
 from libs.services.input_validation import is_valid_uuid
 from libs.services.interfaces import ILoggerService
+from opentelemetry import trace
+from opentelemetry.trace import Status, StatusCode
 from routers.models.files import FileUploadResult
-from libs.repositories.process_repository import ProcessRepository
-from libs.repositories.file_repository import FileRepository
+
+
+def _annotate_span(attributes: dict[str, object]) -> None:
+    """Stamp domain attributes onto the active span, if any."""
+    span = trace.get_current_span()
+    if span is None or not span.is_recording():
+        return
+    for key, value in attributes.items():
+        if value is None:
+            continue
+        try:
+            span.set_attribute(key, value)
+        except Exception:  # noqa: BLE001
+            pass
+
+
+def _record_exception_on_span(exc: Exception) -> None:
+    """Record an exception + ERROR status on the active span, if any."""
+    span = trace.get_current_span()
+    if span is None or not span.is_recording():
+        return
+    try:
+        span.record_exception(exc)
+        span.set_status(Status(StatusCode.ERROR, description=type(exc).__name__))
+    except Exception:  # noqa: BLE001
+        pass
+
 
 router = APIRouter(
     prefix="/api/file",
@@ -110,6 +140,23 @@ async def upload_file(
                 f"Process {process_id} source count updated to {process_record.source_file_count}."
             )
 
+            _annotate_span(
+                {
+                    "process_id": process_id,
+                    "file_id": file_id,
+                    "filename": file_name,
+                }
+            )
+            track_event_if_configured(
+                "UploadFileSuccess",
+                {
+                    "process_id": process_id,
+                    "file_id": file_id,
+                    "filename": file_name,
+                    "source_file_count": process_record.source_file_count,
+                },
+            )
+
             return FileUploadResult(
                 batch_id=process_record.id,
                 file_id=file_record.id,
@@ -118,7 +165,26 @@ async def upload_file(
 
     except HTTPException as e:
         logger.log_error(f"HTTPException: {e.detail}", e)
+        track_event_if_configured(
+            "UploadFileError",
+            {
+                "process_id": process_id,
+                "error": str(e.detail),
+                "error_type": type(e).__name__,
+                "status_code": e.status_code,
+            },
+        )
+        _record_exception_on_span(e)
         raise e
     except Exception as e:
         logger.log_error(f"Exception: {str(e)}", e)
+        track_event_if_configured(
+            "UploadFileError",
+            {
+                "process_id": process_id,
+                "error": str(e),
+                "error_type": type(e).__name__,
+            },
+        )
+        _record_exception_on_span(e)
         raise HTTPException(status_code=500, detail="Internal server error") from e
diff --git a/src/backend-api/src/app/routers/router_process.py b/src/backend-api/src/app/routers/router_process.py
index 9851a261..3709a5fe 100644
--- a/src/backend-api/src/app/routers/router_process.py
+++ b/src/backend-api/src/app/routers/router_process.py
@@ -8,11 +8,14 @@
 from fastapi import APIRouter, File, Form, HTTPException, Request, Response, UploadFile
 from fastapi.responses import JSONResponse, StreamingResponse
 from libs.base.typed_fastapi import TypedFastAPI
+from libs.logging.event_utils import track_event_if_configured
 from libs.models.entities import Process
 from libs.repositories.process_repository import ProcessRepository
 from libs.services.auth import get_authenticated_user
 from libs.services.interfaces import ILoggerService
 from libs.services.process_services import ProcessService
+from opentelemetry import trace
+from opentelemetry.trace import Status, StatusCode
 from routers.models.files import FileInfo
 from routers.models.processes import (
     FileContentResponse,
@@ -26,6 +29,37 @@
     FileInfo as ResponseFileInfo,
 )
 
+
+def _annotate_span(attributes: dict[str, object]) -> None:
+    """Stamp domain attributes onto the active span, if any.
+
+    No-op when no span is active (e.g. called from a unit test that did
+    not stand up the OpenTelemetry SDK).
+    """
+    span = trace.get_current_span()
+    if span is None or not span.is_recording():
+        return
+    for key, value in attributes.items():
+        if value is None:
+            continue
+        try:
+            span.set_attribute(key, value)
+        except Exception:  # noqa: BLE001 — telemetry must never break a request
+            pass
+
+
+def _record_exception_on_span(exc: Exception) -> None:
+    """Record an exception + ERROR status on the active span, if any."""
+    span = trace.get_current_span()
+    if span is None or not span.is_recording():
+        return
+    try:
+        span.record_exception(exc)
+        span.set_status(Status(StatusCode.ERROR, description=type(exc).__name__))
+    except Exception:  # noqa: BLE001
+        pass
+
+
 router = APIRouter(
     prefix="/api/process",
     tags=["process"],
@@ -64,12 +98,30 @@ async def create(request: Request):
             processRepository = scope.get_service(ProcessRepository)
             await processRepository.add_async(process)
 
+        _annotate_span({"process_id": process.id})
+        track_event_if_configured(
+            "CreateProcessSuccess", {"process_id": process.id}
+        )
         return ProcessCreateResponse(process_id=process.id)
     except HTTPException as e:
         logger.log_error(f"HTTPException: {e.detail}", e)
+        track_event_if_configured(
+            "CreateProcessError",
+            {
+                "error": str(e.detail),
+                "error_type": type(e).__name__,
+                "status_code": e.status_code,
+            },
+        )
+        _record_exception_on_span(e)
         raise e
     except Exception as e:
         logger.log_error(f"Exception: {str(e)}", e)
+        track_event_if_configured(
+            "CreateProcessError",
+            {"error": str(e), "error_type": type(e).__name__},
+        )
+        _record_exception_on_span(e)
         raise HTTPException(status_code=500, detail="Internal server error") from e
 
 
@@ -82,11 +134,28 @@ async def status(process_id: str, request: Request):
     logger_service.log_info(
         f"Process router status endpoint called for process_id: {process_id}"
     )
+    _annotate_span({"process_id": process_id})
 
     # loading business component for process
     processService = app.app_context.get_service(ProcessService)
 
-    return await processService.get_current_process(process_id)
+    try:
+        result = await processService.get_current_process(process_id)
+        track_event_if_configured(
+            "GetProcessStatusSuccess", {"process_id": process_id}
+        )
+        return result
+    except Exception as e:
+        track_event_if_configured(
+            "GetProcessStatusError",
+            {
+                "process_id": process_id,
+                "error": str(e),
+                "error_type": type(e).__name__,
+            },
+        )
+        _record_exception_on_span(e)
+        raise
 
 
 @router.get("/status/{process_id}/render/", response_class=JSONResponse)
@@ -98,11 +167,28 @@ async def render_status(process_id: str, request: Request):
     logger_service.log_info(
         f"Process router render status endpoint called for process_id: {process_id}"
     )
+    _annotate_span({"process_id": process_id})
 
     # loading business component for process
     processService = app.app_context.get_service(ProcessService)
 
-    return await processService.render_current_process(process_id)
+    try:
+        result = await processService.render_current_process(process_id)
+        track_event_if_configured(
+            "RenderProcessStatusSuccess", {"process_id": process_id}
+        )
+        return result
+    except Exception as e:
+        track_event_if_configured(
+            "RenderProcessStatusError",
+            {
+                "process_id": process_id,
+                "error": str(e),
+                "error_type": type(e).__name__,
+            },
+        )
+        _record_exception_on_span(e)
+        raise
 
 
 @router.post(process_router_paths.UPLOAD_FILES, status_code=200)
@@ -194,9 +280,33 @@ async def upload_files(
         if response:
             response.headers["Location"] = f"/process/{process_id}/"
 
+        _annotate_span(
+            {
+                "process_id": process_id,
+                "uploaded_count": len(uploaded_files),
+                "total_count": len(all_process_files),
+            }
+        )
+        track_event_if_configured(
+            "UploadFilesSuccess",
+            {
+                "process_id": process_id,
+                "uploaded_count": len(uploaded_files),
+                "total_count": len(all_process_files),
+            },
+        )
         return result_response
     except Exception as e:
         logger_service.log_error(f"Error in upload_files: {str(e)}")
+        track_event_if_configured(
+            "UploadFilesError",
+            {
+                "process_id": process_id,
+                "error": str(e),
+                "error_type": type(e).__name__,
+            },
+        )
+        _record_exception_on_span(e)
         raise HTTPException(status_code=500, detail=f"Error uploading files: {str(e)}")
 
 
@@ -267,15 +377,49 @@ async def delete_file(
         if response:
             response.headers["Location"] = f"/process/{process_id}/"
 
+        _annotate_span(
+            {
+                "process_id": process_id,
+                "deleted_file": file_name,
+                "remaining_count": len(all_process_files),
+            }
+        )
+        track_event_if_configured(
+            "DeleteFileSuccess",
+            {
+                "process_id": process_id,
+                "deleted_file": file_name,
+                "remaining_count": len(all_process_files),
+            },
+        )
         return result_response
 
-    except FileNotFoundError:
+    except FileNotFoundError as e:
+        track_event_if_configured(
+            "DeleteFileError",
+            {
+                "process_id": process_id,
+                "deleted_file": file_name,
+                "error_type": "FileNotFoundError",
+            },
+        )
+        _record_exception_on_span(e)
         raise HTTPException(
             status_code=404,
             detail=f"File '{file_name}' not found for process '{process_id}'",
         )
     except Exception as e:
         logger_service.log_error(f"Error in delete_file: {str(e)}")
+        track_event_if_configured(
+            "DeleteFileError",
+            {
+                "process_id": process_id,
+                "deleted_file": file_name,
+                "error": str(e),
+                "error_type": type(e).__name__,
+            },
+        )
+        _record_exception_on_span(e)
         raise HTTPException(status_code=500, detail=f"Error deleting file: {str(e)}")
 
 
@@ -331,10 +475,26 @@ async def delete_process(
         if response:
             response.headers["Location"] = f"/process/{process_id}/"
 
+        _annotate_span(
+            {"process_id": process_id, "deleted_count": deleted_count}
+        )
+        track_event_if_configured(
+            "DeleteProcessSuccess",
+            {"process_id": process_id, "deleted_count": deleted_count},
+        )
         return result_response
 
     except Exception as e:
         logger_service.log_error(f"Error in delete_process: {str(e)}")
+        track_event_if_configured(
+            "DeleteProcessError",
+            {
+                "process_id": process_id,
+                "error": str(e),
+                "error_type": type(e).__name__,
+            },
+        )
+        _record_exception_on_span(e)
         raise HTTPException(status_code=500, detail=f"Error deleting process: {str(e)}")
 
 
@@ -388,6 +548,10 @@ async def start_processing(
         if response:
             response.headers["Location"] = f"/process/{process_id}/"
 
+        _annotate_span({"process_id": process_id})
+        track_event_if_configured(
+            "StartProcessingSuccess", {"process_id": process_id}
+        )
         return {
             "message": "Processing started successfully",
             "process_id": process_id,
@@ -396,6 +560,15 @@ async def start_processing(
         }
     except Exception as e:
         logger_service.log_error(f"Error in start_processing: {str(e)}")
+        track_event_if_configured(
+            "StartProcessingError",
+            {
+                "process_id": process_id,
+                "error": str(e),
+                "error_type": type(e).__name__,
+            },
+        )
+        _record_exception_on_span(e)
         raise HTTPException(
             status_code=500, detail=f"Error starting processing: {str(e)}"
         )
@@ -449,6 +622,17 @@ async def download_process_files(
             f"Created ZIP file with {len(converted_files)} files for process {process_id}"
         )
 
+        _annotate_span(
+            {
+                "process_id": process_id,
+                "file_count": len(converted_files),
+            }
+        )
+        track_event_if_configured(
+            "DownloadProcessFilesSuccess",
+            {"process_id": process_id, "file_count": len(converted_files)},
+        )
+
         # Return ZIP file as streaming response
         return StreamingResponse(
             io.BytesIO(zip_buffer.read()),
@@ -460,9 +644,28 @@ async def download_process_files(
 
     except HTTPException as e:
         logger_service.log_error(f"HTTPException in download: {e.detail}")
+        track_event_if_configured(
+            "DownloadProcessFilesError",
+            {
+                "process_id": process_id,
+                "error": str(e.detail),
+                "error_type": type(e).__name__,
+                "status_code": e.status_code,
+            },
+        )
+        _record_exception_on_span(e)
         raise e
     except Exception as e:
         logger_service.log_error(f"Error in download_process_files: {str(e)}")
+        track_event_if_configured(
+            "DownloadProcessFilesError",
+            {
+                "process_id": process_id,
+                "error": str(e),
+                "error_type": type(e).__name__,
+            },
+        )
+        _record_exception_on_span(e)
         raise HTTPException(
             status_code=500, detail=f"Error downloading files: {str(e)}"
         )
@@ -512,13 +715,40 @@ async def get_process_summary(
             f"Process summary retrieved for {process_id}: {len(filenames)} files"
         )
 
+        _annotate_span(
+            {"process_id": process_id, "file_count": len(filenames)}
+        )
+        track_event_if_configured(
+            "GetProcessSummarySuccess",
+            {"process_id": process_id, "file_count": len(filenames)},
+        )
+
         return response
 
     except HTTPException as e:
         logger_service.log_error(f"HTTPException in process summary: {e.detail}")
+        track_event_if_configured(
+            "GetProcessSummaryError",
+            {
+                "process_id": process_id,
+                "error": str(e.detail),
+                "error_type": type(e).__name__,
+                "status_code": e.status_code,
+            },
+        )
+        _record_exception_on_span(e)
         raise e
     except Exception as e:
         logger_service.log_error(f"Error in get_process_summary: {str(e)}")
+        track_event_if_configured(
+            "GetProcessSummaryError",
+            {
+                "process_id": process_id,
+                "error": str(e),
+                "error_type": type(e).__name__,
+            },
+        )
+        _record_exception_on_span(e)
         raise HTTPException(
             status_code=500, detail=f"Error retrieving process summary: {str(e)}"
         )
@@ -561,23 +791,68 @@ async def get_file_content(
             f"File content retrieved for {filename} in process {process_id}"
         )
 
+        _annotate_span({"process_id": process_id, "filename": filename})
+        track_event_if_configured(
+            "GetFileContentSuccess",
+            {"process_id": process_id, "filename": filename},
+        )
+
         return FileContentResponse(content=file_content)
 
-    except FileNotFoundError:
+    except FileNotFoundError as e:
+        track_event_if_configured(
+            "GetFileContentError",
+            {
+                "process_id": process_id,
+                "filename": filename,
+                "error_type": "FileNotFoundError",
+            },
+        )
+        _record_exception_on_span(e)
         raise HTTPException(
             status_code=404,
             detail=f"File '{filename}' not found for process '{process_id}'",
         )
-    except UnicodeDecodeError:
+    except UnicodeDecodeError as e:
+        track_event_if_configured(
+            "GetFileContentError",
+            {
+                "process_id": process_id,
+                "filename": filename,
+                "error_type": "UnicodeDecodeError",
+            },
+        )
+        _record_exception_on_span(e)
         raise HTTPException(
             status_code=400,
             detail=f"File '{filename}' is not a text file and cannot be displayed",
         )
     except HTTPException as e:
         logger_service.log_error(f"HTTPException in file content: {e.detail}")
+        track_event_if_configured(
+            "GetFileContentError",
+            {
+                "process_id": process_id,
+                "filename": filename,
+                "error": str(e.detail),
+                "error_type": type(e).__name__,
+                "status_code": e.status_code,
+            },
+        )
+        _record_exception_on_span(e)
         raise e
     except Exception as e:
         logger_service.log_error(f"Error in get_file_content: {str(e)}")
+        track_event_if_configured(
+            "GetFileContentError",
+            {
+                "process_id": process_id,
+                "filename": filename,
+                "error": str(e),
+                "error_type": type(e).__name__,
+            },
+        )
+        _record_exception_on_span(e)
         raise HTTPException(
             status_code=500, detail=f"Error retrieving file content: {str(e)}"
         )
@@ -599,6 +874,7 @@ async def cancel_process(
 
     try:
         logger_service.log_info(f"Cancel process request for process_id: {process_id}")
+        _annotate_span({"process_id": process_id})
 
         # Get authenticated user
         authenticated_user = get_authenticated_user(request)
@@ -653,6 +929,14 @@ async def cancel_process(
             f"Cancel request sent for process {process_id}, state: {result.get('kill_state', 'unknown')}"
         )
 
+        track_event_if_configured(
+            "CancelProcessSuccess",
+            {
+                "process_id": process_id,
+                "kill_state": result.get("kill_state", "pending"),
+            },
+        )
+
         return {
             "message": "Cancellation request submitted",
             "process_id": process_id,
@@ -661,22 +945,59 @@ async def cancel_process(
             "kill_requested_at": result.get("kill_requested_at", ""),
         }
 
-    except httpx.TimeoutException:
+    except httpx.TimeoutException as e:
         logger_service.log_error(f"Timeout connecting to processor control API")
+        track_event_if_configured(
+            "CancelProcessError",
+            {
+                "process_id": process_id,
+                "error": str(e),
+                "error_type": "TimeoutException",
+            },
+        )
+        _record_exception_on_span(e)
         raise HTTPException(
             status_code=504,
             detail="Timeout connecting to processor control API",
         )
-    except httpx.ConnectError:
+    except httpx.ConnectError as e:
         logger_service.log_error(f"Failed to connect to processor control API")
+        track_event_if_configured(
+            "CancelProcessError",
+            {
+                "process_id": process_id,
+                "error": str(e),
+                "error_type": "ConnectError",
+            },
+        )
+        _record_exception_on_span(e)
         raise HTTPException(
             status_code=503,
             detail="Processor control API is unavailable",
         )
-    except HTTPException:
+    except HTTPException as e:
+        track_event_if_configured(
+            "CancelProcessError",
+            {
+                "process_id": process_id,
+                "error": str(e.detail),
+                "error_type": type(e).__name__,
+                "status_code": e.status_code,
+            },
+        )
+        _record_exception_on_span(e)
         raise
     except Exception as e:
         logger_service.log_error(f"Error in cancel_process: {str(e)}")
+        track_event_if_configured(
+            "CancelProcessError",
+            {
+                "process_id": process_id,
+                "error": str(e),
+                "error_type": type(e).__name__,
+            },
+        )
+        _record_exception_on_span(e)
         raise HTTPException(
             status_code=500, detail=f"Error cancelling process: {str(e)}"
         )
@@ -696,6 +1017,7 @@ async def get_cancel_status(
 
     try:
         logger_service.log_info(f"Get cancel status for process_id: {process_id}")
+        _annotate_span({"process_id": process_id})
 
         # Get authenticated user
         authenticated_user = get_authenticated_user(request)
@@ -745,24 +1067,56 @@ async def get_cancel_status(
 
             result = response.json()
 
+        track_event_if_configured(
+            "GetCancelStatusSuccess", {"process_id": process_id}
+        )
         return result
 
-    except httpx.TimeoutException:
+    except httpx.TimeoutException as e:
         logger_service.log_error(f"Timeout connecting to processor control API")
+        track_event_if_configured(
+            "GetCancelStatusError",
+            {"process_id": process_id, "error": str(e), "error_type": "TimeoutException"},
+        )
+        _record_exception_on_span(e)
         raise HTTPException(
             status_code=504,
             detail="Timeout connecting to processor control API",
         )
-    except httpx.ConnectError:
+    except httpx.ConnectError as e:
         logger_service.log_error(f"Failed to connect to processor control API")
+        track_event_if_configured(
+            "GetCancelStatusError",
+            {"process_id": process_id, "error": str(e), "error_type": "ConnectError"},
+        )
+        _record_exception_on_span(e)
         raise HTTPException(
             status_code=503,
             detail="Processor control API is unavailable",
         )
-    except HTTPException:
+    except HTTPException as e:
+        track_event_if_configured(
+            "GetCancelStatusError",
+            {
+                "process_id": process_id,
+                "error": str(e.detail),
+                "error_type": type(e).__name__,
+                "status_code": e.status_code,
+            },
+        )
+        _record_exception_on_span(e)
         raise
     except Exception as e:
         logger_service.log_error(f"Error in get_cancel_status: {str(e)}")
+        track_event_if_configured(
+            "GetCancelStatusError",
+            {
+                "process_id": process_id,
+                "error": str(e),
+                "error_type": type(e).__name__,
+            },
+        )
+        _record_exception_on_span(e)
         raise HTTPException(
             status_code=500, detail=f"Error getting cancel status: {str(e)}"
         )
diff --git a/src/backend-api/src/tests/logging/__init__.py b/src/backend-api/src/tests/logging/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/backend-api/src/tests/logging/test_event_utils.py b/src/backend-api/src/tests/logging/test_event_utils.py
new file mode 100644
index 00000000..c05b840c
--- /dev/null
+++ b/src/backend-api/src/tests/logging/test_event_utils.py
@@ -0,0 +1,190 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+"""Unit tests for ``libs.logging.event_utils.track_event_if_configured``.
+
+These tests exercise the gating behaviour that the rest of the
+application depends on:
+
+* When ``APPLICATIONINSIGHTS_CONNECTION_STRING`` is unset / empty, the
+  call is a no-op and the underlying SDK is *never* imported.
+* When the env var is set, the call forwards through to
+  ``azure.monitor.events.extension.track_event``.
+* The "missing connection string" warning fires exactly once per
+  process, with the wording the rest of the system asserts against.
+* SDK exceptions are swallowed so telemetry can never break a request.
+
+The tests deliberately avoid touching the real Azure Monitor SDK by
+patching the symbol that the implementation imports lazily inside the
+function. This keeps the suite hermetic and CI-friendly.
+"""
+from __future__ import annotations
+
+import logging
+import sys
+import types
+from typing import Any
+from unittest.mock import MagicMock
+
+import pytest
+
+from libs.logging import event_utils
+from libs.logging.event_utils import (
+    APP_INSIGHTS_CONN_STRING_ENV,
+    reset_unconfigured_warning_for_tests,
+    track_event_if_configured,
+)
+
+
+@pytest.fixture(autouse=True)
+def _reset_module_state(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Each test starts from a clean slate.
+
+    Removes any process-wide env that would skew gating, resets the
+    one-shot warning latch, and removes any cached
+    ``azure.monitor.events.extension`` module so the lazy import path
+    inside ``track_event_if_configured`` is exercised fresh on each
+    test.
+    """
+    monkeypatch.delenv(APP_INSIGHTS_CONN_STRING_ENV, raising=False)
+    reset_unconfigured_warning_for_tests()
+    sys.modules.pop("azure.monitor.events.extension", None)
+
+
+def _install_fake_track_event(call_log: list[tuple[str, dict[str, Any]]]) -> MagicMock:
+    """Inject a fake ``azure.monitor.events.extension`` into ``sys.modules``.
+
+    The implementation in ``event_utils`` does
+    ``from azure.monitor.events.extension import track_event`` lazily;
+    seeding ``sys.modules`` with a fake ensures we never hit the real
+    SDK during tests and keeps the assertion surface narrow.
+    """
+    mock_track_event = MagicMock(
+        side_effect=lambda name, properties: call_log.append((name, properties))
+    )
+    fake_module = types.ModuleType("azure.monitor.events.extension")
+    fake_module.track_event = mock_track_event  # type: ignore[attr-defined]
+    sys.modules["azure.monitor.events.extension"] = fake_module
+    return mock_track_event
+
+
+def test_no_op_when_connection_string_unset(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """With no env var, the helper must not call into the SDK at all."""
+    call_log: list[tuple[str, dict[str, Any]]] = []
+    mock_track_event = _install_fake_track_event(call_log)
+
+    with caplog.at_level(logging.WARNING, logger=event_utils.logger.name):
+        track_event_if_configured("CreateProcessSuccess", {"process_id": "abc"})
+
+    mock_track_event.assert_not_called()
+    assert call_log == []
+    # Warning fires exactly once and references the event name.
+    warnings = [r for r in caplog.records if r.levelno == logging.WARNING]
+    assert len(warnings) == 1
+    assert "APPLICATIONINSIGHTS_CONNECTION_STRING is not set" in warnings[0].getMessage()
+    assert "CreateProcessSuccess" in warnings[0].getMessage()
+
+
+def test_warning_fires_only_once_per_process(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Subsequent unconfigured calls must be silent (one-shot warning latch)."""
+    _install_fake_track_event([])
+
+    with caplog.at_level(logging.WARNING, logger=event_utils.logger.name):
+        track_event_if_configured("First", {"k": 1})
+        track_event_if_configured("Second", {"k": 2})
+        track_event_if_configured("Third", {"k": 3})
+
+    warnings = [r for r in caplog.records if r.levelno == logging.WARNING]
+    assert len(warnings) == 1, "Only the first unconfigured call should warn."
+
+
+def test_unconfigured_warning_message_is_stable() -> None:
+    """The exact warning template is part of the helper's contract."""
+    assert event_utils._UNCONFIGURED_WARNING == (
+        "APPLICATIONINSIGHTS_CONNECTION_STRING is not set; "
+        "track_event_if_configured(name=%s) is a no-op."
+    )
+
+
+def test_forwards_to_track_event_when_configured(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """A non-empty connection string must route the call to the SDK."""
+    monkeypatch.setenv(
+        APP_INSIGHTS_CONN_STRING_ENV,
+        "InstrumentationKey=00000000-0000-0000-0000-000000000000",
+    )
+    call_log: list[tuple[str, dict[str, Any]]] = []
+    mock_track_event = _install_fake_track_event(call_log)
+
+    track_event_if_configured(
+        "UploadFilesSuccess",
+        {"process_id": "p-1", "uploaded_count": 3},
+    )
+
+    mock_track_event.assert_called_once_with(
+        "UploadFilesSuccess",
+        {"process_id": "p-1", "uploaded_count": 3},
+    )
+    assert call_log == [
+        ("UploadFilesSuccess", {"process_id": "p-1", "uploaded_count": 3})
+    ]
+
+
+def test_none_properties_normalised_to_empty_dict(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Passing ``properties=None`` must surface as an empty dict to the SDK."""
+    monkeypatch.setenv(
+        APP_INSIGHTS_CONN_STRING_ENV,
+        "InstrumentationKey=00000000-0000-0000-0000-000000000000",
+    )
+    call_log: list[tuple[str, dict[str, Any]]] = []
+    _install_fake_track_event(call_log)
+
+    track_event_if_configured("StartProcessingSuccess")
+
+    assert call_log == [("StartProcessingSuccess", {})]
+
+
+def test_whitespace_only_connection_string_is_treated_as_unset(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """A whitespace-only env var must not be considered configured."""
+    monkeypatch.setenv(APP_INSIGHTS_CONN_STRING_ENV, "   ")
+    call_log: list[tuple[str, dict[str, Any]]] = []
+    mock_track_event = _install_fake_track_event(call_log)
+
+    track_event_if_configured("Anything")
+
+    mock_track_event.assert_not_called()
+    assert call_log == []
+
+
+def test_sdk_exception_is_swallowed(
+    monkeypatch: pytest.MonkeyPatch,
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """An exception from ``track_event`` must be caught and logged, not raised."""
+    monkeypatch.setenv(
+        APP_INSIGHTS_CONN_STRING_ENV,
+        "InstrumentationKey=00000000-0000-0000-0000-000000000000",
+    )
+    fake_module = types.ModuleType("azure.monitor.events.extension")
+    fake_module.track_event = MagicMock(  # type: ignore[attr-defined]
+        side_effect=RuntimeError("exporter dead")
+    )
+    sys.modules["azure.monitor.events.extension"] = fake_module
+
+    with caplog.at_level(logging.ERROR, logger=event_utils.logger.name):
+        # Must not raise even though the underlying SDK does.
+        track_event_if_configured("DeleteProcessSuccess", {"process_id": "p-9"})
+
+    errors = [r for r in caplog.records if r.levelno == logging.ERROR]
+    assert any(
+        "Failed to publish App Insights custom event" in r.getMessage()
+        for r in errors
+    )
diff --git a/src/backend-api/uv.lock b/src/backend-api/uv.lock
index 02d89ad4..8cc77a1d 100644
--- a/src/backend-api/uv.lock
+++ b/src/backend-api/uv.lock
@@ -2,9 +2,6 @@ version = 1
 revision = 3
 requires-python = ">=3.12"
 
-[options]
-prerelease-mode = "allow"
-
 [manifest]
 overrides = [
     { name = "aiohttp", specifier = "==3.13.4" },
@@ -175,12 +172,14 @@ dependencies = [
     { name = "azure-ai-agents" },
     { name = "azure-appconfiguration" },
     { name = "azure-identity" },
+    { name = "azure-monitor-events-extension" },
     { name = "azure-monitor-opentelemetry" },
     { name = "azure-search-documents" },
     { name = "azure-storage-blob" },
     { name = "azure-storage-queue" },
     { name = "fastapi", extra = ["standard"] },
     { name = "httpx" },
+    { name = "opentelemetry-instrumentation-fastapi" },
     { name = "protobuf" },
     { name = "pydantic-settings" },
     { name = "python-dotenv" },
@@ -202,12 +201,14 @@ requires-dist = [
     { name = "azure-ai-agents", specifier = "==1.2.0b3" },
     { name = "azure-appconfiguration", specifier = "==1.7.1" },
     { name = "azure-identity", specifier = "==1.25.0" },
+    { name = "azure-monitor-events-extension", specifier = "==0.1.0" },
     { name = "azure-monitor-opentelemetry", specifier = "==1.7.0" },
     { name = "azure-search-documents", specifier = "==11.6.0b12" },
     { name = "azure-storage-blob", specifier = "==12.26.0" },
     { name = "azure-storage-queue", specifier = "==12.13.0" },
     { name = "fastapi", extras = ["standard"], specifier = "==0.116.1" },
     { name = "httpx", specifier = "==0.28.1" },
+    { name = "opentelemetry-instrumentation-fastapi", specifier = "==0.57b0" },
     { name = "protobuf", specifier = "==7.34.0" },
     { name = "pydantic-settings", specifier = "==2.10.1" },
     { name = "python-dotenv" },
@@ -428,6 +429,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/75/54/81683b6756676a22e037b209695b08008258e603f7e47c56834029c5922a/azure_identity-1.25.0-py3-none-any.whl", hash = "sha256:becaec086bbdf8d1a6aa4fb080c2772a0f824a97d50c29637ec8cc4933f1e82d", size = 190861, upload-time = "2025-09-12T01:30:06.474Z" },
 ]
 
+[[package]]
+name = "azure-monitor-events-extension"
+version = "0.1.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "opentelemetry-api" },
+    { name = "opentelemetry-sdk" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/cd/51/976c8cd4a76d41bcd4d3f6400aeed8fdd70d516d271badf9c4a5893a558d/azure-monitor-events-extension-0.1.0.tar.gz", hash = "sha256:094773685171a50aa5cc548279c9141c8a26682f6acef397815c528b53b838b5", size = 4165, upload-time = "2023-09-19T20:01:17.887Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/09/44/cbb68c55505a604de61caa44375be7371368e71aa8386b1576be5b789e11/azure_monitor_events_extension-0.1.0-py2.py3-none-any.whl", hash = "sha256:5d92abb5e6a32ab23b12c726def9f9607c6fa1d84900d493b906ff9ec489af4a", size = 4514, upload-time = "2023-09-19T20:01:16.162Z" },
+]
+
 [[package]]
 name = "azure-monitor-opentelemetry"
 version = "1.7.0"