From 20b97dde42602485191ac5d3671b6c4057161b51 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Thu, 2 Jul 2026 12:37:52 +0000 Subject: [PATCH] DAOS-18193 ec: just for test Signed-off-by: Mohamad Chaarawi --- src/container/srv_target.c | 19 ++++++++++++++++++- src/include/daos_srv/pool.h | 1 + src/pool/srv.c | 14 +++++++++++++- 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/container/srv_target.c b/src/container/srv_target.c index 17ea789a984..cbd0be2e441 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -433,7 +433,24 @@ cont_child_aggregate(struct ds_cont_child *cont, cont_aggregate_cb_t agg_cb, if (!param->ap_vos_agg) vos_cont_set_mod_bound(cont->sc_hdl, epoch_range.epr_hi); - if (dss_xstream_is_busy()) + /* + * Force-merge coalesces small contiguous records into large physical + * extents to defragment the VOS tree. It's normally skipped for this + * tail (non-snapshot) range under load to avoid relocation IO. + * + * For EC objects this coalescing is the ONLY way the data shards get + * defragmented: the client can't recompute parity for partial-stripe + * writes, so small writes land as many tiny records that only VOS + * aggregation can merge. Unlike replicated objects, this range is + * already capped to the EC aggregation epoch boundary (see + * adjust_upper_bound()), so keeping force-merge enabled here can never + * merge/relocate epochs whose parity isn't yet consistent - degraded + * reads and rebuild always reconstruct identical bytes, i.e. no data + * corruption. Retain it for the VOS-agg pass while EC aggregation is + * enabled, so partial-stripe EC data doesn't stay fragmented (slow + * reads/rebuild) under sustained load. Reversible via DAOS_EC_AGG_FORCE_MERGE=0. 
+ */ + if (dss_xstream_is_busy() && !(param->ap_vos_agg && ec_agg_force_merge && !ec_agg_disabled)) flags &= ~VOS_AGG_FL_FORCE_MERGE; rc = agg_cb(cont, &epoch_range, flags, param); out: diff --git a/src/include/daos_srv/pool.h b/src/include/daos_srv/pool.h index dc4a7c747d7..39bf3f6c0ab 100644 --- a/src/include/daos_srv/pool.h +++ b/src/include/daos_srv/pool.h @@ -415,6 +415,7 @@ int ds_pool_rebuild_stop(uuid_t pool_uuid, uint32_t force, struct rsvc_hint *hint); extern bool ec_agg_disabled; +extern bool ec_agg_force_merge; int dsc_pool_open(uuid_t pool_uuid, uuid_t pool_hdl_uuid, unsigned int flags, const char *grp, diff --git a/src/pool/srv.c b/src/pool/srv.c index 226feeb31c1..efa7e91e31e 100644 --- a/src/pool/srv.c +++ b/src/pool/srv.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -22,6 +22,13 @@ #include "srv_layout.h" bool ec_agg_disabled; +/* + * Keep VOS aggregation's force-merge (record coalescing) enabled for EC objects + * even when the target xstream is busy. For EC, coalescing the data shards is the + * only way partial-stripe writes get defragmented, and it is always bounded to the + * EC aggregation epoch boundary, so it cannot merge parity-inconsistent epochs. + */ +bool ec_agg_force_merge = true; uint32_t pw_rf = -1; /* pool wise redundancy factor */ uint32_t ps_cache_intvl = 2; /* pool space cache expiration time, in seconds */ #define PW_RF_DEFAULT (2) @@ -73,6 +80,11 @@ init(void) if (unlikely(ec_agg_disabled)) D_WARN("EC aggregation is disabled.\n"); + ec_agg_force_merge = true; + d_getenv_bool("DAOS_EC_AGG_FORCE_MERGE", &ec_agg_force_merge); + if (!ec_agg_force_merge) + D_WARN("EC aggregation force-merge (coalescing under load) is disabled.\n"); + pw_rf = -1; if (!check_pool_redundancy_factor("DAOS_POOL_RF")) pw_rf = PW_RF_DEFAULT;