diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index a25de535c6e..36e5e49f0d9 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -3031,6 +3031,7 @@ ds_obj_rw_handler(crt_rpc_t *rpc) uint32_t max_ver = 0; struct dtx_epoch epoch = {0}; int rc; + int retry = 0; bool need_abort = false; D_ASSERT(orw != NULL); @@ -3248,6 +3249,17 @@ ds_obj_rw_handler(crt_rpc_t *rpc) break; } + /* If one server-side retry has already been attempted and the operation still + * fails with -DER_TX_RESTART, the failure is likely caused by server overload + * or by congestion-induced RPC delay. Return -DER_INPROGRESS so that the client + * retries with some backoff delay. That avoids adding to the server workload or + * congestion, and avoids client RPC timeouts while the server keeps retrying. + */ + if (++retry > 1) { + rc = -DER_INPROGRESS; + break; + } + /* Only standalone updates use this RPC. Retry with newer epoch. */ orw->orw_epoch = d_hlc_get(); exec_arg.flags |= ORF_RESEND; @@ -4019,6 +4031,7 @@ ds_obj_punch_handler(crt_rpc_t *rpc) uint32_t max_ver = 0; struct dtx_epoch epoch; int rc; + int retry = 0; bool need_abort = false; opi = crt_req_get(rpc); @@ -4159,6 +4172,17 @@ ds_obj_punch_handler(crt_rpc_t *rpc) break; } + /* If one server-side retry has already been attempted and the operation still + * fails with -DER_TX_RESTART, the failure is likely caused by server overload + * or by congestion-induced RPC delay. Return -DER_INPROGRESS so that the client + * retries with some backoff delay. That avoids adding to the server workload or + * congestion, and avoids client RPC timeouts while the server keeps retrying. + */ + if (++retry > 1) { + rc = -DER_INPROGRESS; + break; + } + /* Only standalone punches use this RPC. Retry with newer epoch. */ opi->opi_epoch = d_hlc_get(); exec_arg.flags |= ORF_RESEND;