Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion ibv-conduit/README
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,12 @@ Paul H. Hargrove <PHHargrove@lbl.gov>
The default value is the maximum that, together with the maximum sized
header, fits into a 4KiB transfer (currently 4012).
A value of zero ensures the payload and header always travel separately.


+ GASNET_PACKEDLONG_ALLOC_LIMIT
Same as GASNET_PACKEDLONG_LIMIT, above, but applies to Negotiated-Payload
AMLong with a GASNet-allocated buffer.
The default is the value of GASNET_PACKEDLONG_LIMIT.

+ GASNET_NONBULKPUT_BOUNCE_LIMIT
This parameter sets the limit on the use of bounce buffers to achieve
local completion of "non-bulk" PUT and AMLong payload transfers. When
Expand Down
32 changes: 30 additions & 2 deletions ibv-conduit/gasnet_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,7 @@ static int gasnetc_load_settings(void) {
GASNETC_ENVINT(gasnetc_inline_limit, GASNET_INLINESEND_LIMIT, GASNETC_DEFAULT_INLINESEND_LIMIT, -1, 0);
GASNETC_ENVINT(gasnetc_nonbulk_bounce_limit, GASNET_NONBULKPUT_BOUNCE_LIMIT, GASNETC_DEFAULT_NONBULKPUT_BOUNCE_LIMIT, 0, 1);
GASNETC_ENVINT(gasnetc_packedlong_limit, GASNET_PACKEDLONG_LIMIT, GASNETC_DEFAULT_PACKEDLONG_LIMIT, 0, 1);
GASNETC_ENVINT(gasnetc_packedlong_alloc_limit, GASNET_PACKEDLONG_ALLOC_LIMIT, gasnetc_packedlong_limit, 0, 1);
GASNETC_ENVINT(gasnetc_am_gather_min, GASNET_AM_GATHER_MIN, GASNETC_DEFAULT_AM_GATHER_MIN, -1, 1);
if (gasnetc_am_gather_min == -1) {
// -1 is the documented value to disable this optimization
Expand Down Expand Up @@ -1067,6 +1068,12 @@ static int gasnetc_load_settings(void) {
(unsigned int)gasnetc_packedlong_limit, (unsigned int)GASNETC_MAX_PACKEDLONG);
gasnetc_packedlong_limit = GASNETC_MAX_PACKEDLONG;
}
if_pf (gasnetc_packedlong_alloc_limit > GASNETC_MAX_PACKEDLONG_(0)) {
fprintf(stderr,
"WARNING: GASNET_PACKEDLONG_ALLOC_LIMIT reduced from %u to %u\n",
(unsigned int)gasnetc_packedlong_alloc_limit, (unsigned int)GASNETC_MAX_PACKEDLONG_(0));
gasnetc_packedlong_alloc_limit = GASNETC_MAX_PACKEDLONG_(0);
}

#if GASNETC_DYNAMIC_CONNECT
gasnetc_conn_static = gasneti_getenv_yesno_withdefault("GASNET_CONNECT_STATIC", 1);
Expand Down Expand Up @@ -5175,8 +5182,29 @@ int gasnetc_commit_common(
gasneti_assert(!lc_opt);
local_cb = NULL;
local_cnt = NULL;
// TODO: RDMA of Long payload can be beneficial
copy_len = nbytes;
switch (category) {
#if GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM || GASNET_NATIVE_NP_ALLOC_REP_MEDIUM
case gasneti_Medium:
copy_len = nbytes;
break;
#endif

#if GASNET_NATIVE_NP_ALLOC_REQ_LONG || GASNET_NATIVE_NP_ALLOC_REP_LONG
case gasneti_Long:
if ((nbytes <= gasnetc_packedlong_alloc_limit) || !sd->_buf_alloc || (!GASNETC_PIN_SEGMENT && is_reply)) {
// Small enough to send like a Medium OR not in a bounce buffer (forced for firehose Reply)
copy_len = nbytes;
} else {
// Inject RMA
int rc = gasnetc_rdma_npam_long_put(sd->_ep, sd->_cep, sd->_addr, dest_addr, nbytes,
/*imm*/0 GASNETI_THREAD_PASS);
gasneti_assert(!rc); // Never fails, since never "immediate"
}
break;
#endif

default: gasneti_unreachable_error(("Invalid AM category: 0x%x",(int)category));
}
}

int rc = gasnetc_am_commit(
Expand Down
1 change: 1 addition & 0 deletions ibv-conduit/gasnet_core_fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@
VAL(C, RDMA_PUT_BOUNCE, bytes) \
VAL(C, RDMA_PUT_ZEROCP, bytes) \
VAL(C, RDMA_PUT_READONLY, bytes) \
VAL(C, RDMA_PUT_BUFFERED, bytes) \
VAL(C, RDMA_GET_BOUNCE, bytes) \
VAL(C, RDMA_GET_ZEROCP, bytes) \
CNT(C, ALLOC_AM_SPARE, cnt) \
Expand Down
6 changes: 6 additions & 0 deletions ibv-conduit/gasnet_core_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,7 @@ typedef enum {
// Long payload puts do NOT need fencing (see bug 4049)
GASNETC_OP_LONG_ZEROCP,
GASNETC_OP_LONG_BOUNCE,
GASNETC_OP_LONG_BUFFERED,
// Following all have GASNETC_OP_NEEDS_FENCE bit set
GASNETC_OP_PUT_INLINE = GASNETC_OP_NEEDS_FENCE,
GASNETC_OP_PUT_ZEROCP,
Expand Down Expand Up @@ -1039,6 +1040,10 @@ extern int gasnetc_rdma_long_put(
void *src_ptr, void *dst_ptr, size_t nbytes, gex_Flags_t flags,
gasnetc_atomic_val_t *local_cnt, gasnetc_cb_t local_cb
GASNETI_THREAD_FARG);
extern int gasnetc_rdma_npam_long_put(
gasnetc_EP_t ep, gasnetc_cep_t *cep,
void *src_ptr, void *dst_ptr, size_t nbytes, gex_Flags_t flags
GASNETI_THREAD_FARG);
extern int gasnetc_rdma_get(
gex_TM_t tm, gex_Rank_t rank,
void *src_ptr, void *dst_ptr, size_t nbytes, gex_Flags_t flags,
Expand Down Expand Up @@ -1156,6 +1161,7 @@ extern int gasnetc_am_credits_slack;
extern int gasnetc_alloc_qps; /* Number of QPs per node in gasnetc_ceps[] */
extern int gasnetc_num_qps; /* How many QPs to use per peer */
extern size_t gasnetc_packedlong_limit;
extern size_t gasnetc_packedlong_alloc_limit;
extern size_t gasnetc_inline_limit;
extern size_t gasnetc_nonbulk_bounce_limit;
#if !GASNETC_PIN_SEGMENT
Expand Down
44 changes: 44 additions & 0 deletions ibv-conduit/gasnet_core_sndrcv.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ size_t gasnetc_fh_align_mask;
size_t gasnetc_inline_limit;
size_t gasnetc_nonbulk_bounce_limit;
size_t gasnetc_packedlong_limit; // TODO-EX: adjust w/ nargs?
size_t gasnetc_packedlong_alloc_limit; // TODO-EX: adjust w/ nargs?
size_t gasnetc_put_stripe_sz, gasnetc_put_stripe_split;
size_t gasnetc_get_stripe_sz, gasnetc_get_stripe_split;
#if !GASNETC_PIN_SEGMENT
Expand Down Expand Up @@ -1008,6 +1009,12 @@ void gasnetc_snd_reap_one(struct ibv_wc *comp_p, gasnetc_hca_t *hca GASNETC_COLL
#endif
break;

#if GASNET_NATIVE_NP_ALLOC_REQ_LONG || GASNET_NATIVE_NP_ALLOC_REP_LONG
case GASNETC_OP_LONG_BUFFERED: // Zero-copy Long payload with source in the header buffer
gasneti_assert(sreq->comp.cb == NULL);
break;
#endif

case GASNETC_OP_PUT_ZEROCP: // Zero-copy PUT
case GASNETC_OP_LONG_ZEROCP: // Zero-copy Long payload
if (sreq->comp.cb != NULL) {
Expand Down Expand Up @@ -3854,6 +3861,43 @@ extern int gasnetc_rdma_long_put(
return 0;
}

#if GASNET_NATIVE_NP_ALLOC_REQ_LONG || GASNET_NATIVE_NP_ALLOC_REP_LONG
/* RDMA put tailored to the payload of a Negotiated-Payload AM Long.
 *
 * How this differs from a general Long payload put:
 *  - the caller picks the qpi by supplying the cep directly
 *  - no local completion callback is ever used
 *  - no remote completion callback is ever used
 *  - the source bytes live in the same buffer that holds the AM header
 *  - the payload is assumed to be too large for an inline send
 *    (a payload that small would have been packed instead)
 *
 * 'flags' is not consulted by this function.
 * Always returns 0.
 */
extern int gasnetc_rdma_npam_long_put(
                gasnetc_EP_t ep, gasnetc_cep_t *cep,
                void *src_ptr, void *dst_ptr,
                size_t nbytes,
                gex_Flags_t flags
                GASNETI_THREAD_FARG)
{
  gasnetc_epid_t peer = cep->epid;
  GASNETC_DECL_SR_DESC(sr_desc, GASNETC_SND_SG);
  gasnetc_sreq_t * const sreq = gasnetc_get_sreq(GASNETC_OP_LONG_BUFFERED GASNETI_THREAD_PASS);

  gasneti_assert(nbytes != 0);

  // TODO-EX:
  //   Every use of the "in auxseg" flag is a temporary hack, to be
  //   replaced by general multi-registration support later.
  const int dst_in_auxseg = gasneti_in_auxsegment(gasnetc_epid2node(peer), dst_ptr, nbytes);

  // Local side: the payload is sent straight from src_ptr, which is also
  // recorded in the send request's bb_buff field.
  sreq->bb_buff = src_ptr;
  sr_desc_sg_lst[0].addr = (uintptr_t)src_ptr;

  // Remote side: target address of the RDMA write.
  sr_desc->wr.rdma.remote_addr = (uintptr_t)dst_ptr;

  GASNETI_TRACE_EVENT_VAL(C, RDMA_PUT_BUFFERED, nbytes);

  gasnetc_bounce_common(ep, peer, dst_in_auxseg, sr_desc, nbytes, sreq, IBV_WR_RDMA_WRITE GASNETI_THREAD_PASS);

  return 0;
}
#endif

/* Perform an RDMA get
*
* Uses zero-copy (with firehose if the destination is not pre-pinned).
Expand Down