Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/cart/README.env
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,25 @@ This file lists the environment variables used in CaRT.
D_PROVIDER_AUTH_KEY is assumed to be empty.
Supports a comma separated list of keys, similar to D_INTERFACE handling

. D_ADDR_FORMAT
Selects the preferred IP address family for fabric initialization. The
value is forwarded to Mercury via na_init_info.addr_format and used as
a hint by libfabric's fabric scan to enumerate interfaces of the chosen
family. Accepted values (case-sensitive):
- "unspec" (default) - leave it to the plugin; Mercury's na_ofi
plugin falls back to its per-provider
preference (IPv4 for verbs/RoCE).
- "ipv4" - prefer IPv4 (FI_SOCKADDR_IN).
- "ipv6" - prefer IPv6 (FI_SOCKADDR_IN6). Required for
IPv6-only fabric NIC deployments where the
default IPv4 preference would hide the only
usable interfaces.
- "native" - provider native addressing.
Unrecognized values fall back silently to "unspec" rather than failing
initialization. Supports a comma-separated list of values, similar to
D_INTERFACE handling; entries are matched one-to-one, in order, with the
comma-separated D_PROVIDER list for multi-provider configurations. A
provider with no corresponding entry uses "unspec".

. CRT_CREDIT_EP_CTX
Set it as the max number of in-flight RPCs to a target endpoint context, the
valid range is [0, 256].
Expand Down
13 changes: 13 additions & 0 deletions src/cart/crt_hg.c
Original file line number Diff line number Diff line change
Expand Up @@ -846,6 +846,19 @@ crt_hg_class_init(crt_provider_t provider, int ctx_idx, bool primary, int iface_

init_info.na_init_info.auth_key = prov_data->cpg_na_config.noc_auth_key;

/*
* Forward the per-provider address-family preference to Mercury. The
* default (CRT_AF_UNSPEC -> NA_ADDR_UNSPEC) preserves the original
* behavior: Mercury's na_ofi plugin falls back to its per-provider
* preference table (IPv4 for verbs/RoCE). Setting D_ADDR_FORMAT=ipv6
* (or cio_addr_format="ipv6") instead steers libfabric's fabric scan
* to enumerate IPv6 interfaces, which is required for IPv6-only
* fabric NIC deployments. CRT_AF_* values are statically asserted to
* match the corresponding NA_ADDR_* values in crt_init.c, so the
* direct cast is safe.
*/
init_info.na_init_info.addr_format = (enum na_addr_format)prov_data->cpg_addr_format;

if (crt_provider_is_block_mode(provider) && !crt_gdata.cg_progress_busy)
init_info.na_init_info.progress_mode = 0;
else
Expand Down
69 changes: 63 additions & 6 deletions src/cart/crt_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ static bool g_prov_settings_applied[CRT_PROV_COUNT];
static const char *const crt_tc_name[] = {CRT_TRAFFIC_CLASSES};
#undef X

/*
 * Textual names of the address formats. X() keeps only the string column
 * of each CRT_ADDR_FORMATS entry, so the table is indexed by the
 * enum crt_addr_format value of the same entry.
 */
#define X(a, b) b,
static const char *const crt_addr_format_name[] = {CRT_ADDR_FORMATS};
#undef X

#define CRT_ENV_OPT_GET(opt, x, env) \
do { \
if (opt != NULL && opt->cio_##x) \
Expand All @@ -35,7 +39,7 @@ static const char *const crt_tc_name[] = {CRT_TRAFFIC_CLASSES};
static int
crt_init_prov(crt_provider_t provider, bool primary, struct crt_prov_gdata *prov_gdata,
const char *interface, const char *domain, const char *port, const char *auth_key,
bool port_auto_adjust, crt_init_options_t *opt);
const char *addr_format, bool port_auto_adjust, crt_init_options_t *opt);

static void
crt_lib_init(void) __attribute__((__constructor__));
Expand Down Expand Up @@ -96,6 +100,7 @@ dump_opt(crt_init_options_t *opt)
D_INFO("domain = %s\n", opt->cio_domain);
D_INFO("port = %s\n", opt->cio_port);
D_INFO("auth_key = %s\n", opt->cio_auth_key);
D_INFO("addr_format = %s\n", opt->cio_addr_format);
D_INFO("ep_credits = %d\n", opt->cio_ep_credits);
D_INFO("Flags: fault_inject = %d, use_sensors = %d, thread_mode_single = %d, "
"progress_busy = %d, mem_device = %d\n",
Expand Down Expand Up @@ -259,6 +264,37 @@ crt_str_to_tc(const char *str)
return i == CRT_TC_UNKNOWN ? CRT_TC_UNSPEC : i;
}

/*
* Parse a textual address-format hint into the matching enum value.
* Falls back to CRT_AF_UNSPEC (Mercury default) on NULL, empty, or
* unrecognized input — keeps the historical behavior for users who
* don't set the option, and avoids surfacing typos as init failures.
*/
static enum crt_addr_format
crt_str_to_addr_format(const char *str)
{
enum crt_addr_format i = 0;

if (str == NULL || str[0] == '\0')
return CRT_AF_UNSPEC;

while (strcmp(crt_addr_format_name[i], str) != 0 && i < CRT_AF_UNKNOWN)
i++;

return i == CRT_AF_UNKNOWN ? CRT_AF_UNSPEC : i;
}

/*
 * CRT_AF_* values are kept aligned with Mercury's enum na_addr_format so
 * that crt_hg.c can cast directly when assigning na_init_info.addr_format
 * (mirroring how cg_swim_tc is cast to enum na_traffic_class). The static
 * assertions below catch any future drift between the two enums.
 *
 * Each assertion compares one CRT_AF_* enumerator with its NA_ADDR_*
 * counterpart after converting both to int. CRT_AF_UNKNOWN has no
 * assertion here; crt_str_to_addr_format() converts it to CRT_AF_UNSPEC
 * before returning, so it is not among the values that function hands out.
 */
D_CASSERT((int)CRT_AF_UNSPEC == (int)NA_ADDR_UNSPEC);
D_CASSERT((int)CRT_AF_IPV4 == (int)NA_ADDR_IPV4);
D_CASSERT((int)CRT_AF_IPV6 == (int)NA_ADDR_IPV6);
D_CASSERT((int)CRT_AF_NATIVE == (int)NA_ADDR_NATIVE);

/* first step init - for initializing crt_gdata */
static int
data_init(int server, crt_init_options_t *opt)
Expand Down Expand Up @@ -603,11 +639,12 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt)
int rc = 0;
crt_provider_t prov;
char *provider = NULL, *interface = NULL, *domain = NULL, *port = NULL, *auth_key = NULL;
char *addr_format = NULL;
char *path = NULL;
char *provider_str = NULL, *interface_str = NULL, *domain_str = NULL, *port_str = NULL,
*auth_key_str = NULL;
*auth_key_str = NULL, *addr_format_str = NULL;
char *save_provider_str = NULL, *save_interface_str = NULL, *save_domain_str = NULL,
*save_port_str = NULL, *save_auth_key_str = NULL;
*save_port_str = NULL, *save_auth_key_str = NULL, *save_addr_format_str = NULL;
bool port_auto_adjust = false, thread_mode_single = false, progress_busy = false,
mem_device = false;
int i;
Expand Down Expand Up @@ -680,6 +717,7 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt)
CRT_ENV_OPT_GET(opt, domain, D_DOMAIN);
CRT_ENV_OPT_GET(opt, port, D_PORT);
CRT_ENV_OPT_GET(opt, auth_key, D_PROVIDER_AUTH_KEY);
CRT_ENV_OPT_GET(opt, addr_format, D_ADDR_FORMAT);

crt_env_get(D_PORT_AUTO_ADJUST, &port_auto_adjust);

Expand Down Expand Up @@ -743,6 +781,13 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt)
D_GOTO(unlock, rc = -DER_NOMEM);
auth_key = strtok_r(auth_key_str, ",", &save_auth_key_str);
}

if (addr_format != NULL) {
D_STRNDUP(addr_format_str, addr_format, CRT_ENV_STR_MAX_SIZE);
if (addr_format_str == NULL)
D_GOTO(unlock, rc = -DER_NOMEM);
addr_format = strtok_r(addr_format_str, ",", &save_addr_format_str);
}
}

prov = crt_str_to_provider(provider);
Expand All @@ -768,7 +813,7 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt)
* and processed in crt_na_config_init().
*/
rc = crt_init_prov(prov, true, &crt_gdata.cg_prov_gdata_primary, interface, domain, port,
auth_key, port_auto_adjust, opt);
auth_key, addr_format, port_auto_adjust, opt);
if (rc != 0)
D_GOTO(unlock, rc);

Expand Down Expand Up @@ -812,6 +857,8 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt)
port = strtok_r(NULL, ",", &save_port_str);
if (auth_key != NULL)
auth_key = strtok_r(NULL, ",", &save_auth_key_str);
if (addr_format != NULL)
addr_format = strtok_r(NULL, ",", &save_addr_format_str);

/* Secondary provider needs its own interface or domain */
if (interface == NULL && domain == NULL) {
Expand All @@ -825,7 +872,7 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt)

rc = crt_init_prov(crt_gdata.cg_secondary_provs[i], false,
&crt_gdata.cg_prov_gdata_secondary[i], interface, domain,
port, auth_key, port_auto_adjust, opt);
port, auth_key, addr_format, port_auto_adjust, opt);
if (rc != 0) {
D_ERROR("crt_init_prov() failed for secondary provider, " DF_RC
"\n",
Expand Down Expand Up @@ -904,6 +951,7 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt)
D_FREE(domain_str);
D_FREE(port_str);
D_FREE(auth_key_str);
D_FREE(addr_format_str);

if (rc != 0) {
D_ERROR("failed, " DF_RC "\n", DP_RC(rc));
Expand All @@ -916,7 +964,7 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt)
static int
crt_init_prov(crt_provider_t provider, bool primary, struct crt_prov_gdata *prov_gdata,
const char *interface, const char *domain, const char *port, const char *auth_key,
bool port_auto_adjust, crt_init_options_t *opt)
const char *addr_format, bool port_auto_adjust, crt_init_options_t *opt)
{
int rc;

Expand All @@ -926,6 +974,15 @@ crt_init_prov(crt_provider_t provider, bool primary, struct crt_prov_gdata *prov

prov_settings_apply(primary, provider, opt);

/*
* Record the requested address family on the per-provider gdata so it
* can be forwarded to Mercury via na_init_info.addr_format when each
* HG class is initialized (see crt_hg.c::crt_hg_class_init). Unknown
* or unset values resolve to CRT_AF_UNSPEC, preserving the historical
* Mercury-default behavior.
*/
prov_gdata->cpg_addr_format = crt_str_to_addr_format(addr_format);

rc = crt_na_config_init(primary, provider, interface, domain, port, auth_key,
port_auto_adjust);
if (rc != 0) {
Expand Down
34 changes: 34 additions & 0 deletions src/cart/crt_internal_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,32 @@ struct crt_na_config {
enum crt_traffic_class { CRT_TRAFFIC_CLASSES };
#undef X

/*
 * Preferred address family for fabric init. Forwarded to Mercury via
 * na_init_info.addr_format and translated by Mercury's na_ofi plugin
 * into the libfabric addr_format hint (FI_SOCKADDR_IN / FI_SOCKADDR_IN6
 * / provider native / FI_FORMAT_UNSPEC).
 *
 * Default is CRT_AF_UNSPEC, which preserves the historical behavior of
 * letting Mercury pick from its per-provider preference table (IPv4 for
 * verbs/RoCE). Set to CRT_AF_IPV6 to enable IPv6 fabric on an interface
 * that lacks an IPv4 address.
 *
 * CRT_AF_UNKNOWN is the last entry of the list and acts as an end-of-table
 * sentinel: crt_str_to_addr_format() uses it as the bound of its name
 * search and maps it to CRT_AF_UNSPEC itself, so that function never
 * returns CRT_AF_UNKNOWN to its callers.
 *
 * Each X(a, b) entry pairs an enumerator (a) with its textual name (b);
 * the enum below is built from the first column only.
 */
#define CRT_ADDR_FORMATS \
X(CRT_AF_UNSPEC, "unspec") /* Leave it up to the plugin to choose (default) */ \
X(CRT_AF_IPV4, "ipv4") /* Prefer IPv4 (FI_SOCKADDR_IN) */ \
X(CRT_AF_IPV6, "ipv6") /* Prefer IPv6 (FI_SOCKADDR_IN6) */ \
X(CRT_AF_NATIVE, "native") /* Provider native addressing */ \
X(CRT_AF_UNKNOWN, "unknown") /* Unknown / parse error sentinel */

#define X(a, b) a,
enum crt_addr_format { CRT_ADDR_FORMATS };
#undef X

struct crt_prov_gdata {
/** NA plugin type */
int cpg_provider;
Expand All @@ -77,6 +103,13 @@ struct crt_prov_gdata {
uint32_t cpg_max_exp_size;
uint32_t cpg_max_unexp_size;

/**
* Preferred address family for Mercury fabric init for this provider.
* Defaults to CRT_AF_UNSPEC (Mercury picks). Set via D_ADDR_FORMAT env
* or crt_init_options_t::cio_addr_format API field.
*/
enum crt_addr_format cpg_addr_format;

/** Number of remote tags */
uint32_t cpg_num_remote_tags;
uint32_t cpg_last_remote_tag;
Expand Down Expand Up @@ -219,6 +252,7 @@ struct crt_event_cb_priv {
ENV_STR(DD_MASK) \
ENV_STR(DD_STDERR) \
ENV_STR(DD_SUBSYS) \
ENV_STR(D_ADDR_FORMAT) \
ENV_STR(D_CLIENT_METRICS_DUMP_DIR) \
ENV(D_CLIENT_METRICS_ENABLE) \
ENV(D_CLIENT_METRICS_RETAIN) \
Expand Down
13 changes: 13 additions & 0 deletions src/include/cart/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,19 @@ typedef struct crt_init_options {
/** If set, used as the authentication key instead of D_PROVIDER_AUTH_KEY env */
char *cio_auth_key;

/**
* If set, used as the preferred address family for fabric init instead of
* the D_ADDR_FORMAT env. Accepted values: "unspec" (default), "ipv4",
* "ipv6", "native". The value is forwarded to Mercury via
* na_init_info.addr_format, which lets libfabric's fabric scan find
* interfaces of the chosen family. Useful for IPv6-only fabric
* deployments where the default IPv4 preference would otherwise hide
* the only usable interfaces. For multi-provider configurations, the
* value may be a comma-separated list (one entry per provider, same
* ordering as cio_provider).
*/
char *cio_addr_format;

/** use single thread to access context */
bool cio_thread_mode_single;

Expand Down
Loading