@@ -253,6 +253,32 @@ void print_odp_caps(const struct ibv_odp_caps *caps)
 	print_odp_trans_caps(caps->per_transport_caps.ud_odp_caps);
 }
 
+/* Print the checksum-offload capability flags for one link layer.
+ * 'name' is the attribute label (eth_csum_cap / ib_csum_cap) and
+ * 'csum_cap' the corresponding ibv_csum_cap_flags bitmask.
+ */
+static void print_csum_cap_flags(const char *name, uint32_t csum_cap)
+{
+	uint32_t unknown_csum_caps = ~(IBV_CSUM_SUPPORT_RAW |
+				       IBV_CSUM_SUPPORT_UD);
+
+	printf("\t%s:\n", name);
+	if (!csum_cap) {
+		printf("\t\t\t\t\tNO_SUPPORT\n");
+		return;
+	}
+	if (csum_cap & IBV_CSUM_SUPPORT_RAW)
+		printf("\t\t\t\t\tRAW_QP_SUPPORT\n");
+	if (csum_cap & IBV_CSUM_SUPPORT_UD)
+		printf("\t\t\t\t\tUD_QP_SUPPORT\n");
+	if (csum_cap & unknown_csum_caps)
+		printf("\t\t\t\t\tUnknown flags: 0x%" PRIX32 "\n",
+		       csum_cap & unknown_csum_caps);
+}
+
+void print_csum_caps(const struct ibv_csum_cap_per_link *caps)
+{
+	print_csum_cap_flags("eth_csum_cap", caps->eth_csum_cap);
+	print_csum_cap_flags("ib_csum_cap", caps->ib_csum_cap);
+}
+
 static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 {
 	struct ibv_context *ctx;
@@ -339,6 +365,7 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 	printf("\tlocal_ca_ack_delay:\t\t%d\n", device_attr.orig_attr.local_ca_ack_delay);
 
 	print_odp_caps(&device_attr.odp_caps);
+	print_csum_caps(&device_attr.csum_cap);
 	}
 
 	for (port = 1; port <= device_attr.orig_attr.phys_port_cnt; ++port) {
@@ -262,11 +262,18 @@ struct ibv_odp_caps_resp {
__u32 reserved;
};
+struct ibv_csum_cap_per_link_resp {
+ __u32 eth_csum_cap;
+ __u32 ib_csum_cap;
+};
+
struct ibv_query_device_resp_ex {
struct ibv_query_device_resp base;
__u32 comp_mask;
__u32 response_length;
struct ibv_odp_caps_resp odp_caps;
+ __u64 reserved0[2];
+ struct ibv_csum_cap_per_link_resp csum_cap;
};
struct ibv_query_port {
@@ -196,10 +196,16 @@ enum ibv_odp_general_caps {
IBV_ODP_SUPPORT = 1 << 0,
};
+struct ibv_csum_cap_per_link {
+ uint32_t eth_csum_cap;
+ uint32_t ib_csum_cap;
+};
+
struct ibv_device_attr_ex {
struct ibv_device_attr orig_attr;
uint32_t comp_mask;
struct ibv_odp_caps odp_caps;
+ struct ibv_csum_cap_per_link csum_cap;
};
enum ibv_mtu {
@@ -348,9 +354,14 @@ enum ibv_wc_opcode {
IBV_WC_RECV_RDMA_WITH_IMM
};
+enum {
+ IBV_WC_IP_CSUM_OK_SHIFT = 2
+};
+
enum ibv_wc_flags {
IBV_WC_GRH = 1 << 0,
- IBV_WC_WITH_IMM = 1 << 1
+ IBV_WC_WITH_IMM = 1 << 1,
+ IBV_WC_IP_CSUM_OK = 1 << IBV_WC_IP_CSUM_OK_SHIFT
};
struct ibv_wc {
@@ -646,6 +657,11 @@ enum ibv_mig_state {
IBV_MIG_ARMED
};
+enum ibv_csum_cap_flags {
+ IBV_CSUM_SUPPORT_UD = 1 << IBV_QPT_UD,
+ IBV_CSUM_SUPPORT_RAW = 1 << IBV_QPT_RAW_PACKET,
+};
+
struct ibv_qp_attr {
enum ibv_qp_state qp_state;
enum ibv_qp_state cur_qp_state;
@@ -688,7 +704,8 @@ enum ibv_send_flags {
IBV_SEND_FENCE = 1 << 0,
IBV_SEND_SIGNALED = 1 << 1,
IBV_SEND_SOLICITED = 1 << 2,
- IBV_SEND_INLINE = 1 << 3
+ IBV_SEND_INLINE = 1 << 3,
+ IBV_SEND_IP_CSUM = 1 << 4
};
struct ibv_sge {
@@ -1459,6 +1476,7 @@ ibv_query_device_ex(struct ibv_context *context,
legacy:
memset(attr, 0, sizeof(*attr));
ret = ibv_query_device(context, &attr->orig_attr);
+
return ret;
}
@@ -50,6 +50,11 @@ It is either 0 or the bitwise OR of one or more of the following flags:
.B IBV_WC_GRH \fR GRH is present (valid only for UD QPs)
.TP
.B IBV_WC_WITH_IMM \fR Immediate data value is valid
+.TP
+.B IBV_WC_IP_CSUM_OK \fR The TCP/UDP checksum and the IPv4 header checksum
+were verified successfully.
+This flag is valid only when \fBcsum_cap\fR in the extended device attributes
+reports support for this QP type under the current link layer.
.PP
Not all
.I wc
@@ -98,6 +98,10 @@ The attribute send_flags describes the properties of the \s-1WR\s0. It is either
.TP
.B IBV_SEND_INLINE \fR Send data in given gather list as inline data
in a send WQE. Valid only for Send and RDMA Write. The L_Key will not be checked.
+.TP
+.B IBV_SEND_IP_CSUM \fR Offload the IPv4 header and TCP/UDP checksum calculation.
+Valid only when \fBcsum_cap\fR in the extended device attributes reports
+support for this QP type under the current link layer.
.SH "RETURN VALUE"
.B ibv_post_send()
returns 0 on success, or the value of errno on failure (which indicates the failure reason).
@@ -160,6 +160,7 @@ int ibv_cmd_query_device_ex(struct ibv_context *context,
IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size,
QUERY_DEVICE_EX, resp, resp_core_size,
resp_size);
+
cmd->comp_mask = 0;
cmd->reserved = 0;
memset(attr->orig_attr.fw_ver, 0, sizeof(attr->orig_attr.fw_ver));
@@ -189,6 +190,18 @@ int ibv_cmd_query_device_ex(struct ibv_context *context,
}
}
+ if (attr_size >= offsetof(struct ibv_device_attr_ex, csum_cap) +
+ sizeof(attr->csum_cap)) {
+ if (resp->response_length >=
+ offsetof(struct ibv_query_device_resp_ex, csum_cap) +
+ sizeof(resp->csum_cap)) {
+ attr->csum_cap.eth_csum_cap = resp->csum_cap.eth_csum_cap;
+ attr->csum_cap.ib_csum_cap = resp->csum_cap.ib_csum_cap;
+ } else {
+ memset(&attr->csum_cap, 0, sizeof(attr->csum_cap));
+ }
+ }
+
return 0;
}
Add a device capability field csum_cap to denote IPv4 checksum offload support. Devices should configure this field if they support insertion/verification of IPv4, TCP and UDP checksums on outgoing/incoming IPv4 packets according to the link layer and QP type. Flags IBV_SEND_IP_CSUM and IBV_WC_IP_CSUM_OK are added for utilizing this capability for send and receive separately. Signed-off-by: Bodong Wang <bodong@mellanox.com> --- examples/devinfo.c | 33 +++++++++++++++++++++++++++++++++ include/infiniband/kern-abi.h | 7 +++++++ include/infiniband/verbs.h | 22 ++++++++++++++++++++-- man/ibv_poll_cq.3 | 5 +++++ man/ibv_post_send.3 | 4 ++++ src/cmd.c | 13 +++++++++++++ 6 files changed, 82 insertions(+), 2 deletions(-)