--- a/src/cq.c
+++ b/src/cq.c
@@ -329,6 +329,14 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
wc->sl = ntohs(cqe->sl_vid) >> 13;
else
wc->sl = ntohs(cqe->sl_vid) >> 12;
+
+ if ((*cur_qp) && ((*cur_qp)->qp_cap_cache & MLX4_RX_CSUM_IPV4)) {
+ uint32_t status = ntohl(cqe->status);
+ wc->wc_flags |= ((status & MLX4_CQE_STATUS_IPV4_CSUM_OK) ==
+ MLX4_CQE_STATUS_IPV4_CSUM_OK) <<
+ IBV_WC_IP_CSUM_OK_SHIFT;
+ }
+
}
return CQ_OK;
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -257,6 +257,7 @@ struct mlx4_qp {
struct mlx4_wq rq;
uint8_t link_layer;
+ uint8_t qp_cap_cache;
};
struct mlx4_av {
@@ -279,6 +280,19 @@ struct mlx4_ah {
uint8_t mac[6];
};
+enum {
+ MLX4_RX_CSUM_IPV4 = (1 << 0),
+};
+
+enum mlx4_cqe_status {
+ MLX4_CQE_STATUS_TCP_UDP_CSUM_OK = (1 << 2),
+ MLX4_CQE_STATUS_IPV4_PKT = (1 << 22),
+ MLX4_CQE_STATUS_IP_HDR_CSUM_OK = (1 << 28),
+ MLX4_CQE_STATUS_IPV4_CSUM_OK = MLX4_CQE_STATUS_IPV4_PKT |
+ MLX4_CQE_STATUS_IP_HDR_CSUM_OK |
+ MLX4_CQE_STATUS_TCP_UDP_CSUM_OK
+};
+
struct mlx4_cqe {
uint32_t vlan_my_qpn;
uint32_t immed_rss_invalid;
@@ -286,7 +300,7 @@ struct mlx4_cqe {
uint8_t sl_vid;
uint8_t reserved1;
uint16_t rlid;
- uint32_t reserved2;
+ uint32_t status;
uint32_t byte_cnt;
uint16_t wqe_index;
uint16_t checksum;
--- a/src/qp.c
+++ b/src/qp.c
@@ -200,6 +200,7 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
int ret = 0;
int size;
int i;
+ int is_csum;
pthread_spin_lock(&qp->sq.lock);
@@ -286,15 +287,23 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
break;
case IBV_QPT_UD:
+ is_csum = !!(wr->send_flags & IBV_SEND_IP_CSUM);
set_datagram_seg(wqe, wr);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+
+ ctrl->srcrb_flags |=
+ htonl((is_csum << MLX4_WQE_CTRL_IP_HDR_CSUM_SHIFT) |
+ (is_csum << MLX4_WQE_CTRL_TCP_UDP_CSUM_SHIFT));
break;
case IBV_QPT_RAW_PACKET:
+ is_csum = !!(wr->send_flags & IBV_SEND_IP_CSUM);
/* For raw eth, the MLX4_WQE_CTRL_SOLICIT flag is used
* to indicate that no icrc should be calculated */
- ctrl->srcrb_flags |= htonl(MLX4_WQE_CTRL_SOLICIT);
+ ctrl->srcrb_flags |= htonl(MLX4_WQE_CTRL_SOLICIT |
+ (is_csum << MLX4_WQE_CTRL_IP_HDR_CSUM_SHIFT) |
+ (is_csum << MLX4_WQE_CTRL_TCP_UDP_CSUM_SHIFT));
break;
default:
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -606,14 +606,26 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
struct ibv_modify_qp cmd;
struct ibv_port_attr port_attr;
struct mlx4_qp *mqp = to_mqp(qp);
+ struct ibv_device_attr device_attr;
int ret;
+ memset(&device_attr, 0, sizeof(device_attr));
if (attr_mask & IBV_QP_PORT) {
ret = ibv_query_port(qp->context, attr->port_num,
&port_attr);
if (ret)
return ret;
mqp->link_layer = port_attr.link_layer;
+
+ if (qp->qp_type == IBV_QPT_UD ||
+     qp->qp_type == IBV_QPT_RAW_PACKET) {
+ ret = ibv_query_device(qp->context, &device_attr);
+ if (ret)
+ return ret;
+
+ if (device_attr.device_cap_flags & IBV_DEVICE_IP_CSUM)
+ mqp->qp_cap_cache |= MLX4_RX_CSUM_IPV4;
+ }
}
if (qp->state == IBV_QPS_RESET &&
--- a/src/wqe.h
+++ b/src/wqe.h
@@ -44,6 +44,11 @@ enum {
};
enum {
+ MLX4_WQE_CTRL_IP_HDR_CSUM_SHIFT = 4,
+ MLX4_WQE_CTRL_TCP_UDP_CSUM_SHIFT = 5
+};
+
+enum {
MLX4_INLINE_SEG = 1 << 31,
MLX4_INLINE_ALIGN = 64,
};
RX checksum verification status is reported through wc_flags when
polling the CQ: when IBV_WC_IP_CSUM_OK is set, both the IPv4 header
checksum and the TCP/UDP checksum are OK. TX checksum offload is
enabled for TCP/UDP over IPv4 when the user sets the IBV_SEND_IP_CSUM
send flag.

A new field, qp_cap_cache, is added to struct mlx4_qp in order to
cache the device capabilities and minimize the performance hit in the
poll_one function. The capabilities are set during mlx4_modify_qp for
RAW ETH and UD QPs.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
---
 src/cq.c    |  8 ++++++++
 src/mlx4.h  | 16 +++++++++++++++-
 src/qp.c    | 11 ++++++++++-
 src/verbs.c | 12 ++++++++++++
 src/wqe.h   |  5 +++++
 5 files changed, 50 insertions(+), 2 deletions(-)
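As a usage reference, a minimal sketch (not part of the patch) of how an
application would drive the new offloads. The helper names
post_send_csum/poll_rx_csum are hypothetical; QP/CQ setup, memory
registration, and the transition to RTS are assumed to be done
elsewhere, and the IBV_DEVICE_IP_CSUM capability is assumed to have
been verified via ibv_query_device:

	#include <infiniband/verbs.h>
	#include <stdio.h>

	/* TX: ask the HCA to insert the IPv4 header and TCP/UDP
	 * checksums on this send. */
	static int post_send_csum(struct ibv_qp *qp, struct ibv_sge *sge)
	{
		struct ibv_send_wr *bad_wr;
		struct ibv_send_wr wr = {
			.wr_id      = 1,
			.sg_list    = sge,
			.num_sge    = 1,
			.opcode     = IBV_WR_SEND,
			.send_flags = IBV_SEND_SIGNALED | IBV_SEND_IP_CSUM,
		};

		return ibv_post_send(qp, &wr, &bad_wr);
	}

	/* RX: IBV_WC_IP_CSUM_OK is set only when both the IPv4 header
	 * checksum and the TCP/UDP checksum were verified by the HCA. */
	static void poll_rx_csum(struct ibv_cq *cq)
	{
		struct ibv_wc wc;

		if (ibv_poll_cq(cq, 1, &wc) == 1 &&
		    wc.status == IBV_WC_SUCCESS &&
		    (wc.wc_flags & IBV_WC_IP_CSUM_OK))
			printf("RX checksums verified in HW\n");
	}

Note that with this patch IBV_SEND_IP_CSUM only takes effect on UD and
RAW_PACKET QPs, and IBV_WC_IP_CSUM_OK is only reported once the QP has
gone through mlx4_modify_qp with IBV_QP_PORT set, which is where
qp_cap_cache is populated.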