From patchwork Wed Apr 14 14:23:39 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vladimir Sokolovsky X-Patchwork-Id: 92415 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter.kernel.org (8.14.3/8.14.3) with ESMTP id o3EEN1hr022571 for ; Wed, 14 Apr 2010 14:23:42 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755719Ab0DNOXm (ORCPT ); Wed, 14 Apr 2010 10:23:42 -0400 Received: from mail.mellanox.co.il ([194.90.237.43]:48915 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1755706Ab0DNOXl (ORCPT ); Wed, 14 Apr 2010 10:23:41 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from vlad@mellanox.co.il) with SMTP; 14 Apr 2010 17:23:39 +0300 Received: from localhost ([10.4.1.80]) by mtlexch01.mtl.com with Microsoft SMTPSVC(6.0.3790.3959); Wed, 14 Apr 2010 17:23:39 +0300 Date: Wed, 14 Apr 2010 17:23:39 +0300 From: Vladimir Sokolovsky To: rdreier@cisco.com Cc: linux-rdma@vger.kernel.org Subject: [PATCH V4 2/2] mlx4/IB: Add support for enhanced atomic operations Message-ID: <20100414142339.GC16346@vlad-laptop> Reply-To: vlad@dev.mellanox.co.il MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.17+20080114 (2008-01-14) X-OriginalArrivalTime: 14 Apr 2010 14:23:39.0182 (UTC) FILETIME=[139DECE0:01CADBDE] X-TM-AS-Product-Ver: SMEX-8.0.0.1181-6.000.1038-17318.007 X-TM-AS-Result: No--6.910300-8.000000-31 X-TM-AS-User-Approved-Sender: No X-TM-AS-User-Blocked-Sender: No Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Wed, 14 Apr 2010 14:23:43 +0000 (UTC) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index de5263b..783eae7 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -660,6 +660,14 @@ repoll: wc->opcode = IB_WC_FETCH_ADD; wc->byte_len = 8; break; + case MLX4_OPCODE_MASKED_ATOMIC_CS: + wc->opcode = IB_WC_MASKED_COMP_SWAP; + wc->byte_len = 8; + break; + case MLX4_OPCODE_MASKED_ATOMIC_FA: + wc->opcode = IB_WC_MASKED_FETCH_ADD; + wc->byte_len = 8; + break; case MLX4_OPCODE_BIND_MW: wc->opcode = IB_WC_BIND_MW; break; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e596537..9b2a526 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -138,6 +138,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; + props->masked_atomic_cap = IB_ATOMIC_HCA; props->max_pkeys = dev->dev->caps.pkey_table_len[1]; props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 2a97c96..2ae8c81 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -84,6 +84,8 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), + [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), + [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), }; static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) @@ -1406,6 +1408,9 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr * if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask); } else { aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); aseg->compare = 0; @@ -1413,6 +1418,14 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr * } +static void set_mask_atomic_seg(struct mlx4_wqe_mask_atomic_seg *aseg, struct ib_send_wr *wr) +{ + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask); +} + static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, struct ib_send_wr *wr) { @@ -1566,6 +1579,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: + case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: set_raddr_seg(wqe, wr->wr.atomic.remote_addr, wr->wr.atomic.rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); @@ -1578,6 +1592,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; + case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); + wqe += sizeof (struct mlx4_wqe_raddr_seg); + + set_mask_atomic_seg(wqe, wr); + wqe += sizeof (struct mlx4_wqe_mask_atomic_seg); + + size += (sizeof (struct mlx4_wqe_raddr_seg) + + sizeof (struct mlx4_wqe_mask_atomic_seg)) / 16; + + break; + case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e92d1bf..7a7f9c1 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -123,8 +123,8 @@ enum { MLX4_OPCODE_RDMA_READ = 0x10, MLX4_OPCODE_ATOMIC_CS = 0x11, MLX4_OPCODE_ATOMIC_FA = 0x12, - MLX4_OPCODE_ATOMIC_MASK_CS = 0x14, - MLX4_OPCODE_ATOMIC_MASK_FA = 0x15, + MLX4_OPCODE_MASKED_ATOMIC_CS = 0x14, + MLX4_OPCODE_MASKED_ATOMIC_FA = 0x15, MLX4_OPCODE_BIND_MW = 0x18, MLX4_OPCODE_FMR = 0x19, MLX4_OPCODE_LOCAL_INVAL = 0x1b, diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 9f29d86..1a48413 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -285,6 +285,13 @@ struct mlx4_wqe_atomic_seg { __be64 compare; }; +struct mlx4_wqe_mask_atomic_seg { + __be64 swap_add; + __be64 compare; + __be64 swap_add_mask; + __be64 compare_mask; +}; + struct mlx4_wqe_data_seg { __be32 byte_count; __be32 lkey;