From patchwork Sun Feb 14 13:54:26 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vladimir Sokolovsky X-Patchwork-Id: 79286 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter.kernel.org (8.14.3/8.14.3) with ESMTP id o1EDpg92031225 for ; Sun, 14 Feb 2010 13:54:29 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1758516Ab0BNNy2 (ORCPT ); Sun, 14 Feb 2010 08:54:28 -0500 Received: from mail.mellanox.co.il ([194.90.237.43]:54161 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1754395Ab0BNNy1 (ORCPT ); Sun, 14 Feb 2010 08:54:27 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from vlad@dev.mellanox.co.il) with SMTP; 14 Feb 2010 15:54:25 +0200 Received: from localhost ([10.4.1.80]) by mtlexch01.mtl.com with Microsoft SMTPSVC(6.0.3790.3959); Sun, 14 Feb 2010 15:54:25 +0200 Date: Sun, 14 Feb 2010 15:54:26 +0200 From: Vladimir Sokolovsky To: Roland Dreier Cc: linux-rdma Subject: [PATCH V2 2/2] mlx4/IB: Add support for enhanced atomic operations Message-ID: <20100214135426.GA7666@vlad-laptop> Reply-To: Vladimir Sokolovsky MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.17+20080114 (2008-01-14) X-OriginalArrivalTime: 14 Feb 2010 13:54:25.0165 (UTC) FILETIME=[37C4EBD0:01CAAD7D] X-TM-AS-Product-Ver: SMEX-8.0.0.1181-6.000.1038-17192.007 X-TM-AS-Result: No--4.196500-8.000000-31 X-TM-AS-User-Approved-Sender: No X-TM-AS-User-Blocked-Sender: No Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Sun, 14 Feb 2010 13:54:29 +0000 (UTC) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index de5263b..8dd451e 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -660,6 +660,14 @@ repoll: wc->opcode = IB_WC_FETCH_ADD; wc->byte_len = 8; break; + case MLX4_OPCODE_ATOMIC_MASKED_CS: + wc->opcode = IB_WC_MASKED_COMP_SWAP; + wc->byte_len = 8; + break; + case MLX4_OPCODE_ATOMIC_MASKED_FA: + wc->opcode = IB_WC_MASKED_FETCH_ADD; + wc->byte_len = 8; + break; case MLX4_OPCODE_BIND_MW: wc->opcode = IB_WC_BIND_MW; break; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e596537..60e1174 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -112,6 +112,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + props->device_cap_flags |= IB_DEVICE_MASKED_ATOMIC; + props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 2a97c96..51e6a29 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -84,6 +84,8 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), + [IB_WR_ATOMIC_MASKED_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_MASKED_CS), + [IB_WR_ATOMIC_MASKED_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_MASKED_FA), }; static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) @@ -1406,6 +1408,9 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr * if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + } else if (wr->opcode == IB_WR_ATOMIC_MASKED_FETCH_AND_ADD) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask); } else { aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); aseg->compare = 0; @@ -1413,6 +1418,14 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr * } +static void set_mask_atomic_seg(struct mlx4_wqe_mask_atomic_seg *aseg, struct ib_send_wr *wr) +{ + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask); +} + static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, struct ib_send_wr *wr) { @@ -1566,6 +1579,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: + case IB_WR_ATOMIC_MASKED_FETCH_AND_ADD: set_raddr_seg(wqe, wr->wr.atomic.remote_addr, wr->wr.atomic.rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); @@ -1578,6 +1592,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; + case IB_WR_ATOMIC_MASKED_CMP_AND_SWP: + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); + wqe += sizeof (struct mlx4_wqe_raddr_seg); + + set_mask_atomic_seg(wqe, wr); + wqe += sizeof (struct mlx4_wqe_mask_atomic_seg); + + size += (sizeof (struct mlx4_wqe_raddr_seg) + + sizeof (struct mlx4_wqe_mask_atomic_seg)) / 16; + + break; + case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e92d1bf..efeb1dd 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -123,8 +123,8 @@ enum { MLX4_OPCODE_RDMA_READ = 0x10, MLX4_OPCODE_ATOMIC_CS = 0x11, MLX4_OPCODE_ATOMIC_FA = 0x12, - MLX4_OPCODE_ATOMIC_MASK_CS = 0x14, - MLX4_OPCODE_ATOMIC_MASK_FA = 0x15, + MLX4_OPCODE_ATOMIC_MASKED_CS = 0x14, + MLX4_OPCODE_ATOMIC_MASKED_FA = 0x15, MLX4_OPCODE_BIND_MW = 0x18, MLX4_OPCODE_FMR = 0x19, MLX4_OPCODE_LOCAL_INVAL = 0x1b, diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 9f29d86..1a48413 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -285,6 +285,13 @@ struct mlx4_wqe_atomic_seg { __be64 compare; }; +struct mlx4_wqe_mask_atomic_seg { + __be64 swap_add; + __be64 compare; + __be64 swap_add_mask; + __be64 compare_mask; +}; + struct mlx4_wqe_data_seg { __be32 byte_count; __be32 lkey;