
[for-next,v6,7/7] RDMA/rxe: Add support for the traditional Atomic operations with ODP

Message ID 908514dfa6bbeae72d36481d893674b254ee416d.1694153251.git.matsuda-daisuke@fujitsu.com (mailing list archive)
State Superseded
Series On-Demand Paging on SoftRoCE

Commit Message

Daisuke Matsuda (Fujitsu) Sept. 8, 2023, 6:26 a.m. UTC
Enable 'fetch and add' and 'compare and swap' operations to be used with
ODP. This consists of the following steps:
 1. Verify that the page is present with write permission.
 2. If OK, execute the operation and exit.
 3. If not, trigger a page fault to map the page.
 4. Update the entry in the MR xarray.
 5. Execute the operation.

umem_mutex is used to ensure that the target page is not invalidated before
data access completes. It also protects the lists in umem_odp and the MR
xarray.
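
Condensed, the flow implemented by rxe_odp_mr_atomic_op() below is as
follows (variables are those of that function; the bounded retry around
the page fault in the actual patch is elided here):

	mutex_lock(&umem_odp->umem_mutex);

	/* step 1: is the page present with write permission? */
	if (rxe_odp_check_pages(mr, iova, sizeof(char), 0)) {
		/* steps 3 and 4: trigger the page fault and update the
		 * MR xarray; umem_mutex is held again on success
		 */
		mutex_unlock(&umem_odp->umem_mutex);
		err = rxe_odp_do_pagefault_and_lock(mr, iova, sizeof(char), 0);
		if (err < 0)
			return err;
	}

	/* steps 2 and 5: execute the operation while umem_mutex keeps
	 * the target page from being invalidated
	 */
	err = rxe_mr_do_atomic_op(mr, iova, opcode, compare, swap_add, orig_val);

	mutex_unlock(&umem_odp->umem_mutex);
	return err;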

Signed-off-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>
---
 drivers/infiniband/sw/rxe/rxe.c      |  1 +
 drivers/infiniband/sw/rxe/rxe_loc.h  |  9 ++++++
 drivers/infiniband/sw/rxe/rxe_odp.c  | 43 ++++++++++++++++++++++++++++
 drivers/infiniband/sw/rxe/rxe_resp.c |  5 +++-
 4 files changed, 57 insertions(+), 1 deletion(-)

Comments

Jason Gunthorpe Sept. 8, 2023, 2:29 p.m. UTC | #1
On Fri, Sep 08, 2023 at 03:26:48PM +0900, Daisuke Matsuda wrote:
> +int rxe_odp_mr_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
> +			 u64 compare, u64 swap_add, u64 *orig_val)
> +{
> +	int err;
> +	int retry = 0;
> +	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
> +
> +	mutex_lock(&umem_odp->umem_mutex);
> +
> +	/* Atomic operations manipulate a single char. */
> +	if (rxe_odp_check_pages(mr, iova, sizeof(char), 0))
> +		goto need_fault;
> +
> +	err = rxe_mr_do_atomic_op(mr, iova, opcode, compare,
> +				  swap_add, orig_val);
> +
> +	mutex_unlock(&umem_odp->umem_mutex);

You should just use the xarray spinlock; the umem_mutex should only be
held around the faulting flow.

> +
> +	return err;
> +
> +need_fault:
> +	/* allow max 3 tries for pagefault */
> +	do {

Why a retry loop? We already have a retry loop in
ib_umem_odp_map_dma_and_lock; it doesn't need to be done externally. If
you reach here with the lock held, then progress should be guaranteed
under the lock.

Jason
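
[For illustration, a hypothetical rework along the lines of the two
comments above might look like the sketch below. It is not the posted
patch: it assumes mr->page_list is the MR xarray referred to in the
commit message, that rxe_odp_check_pages() and rxe_mr_do_atomic_op() do
not sleep, and it does not verify that the xarray lock alone is
sufficient against a concurrent invalidation on the fast path.]

int rxe_odp_mr_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
			 u64 compare, u64 swap_add, u64 *orig_val)
{
	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
	int err;

	/* fast path: page already present and writable; the xarray
	 * spinlock keeps the entry stable for the duration of the op
	 */
	xa_lock(&mr->page_list);
	if (!rxe_odp_check_pages(mr, iova, sizeof(char), 0)) {
		err = rxe_mr_do_atomic_op(mr, iova, opcode, compare,
					  swap_add, orig_val);
		xa_unlock(&mr->page_list);
		return err;
	}
	xa_unlock(&mr->page_list);

	/* slow path: fault the page in. No external retry loop;
	 * ib_umem_odp_map_dma_and_lock() retries internally and returns
	 * with umem_mutex held, so the page cannot be invalidated before
	 * the operation completes.
	 */
	err = rxe_odp_do_pagefault_and_lock(mr, iova, sizeof(char), 0);
	if (err < 0)
		return err;

	err = rxe_mr_do_atomic_op(mr, iova, opcode, compare,
				  swap_add, orig_val);
	mutex_unlock(&umem_odp->umem_mutex);

	return err;
}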

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 207a022156f0..abd3267c2873 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -88,6 +88,7 @@  static void rxe_init_device_param(struct rxe_dev *rxe)
 		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV;
 		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE;
 		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ;
+		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
 		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
 	}
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index eeaeff8a1398..0bae9044f362 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -194,6 +194,9 @@  int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
 			 u64 iova, int access_flags, struct rxe_mr *mr);
 int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		    enum rxe_mr_copy_dir dir);
+int rxe_odp_mr_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
+			 u64 compare, u64 swap_add, u64 *orig_val);
+
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 static inline int
 rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
@@ -207,6 +210,12 @@  rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
 {
 	return -EOPNOTSUPP;
 }
+static inline int
+rxe_odp_mr_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
+		     u64 compare, u64 swap_add, u64 *orig_val)
+{
+	return RESPST_ERR_UNSUPPORTED_OPCODE;
+}
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
diff --git a/drivers/infiniband/sw/rxe/rxe_odp.c b/drivers/infiniband/sw/rxe/rxe_odp.c
index da1c0753db93..289c60cbda12 100644
--- a/drivers/infiniband/sw/rxe/rxe_odp.c
+++ b/drivers/infiniband/sw/rxe/rxe_odp.c
@@ -268,3 +268,46 @@  int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 
 	return err;
 }
+
+int rxe_odp_mr_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
+			 u64 compare, u64 swap_add, u64 *orig_val)
+{
+	int err;
+	int retry = 0;
+	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+
+	mutex_lock(&umem_odp->umem_mutex);
+
+	/* Atomic operations manipulate a single char. */
+	if (rxe_odp_check_pages(mr, iova, sizeof(char), 0))
+		goto need_fault;
+
+	err = rxe_mr_do_atomic_op(mr, iova, opcode, compare,
+				  swap_add, orig_val);
+
+	mutex_unlock(&umem_odp->umem_mutex);
+
+	return err;
+
+need_fault:
+	/* allow max 3 tries for pagefault */
+	do {
+		mutex_unlock(&umem_odp->umem_mutex);
+
+		if (retry > 2)
+			return -EFAULT;
+
+		/* umem_mutex is locked on success */
+		err = rxe_odp_do_pagefault_and_lock(mr, iova, sizeof(char), 0);
+		if (err < 0)
+			return err;
+		retry++;
+	} while (rxe_odp_check_pages(mr, iova, sizeof(char), 0));
+
+	err = rxe_mr_do_atomic_op(mr, iova, opcode, compare,
+				  swap_add, orig_val);
+
+	mutex_unlock(&umem_odp->umem_mutex);
+
+	return err;
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 9159f1bdfc6f..af3e669679a0 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -693,7 +693,10 @@  static enum resp_states atomic_reply(struct rxe_qp *qp,
 		u64 iova = qp->resp.va + qp->resp.offset;
 
 		if (mr->umem->is_odp)
-			err = RESPST_ERR_UNSUPPORTED_OPCODE;
+			err = rxe_odp_mr_atomic_op(mr, iova, pkt->opcode,
+						   atmeth_comp(pkt),
+						   atmeth_swap_add(pkt),
+						   &res->atomic.orig_val);
 		else
 			err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
 						  atmeth_comp(pkt),