
[for-next,v5,4/6] RDMA/rxe: Isolate mr code from atomic_write_reply()

Message ID 20230116235602.22899-5-rpearsonhpe@gmail.com
State Superseded
Series RDMA/rxe: Replace mr page map with an xarray

Commit Message

Bob Pearson Jan. 16, 2023, 11:56 p.m. UTC
Isolate the mr-specific code from atomic_write_reply() in rxe_resp.c into
a subroutine rxe_mr_do_atomic_write() in rxe_mr.c.
Check the length of the atomic write operation.
Make iova_to_vaddr() static.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h  |  1 +
 drivers/infiniband/sw/rxe/rxe_mr.c   | 34 ++++++++++++++
 drivers/infiniband/sw/rxe/rxe_resp.c | 69 ++++++++++++----------------
 3 files changed, 64 insertions(+), 40 deletions(-)

Comments

Jason Gunthorpe Jan. 17, 2023, 3:11 p.m. UTC | #1
On Mon, Jan 16, 2023 at 05:56:01PM -0600, Bob Pearson wrote:

> +/* only implemented for 64 bit architectures */
> +int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
> +{
> +#if defined CONFIG_64BIT

#ifdef

It is a little more typical style to provide an alternate version of
the function when #ifdefing

> +	u64 *va;
> +
> +	/* See IBA oA19-28 */
> +	if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
> +		rxe_dbg_mr(mr, "mr not in valid state");
> +		return -EINVAL;
> +	}
> +
> +	va = iova_to_vaddr(mr, iova, sizeof(value));
> +	if (unlikely(!va)) {
> +		rxe_dbg_mr(mr, "iova out of range");
> +		return -ERANGE;
> +	}
> +
> +	/* See IBA A19.4.2 */
> +	if (unlikely((uintptr_t)va & 0x7 || iova & 0x7)) {
> +		rxe_dbg_mr(mr, "misaligned address");
> +		return -RXE_ERR_NOT_ALIGNED;
> +	}
> +
> +	/* Do atomic write after all prior operations have completed */
> +	smp_store_release(va, value);
> +
> +	return 0;
> +#else
> +	WARN_ON(1);
> +	return -EINVAL;
> +#endif
> +}
> +
>  int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
>  {
>  	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
> diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
> index 1e38e5da1f4c..49298ff88d25 100644
> --- a/drivers/infiniband/sw/rxe/rxe_resp.c
> +++ b/drivers/infiniband/sw/rxe/rxe_resp.c
> @@ -764,30 +764,40 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
>  	return RESPST_ACKNOWLEDGE;
>  }
>  
> -#ifdef CONFIG_64BIT
> -static enum resp_states do_atomic_write(struct rxe_qp *qp,
> -					struct rxe_pkt_info *pkt)
> +static enum resp_states atomic_write_reply(struct rxe_qp *qp,
> +					   struct rxe_pkt_info *pkt)
>  {
> -	struct rxe_mr *mr = qp->resp.mr;
> -	int payload = payload_size(pkt);
> -	u64 src, *dst;
> -
> -	if (mr->state != RXE_MR_STATE_VALID)
> -		return RESPST_ERR_RKEY_VIOLATION;
> +	struct resp_res *res = qp->resp.res;
> +	struct rxe_mr *mr;
> +	u64 value;
> +	u64 iova;
> +	int err;
>  
> -	memcpy(&src, payload_addr(pkt), payload);
> +	if (!res) {
> +		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
> +		qp->resp.res = res;
> +	}
>  
> -	dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
> -	/* check vaddr is 8 bytes aligned. */
> -	if (!dst || (uintptr_t)dst & 7)
> -		return RESPST_ERR_MISALIGNED_ATOMIC;
> +	if (res->replay)
> +		return RESPST_ACKNOWLEDGE;
>  
> -	/* Do atomic write after all prior operations have completed */
> -	smp_store_release(dst, src);
> +	mr = qp->resp.mr;
> +	value = *(u64 *)payload_addr(pkt);
> +	iova = qp->resp.va + qp->resp.offset;
>  
> -	/* decrease resp.resid to zero */
> -	qp->resp.resid -= sizeof(payload);
> +#if defined CONFIG_64BIT

Shouldn't need a #ifdef here

> +	err = rxe_mr_do_atomic_write(mr, iova, value);
> +	if (unlikely(err)) {
> +		if (err == -RXE_ERR_NOT_ALIGNED)
> +			return RESPST_ERR_MISALIGNED_ATOMIC;
> +		else
> +			return RESPST_ERR_RKEY_VIOLATION;

Again why not return the RESPST directly then the stub can return
RESPST_ERR_UNSUPPORTED_OPCODE?

Jason
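
As a sketch of the two suggestions above (illustrative only, not the
final patch): the #ifdef wraps whole alternate versions of the function,
and the function returns RESPST_* codes directly so the 32-bit stub can
return RESPST_ERR_UNSUPPORTED_OPCODE. The choice of 0 as the success
value and the mapping of the iova failure to a rkey violation are
assumptions here.

#ifdef CONFIG_64BIT
/* only implemented for 64 bit architectures */
int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
{
	u64 *va;

	/* See IBA oA19-28 */
	if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
		rxe_dbg_mr(mr, "mr not in valid state");
		return RESPST_ERR_RKEY_VIOLATION;
	}

	va = iova_to_vaddr(mr, iova, sizeof(value));
	if (unlikely(!va)) {
		rxe_dbg_mr(mr, "iova out of range");
		return RESPST_ERR_RKEY_VIOLATION;
	}

	/* See IBA A19.4.2 */
	if (unlikely((uintptr_t)va & 0x7 || iova & 0x7)) {
		rxe_dbg_mr(mr, "misaligned address");
		return RESPST_ERR_MISALIGNED_ATOMIC;
	}

	/* Do atomic write after all prior operations have completed */
	smp_store_release(va, value);

	return 0;
}
#else
int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
{
	/* no native 64-bit single-copy-atomic store on this arch */
	return RESPST_ERR_UNSUPPORTED_OPCODE;
}
#endif
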
Bob Pearson Jan. 17, 2023, 4:57 p.m. UTC | #2
On 1/17/23 09:11, Jason Gunthorpe wrote:
> On Mon, Jan 16, 2023 at 05:56:01PM -0600, Bob Pearson wrote:
> 
>> +/* only implemented for 64 bit architectures */
>> +int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
>> +{
>> +#if defined CONFIG_64BIT
> 
> #ifdef
> 
> It is a little more typical style to provide an alternate version of
> the function when #ifdefing

I will do that.
> 
>> +	u64 *va;
>> +
>> +	/* See IBA oA19-28 */
>> +	if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
>> +		rxe_dbg_mr(mr, "mr not in valid state");
>> +		return -EINVAL;
>> +	}
>> +
>> +	va = iova_to_vaddr(mr, iova, sizeof(value));
>> +	if (unlikely(!va)) {
>> +		rxe_dbg_mr(mr, "iova out of range");
>> +		return -ERANGE;
>> +	}
>> +
>> +	/* See IBA A19.4.2 */
>> +	if (unlikely((uintptr_t)va & 0x7 || iova & 0x7)) {
>> +		rxe_dbg_mr(mr, "misaligned address");
>> +		return -RXE_ERR_NOT_ALIGNED;
>> +	}
>> +
>> +	/* Do atomic write after all prior operations have completed */
>> +	smp_store_release(va, value);
>> +
>> +	return 0;
>> +#else
>> +	WARN_ON(1);
>> +	return -EINVAL;
>> +#endif
>> +}
>> +
>>  int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
>>  {
>>  	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
>> diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
>> index 1e38e5da1f4c..49298ff88d25 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_resp.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_resp.c
>> @@ -764,30 +764,40 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
>>  	return RESPST_ACKNOWLEDGE;
>>  }
>>  
>> -#ifdef CONFIG_64BIT
>> -static enum resp_states do_atomic_write(struct rxe_qp *qp,
>> -					struct rxe_pkt_info *pkt)
>> +static enum resp_states atomic_write_reply(struct rxe_qp *qp,
>> +					   struct rxe_pkt_info *pkt)
>>  {
>> -	struct rxe_mr *mr = qp->resp.mr;
>> -	int payload = payload_size(pkt);
>> -	u64 src, *dst;
>> -
>> -	if (mr->state != RXE_MR_STATE_VALID)
>> -		return RESPST_ERR_RKEY_VIOLATION;
>> +	struct resp_res *res = qp->resp.res;
>> +	struct rxe_mr *mr;
>> +	u64 value;
>> +	u64 iova;
>> +	int err;
>>  
>> -	memcpy(&src, payload_addr(pkt), payload);
>> +	if (!res) {
>> +		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
>> +		qp->resp.res = res;
>> +	}
>>  
>> -	dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
>> -	/* check vaddr is 8 bytes aligned. */
>> -	if (!dst || (uintptr_t)dst & 7)
>> -		return RESPST_ERR_MISALIGNED_ATOMIC;
>> +	if (res->replay)
>> +		return RESPST_ACKNOWLEDGE;
>>  
>> -	/* Do atomic write after all prior operations have completed */
>> -	smp_store_release(dst, src);
>> +	mr = qp->resp.mr;
>> +	value = *(u64 *)payload_addr(pkt);
>> +	iova = qp->resp.va + qp->resp.offset;
>>  
>> -	/* decrease resp.resid to zero */
>> -	qp->resp.resid -= sizeof(payload);
>> +#if defined CONFIG_64BIT
> 
> Shouldn't need a #ifdef here

This avoids a new special error (i.e. NOT_64_bit) and makes it clear we
won't call the code in mr.

I really don't understand why Fujitsu did it all this way instead of just
using a spinlock for 32 bit architectures as a fallback. But if I want to
keep to the spirit of their implementation this is fairly clear I think.

Bob
> 
>> +	err = rxe_mr_do_atomic_write(mr, iova, value);
>> +	if (unlikely(err)) {
>> +		if (err == -RXE_ERR_NOT_ALIGNED)
>> +			return RESPST_ERR_MISALIGNED_ATOMIC;
>> +		else
>> +			return RESPST_ERR_RKEY_VIOLATION;
> 
> Again why not return the RESPST directly then the stub can return
> RESPST_ERR_UNSUPPORTED_OPCODE?
> 
> Jason
Jason Gunthorpe Jan. 17, 2023, 4:59 p.m. UTC | #3
On Tue, Jan 17, 2023 at 10:57:31AM -0600, Bob Pearson wrote:

> >> -	dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
> >> -	/* check vaddr is 8 bytes aligned. */
> >> -	if (!dst || (uintptr_t)dst & 7)
> >> -		return RESPST_ERR_MISALIGNED_ATOMIC;
> >> +	if (res->replay)
> >> +		return RESPST_ACKNOWLEDGE;
> >>  
> >> -	/* Do atomic write after all prior operations have completed */
> >> -	smp_store_release(dst, src);
> >> +	mr = qp->resp.mr;
> >> +	value = *(u64 *)payload_addr(pkt);
> >> +	iova = qp->resp.va + qp->resp.offset;
> >>  
> >> -	/* decrease resp.resid to zero */
> >> -	qp->resp.resid -= sizeof(payload);
> >> +#if defined CONFIG_64BIT
> > 
> > Shouldn't need a #ifdef here
> 
> This avoids a new special error (i.e. NOT_64_bit) and makes it clear we
> won't call the code in mr.

? That doesn't seem right
 
> I really don't understand why Fujitsu did it all this way instead of just
> using a spinlock for 32 bit architectures as a fallback. But if I want to
> keep to the spirit of their implementation this is fairly clear I think.

IIRC the IBA definition is that this is supposed to be coherent with
the host CPU; the spinlock version is for the non-atomic atomics, which
only have to be coherent with the "hca".

So a spinlock will not provide coherency with userspace that may be
touching this same atomic memory.

Jason
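
To make the coherency point concrete, here is a hypothetical 32-bit
fallback (invented for illustration, not rxe code) showing why a
spinlock is not enough:

/* Hypothetical sketch: a 32-bit kernel emulating a 64-bit atomic
 * write with a spinlock. Assumes a little-endian layout.
 */
static DEFINE_SPINLOCK(atomic_write_lock);

static void emulated_atomic_write(u64 *va, u64 value)
{
	u32 *half = (u32 *)va;

	spin_lock(&atomic_write_lock);
	/* On 32-bit the store must be split into two halves. */
	half[0] = (u32)value;
	half[1] = (u32)(value >> 32);
	spin_unlock(&atomic_write_lock);
}

/* A userspace process with the same page mapped never takes
 * atomic_write_lock, so a concurrent 64-bit load can observe half[0]
 * of the new value paired with half[1] of the old one. The lock only
 * serializes writers inside the kernel; it does not make the store
 * single-copy atomic for outside observers the way smp_store_release()
 * of a native 64-bit word does.
 */
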
Bob Pearson Jan. 17, 2023, 5:04 p.m. UTC | #4
On 1/17/23 10:59, Jason Gunthorpe wrote:
> On Tue, Jan 17, 2023 at 10:57:31AM -0600, Bob Pearson wrote:
> 
>>>> -	dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
>>>> -	/* check vaddr is 8 bytes aligned. */
>>>> -	if (!dst || (uintptr_t)dst & 7)
>>>> -		return RESPST_ERR_MISALIGNED_ATOMIC;
>>>> +	if (res->replay)
>>>> +		return RESPST_ACKNOWLEDGE;
>>>>  
>>>> -	/* Do atomic write after all prior operations have completed */
>>>> -	smp_store_release(dst, src);
>>>> +	mr = qp->resp.mr;
>>>> +	value = *(u64 *)payload_addr(pkt);
>>>> +	iova = qp->resp.va + qp->resp.offset;
>>>>  
>>>> -	/* decrease resp.resid to zero */
>>>> -	qp->resp.resid -= sizeof(payload);
>>>> +#if defined CONFIG_64BIT
>>>
>>> Shouldn't need a #ifdef here
>>
>> This avoids a new special error (i.e. NOT_64_bit) and makes it clear we
>> won't call the code in mr.
> 
> ? That doesn't seem right

That was the -3 of the -1, -2, -3 that we just fixed. There are three error paths out
of this state and we need a way to get to them. The #ifdef provides that third path.
>  
>> I really don't understand why Fujitsu did it all this way instead of just
>> using a spinlock for 32 bit architectures as a fallback. But if I want to
>> keep to the spirit of their implementation this is fairly clear I think.
> 
> IIRC the IBA definition is that this is supposed to be coherent with
> the host CPU; the spinlock version is for the non-atomic atomics, which
> only have to be coherent with the "hca".
> 
> So a spinlock will not provide coherency with userspace that may be
> touching this same atomic memory.

Thanks. That makes sense.

Bob
> 
> Jason
Jason Gunthorpe Jan. 17, 2023, 6:05 p.m. UTC | #5
On Tue, Jan 17, 2023 at 11:04:29AM -0600, Bob Pearson wrote:
> On 1/17/23 10:59, Jason Gunthorpe wrote:
> > On Tue, Jan 17, 2023 at 10:57:31AM -0600, Bob Pearson wrote:
> > 
> >>>> -	dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
> >>>> -	/* check vaddr is 8 bytes aligned. */
> >>>> -	if (!dst || (uintptr_t)dst & 7)
> >>>> -		return RESPST_ERR_MISALIGNED_ATOMIC;
> >>>> +	if (res->replay)
> >>>> +		return RESPST_ACKNOWLEDGE;
> >>>>  
> >>>> -	/* Do atomic write after all prior operations have completed */
> >>>> -	smp_store_release(dst, src);
> >>>> +	mr = qp->resp.mr;
> >>>> +	value = *(u64 *)payload_addr(pkt);
> >>>> +	iova = qp->resp.va + qp->resp.offset;
> >>>>  
> >>>> -	/* decrease resp.resid to zero */
> >>>> -	qp->resp.resid -= sizeof(payload);
> >>>> +#if defined CONFIG_64BIT
> >>>
> >>> Shouldn't need a #ifdef here
> >>
> >> This avoids a new special error (i.e. NOT_64_bit) and makes it clear we
> >> won't call the code in mr.
> > 
> > ? That doesn't seem right
> 
> >> That was the -3 of the -1, -2, -3 that we just fixed. There are three error paths out
> >> of this state and we need a way to get to them. The #ifdef provides
> >> that third path.

I feel like it should be solvable without this ifdef though

Jason
Bob Pearson Jan. 17, 2023, 8:41 p.m. UTC | #6
On 1/17/23 12:05, Jason Gunthorpe wrote:
> On Tue, Jan 17, 2023 at 11:04:29AM -0600, Bob Pearson wrote:
>> On 1/17/23 10:59, Jason Gunthorpe wrote:
>>> On Tue, Jan 17, 2023 at 10:57:31AM -0600, Bob Pearson wrote:
>>>
>>>>>> -	dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
>>>>>> -	/* check vaddr is 8 bytes aligned. */
>>>>>> -	if (!dst || (uintptr_t)dst & 7)
>>>>>> -		return RESPST_ERR_MISALIGNED_ATOMIC;
>>>>>> +	if (res->replay)
>>>>>> +		return RESPST_ACKNOWLEDGE;
>>>>>>  
>>>>>> -	/* Do atomic write after all prior operations have completed */
>>>>>> -	smp_store_release(dst, src);
>>>>>> +	mr = qp->resp.mr;
>>>>>> +	value = *(u64 *)payload_addr(pkt);
>>>>>> +	iova = qp->resp.va + qp->resp.offset;
>>>>>>  
>>>>>> -	/* decrease resp.resid to zero */
>>>>>> -	qp->resp.resid -= sizeof(payload);
>>>>>> +#if defined CONFIG_64BIT
>>>>>
>>>>> Shouldn't need a #ifdef here
>>>>
>>>> This avoids a new special error (i.e. NOT_64_bit) and makes it clear we
>>>> won't call the code in mr.
>>>
>>> ? That doesn't seem right
>>
> >> That was the -3 of the -1, -2, -3 that we just fixed. There are three error paths out
> >> of this state and we need a way to get to them. The #ifdef provides
> >> that third path.
> 
> I feel like it should be solvable without this ifdef though
> 
> Jason

You could get rid of the ifdef in the atomic_write_reply() routine, but then
the rxe_mr_do_atomic_write() routine would need a second version in the #else
case that returns something different so that the third exit could be taken,
i.e. whatever replaces the original -3. I really think this is simpler.

Bob
Jason Gunthorpe Jan. 18, 2023, 1:53 p.m. UTC | #7
On Tue, Jan 17, 2023 at 02:41:53PM -0600, Bob Pearson wrote:
> On 1/17/23 12:05, Jason Gunthorpe wrote:
> > On Tue, Jan 17, 2023 at 11:04:29AM -0600, Bob Pearson wrote:
> >> On 1/17/23 10:59, Jason Gunthorpe wrote:
> >>> On Tue, Jan 17, 2023 at 10:57:31AM -0600, Bob Pearson wrote:
> >>>
> >>>>>> -	dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
> >>>>>> -	/* check vaddr is 8 bytes aligned. */
> >>>>>> -	if (!dst || (uintptr_t)dst & 7)
> >>>>>> -		return RESPST_ERR_MISALIGNED_ATOMIC;
> >>>>>> +	if (res->replay)
> >>>>>> +		return RESPST_ACKNOWLEDGE;
> >>>>>>  
> >>>>>> -	/* Do atomic write after all prior operations have completed */
> >>>>>> -	smp_store_release(dst, src);
> >>>>>> +	mr = qp->resp.mr;
> >>>>>> +	value = *(u64 *)payload_addr(pkt);
> >>>>>> +	iova = qp->resp.va + qp->resp.offset;
> >>>>>>  
> >>>>>> -	/* decrease resp.resid to zero */
> >>>>>> -	qp->resp.resid -= sizeof(payload);
> >>>>>> +#if defined CONFIG_64BIT
> >>>>>
> >>>>> Shouldn't need a #ifdef here
> >>>>
> >>>> This avoids a new special error (i.e. NOT_64_bit) and makes it clear we
> >>>> won't call the code in mr.
> >>>
> >>> ? That doesn't seem right
> >>
> >> that was the -3 of the -1, -2, -3 that we just fixed. there are three error paths out
> >> of this state and we need a way to get to them. The #ifdef provides
> >> that third path.
> > 
> > I feel like it should be solvable without this ifdef though
> > 
> > Jason
> 
> You could get rid of the ifdef in the atomic_write_reply() routine, but then
> the rxe_mr_do_atomic_write() routine would need a second version in the #else
> case that returns something different so that the third exit could be taken,
> i.e. whatever replaces the original -3. I really think this is simpler.

You really should just return the RESPST_* from these functions.

Jason
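
For reference, a sketch of how atomic_write_reply() could then look with
the error mapping pushed down into rxe_mr_do_atomic_write() (assumed to
return 0 on success or a RESPST_* code), so the #ifdef disappears from
rxe_resp.c entirely. Illustrative only; the tail of the function is
reconstructed from the diff context below:

static enum resp_states atomic_write_reply(struct rxe_qp *qp,
					   struct rxe_pkt_info *pkt)
{
	struct resp_res *res = qp->resp.res;
	u64 value, iova;
	int err;

	if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
		qp->resp.res = res;
	}

	if (res->replay)
		return RESPST_ACKNOWLEDGE;

	value = *(u64 *)payload_addr(pkt);
	iova = qp->resp.va + qp->resp.offset;

	/* returns 0 or a RESPST_ERR_* value, so the result can be
	 * passed straight up as the next state; the 32-bit stub
	 * yields RESPST_ERR_UNSUPPORTED_OPCODE without any #ifdef here
	 */
	err = rxe_mr_do_atomic_write(qp->resp.mr, iova, value);
	if (err)
		return err;

	qp->resp.resid = 0;
	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	return RESPST_ACKNOWLEDGE;
}
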

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index bcb1bbcf50df..b1dda0cf891b 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -74,6 +74,7 @@  int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
 void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length);
 int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
 			u64 compare, u64 swap_add, u64 *orig_val);
+int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value);
 struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
 			 enum rxe_mr_lookup_type type);
 int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 15a8d44daa35..10484f671977 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -565,6 +565,40 @@  int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
 	return 0;
 }
 
+/* only implemented for 64 bit architectures */
+int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
+{
+#if defined CONFIG_64BIT
+	u64 *va;
+
+	/* See IBA oA19-28 */
+	if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
+		rxe_dbg_mr(mr, "mr not in valid state");
+		return -EINVAL;
+	}
+
+	va = iova_to_vaddr(mr, iova, sizeof(value));
+	if (unlikely(!va)) {
+		rxe_dbg_mr(mr, "iova out of range");
+		return -ERANGE;
+	}
+
+	/* See IBA A19.4.2 */
+	if (unlikely((uintptr_t)va & 0x7 || iova & 0x7)) {
+		rxe_dbg_mr(mr, "misaligned address");
+		return -RXE_ERR_NOT_ALIGNED;
+	}
+
+	/* Do atomic write after all prior operations have completed */
+	smp_store_release(va, value);
+
+	return 0;
+#else
+	WARN_ON(1);
+	return -EINVAL;
+#endif
+}
+
 int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
 {
 	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 1e38e5da1f4c..49298ff88d25 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -764,30 +764,40 @@  static enum resp_states atomic_reply(struct rxe_qp *qp,
 	return RESPST_ACKNOWLEDGE;
 }
 
-#ifdef CONFIG_64BIT
-static enum resp_states do_atomic_write(struct rxe_qp *qp,
-					struct rxe_pkt_info *pkt)
+static enum resp_states atomic_write_reply(struct rxe_qp *qp,
+					   struct rxe_pkt_info *pkt)
 {
-	struct rxe_mr *mr = qp->resp.mr;
-	int payload = payload_size(pkt);
-	u64 src, *dst;
-
-	if (mr->state != RXE_MR_STATE_VALID)
-		return RESPST_ERR_RKEY_VIOLATION;
+	struct resp_res *res = qp->resp.res;
+	struct rxe_mr *mr;
+	u64 value;
+	u64 iova;
+	int err;
 
-	memcpy(&src, payload_addr(pkt), payload);
+	if (!res) {
+		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
+		qp->resp.res = res;
+	}
 
-	dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
-	/* check vaddr is 8 bytes aligned. */
-	if (!dst || (uintptr_t)dst & 7)
-		return RESPST_ERR_MISALIGNED_ATOMIC;
+	if (res->replay)
+		return RESPST_ACKNOWLEDGE;
 
-	/* Do atomic write after all prior operations have completed */
-	smp_store_release(dst, src);
+	mr = qp->resp.mr;
+	value = *(u64 *)payload_addr(pkt);
+	iova = qp->resp.va + qp->resp.offset;
 
-	/* decrease resp.resid to zero */
-	qp->resp.resid -= sizeof(payload);
+#if defined CONFIG_64BIT
+	err = rxe_mr_do_atomic_write(mr, iova, value);
+	if (unlikely(err)) {
+		if (err == -RXE_ERR_NOT_ALIGNED)
+			return RESPST_ERR_MISALIGNED_ATOMIC;
+		else
+			return RESPST_ERR_RKEY_VIOLATION;
+	}
+#else
+	return RESPST_ERR_UNSUPPORTED_OPCODE;
+#endif
 
+	qp->resp.resid = 0;
 	qp->resp.msn++;
 
 	/* next expected psn, read handles this separately */
@@ -796,29 +806,8 @@  static enum resp_states do_atomic_write(struct rxe_qp *qp,
 
 	qp->resp.opcode = pkt->opcode;
 	qp->resp.status = IB_WC_SUCCESS;
-	return RESPST_ACKNOWLEDGE;
-}
-#else
-static enum resp_states do_atomic_write(struct rxe_qp *qp,
-					struct rxe_pkt_info *pkt)
-{
-	return RESPST_ERR_UNSUPPORTED_OPCODE;
-}
-#endif /* CONFIG_64BIT */
 
-static enum resp_states atomic_write_reply(struct rxe_qp *qp,
-					   struct rxe_pkt_info *pkt)
-{
-	struct resp_res *res = qp->resp.res;
-
-	if (!res) {
-		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
-		qp->resp.res = res;
-	}
-
-	if (res->replay)
-		return RESPST_ACKNOWLEDGE;
-	return do_atomic_write(qp, pkt);
+	return RESPST_ACKNOWLEDGE;
 }
 
 static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,