Message ID | 20230116235602.22899-5-rpearsonhpe@gmail.com (mailing list archive)
---|---
State | Superseded
Series | RDMA/rxe: Replace mr page map with an xarray
On Mon, Jan 16, 2023 at 05:56:01PM -0600, Bob Pearson wrote:
> +/* only implemented for 64 bit architectures */
> +int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
> +{
> +#if defined CONFIG_64BIT

#ifdef

It is a little more typical style to provide an alternate version of
the function when #ifdefing

> +        u64 *va;
> +
> +        /* See IBA oA19-28 */
> +        if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
> +                rxe_dbg_mr(mr, "mr not in valid state");
> +                return -EINVAL;
> +        }
> +
> +        va = iova_to_vaddr(mr, iova, sizeof(value));
> +        if (unlikely(!va)) {
> +                rxe_dbg_mr(mr, "iova out of range");
> +                return -ERANGE;
> +        }
> +
> +        /* See IBA A19.4.2 */
> +        if (unlikely((uintptr_t)va & 0x7 || iova & 0x7)) {
> +                rxe_dbg_mr(mr, "misaligned address");
> +                return -RXE_ERR_NOT_ALIGNED;
> +        }
> +
> +        /* Do atomic write after all prior operations have completed */
> +        smp_store_release(va, value);
> +
> +        return 0;
> +#else
> +        WARN_ON(1);
> +        return -EINVAL;
> +#endif
> +}
> +
>  int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
>  {
>          struct rxe_sge *sge = &dma->sge[dma->cur_sge];
> diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
> index 1e38e5da1f4c..49298ff88d25 100644
> --- a/drivers/infiniband/sw/rxe/rxe_resp.c
> +++ b/drivers/infiniband/sw/rxe/rxe_resp.c
> @@ -764,30 +764,40 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
>          return RESPST_ACKNOWLEDGE;
>  }
>
> -#ifdef CONFIG_64BIT
> -static enum resp_states do_atomic_write(struct rxe_qp *qp,
> -                                        struct rxe_pkt_info *pkt)
> +static enum resp_states atomic_write_reply(struct rxe_qp *qp,
> +                                           struct rxe_pkt_info *pkt)
>  {
> -        struct rxe_mr *mr = qp->resp.mr;
> -        int payload = payload_size(pkt);
> -        u64 src, *dst;
> -
> -        if (mr->state != RXE_MR_STATE_VALID)
> -                return RESPST_ERR_RKEY_VIOLATION;
> +        struct resp_res *res = qp->resp.res;
> +        struct rxe_mr *mr;
> +        u64 value;
> +        u64 iova;
> +        int err;
>
> -        memcpy(&src, payload_addr(pkt), payload);
> +        if (!res) {
> +                res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
> +                qp->resp.res = res;
> +        }
>
> -        dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
> -        /* check vaddr is 8 bytes aligned. */
> -        if (!dst || (uintptr_t)dst & 7)
> -                return RESPST_ERR_MISALIGNED_ATOMIC;
> +        if (res->replay)
> +                return RESPST_ACKNOWLEDGE;
>
> -        /* Do atomic write after all prior operations have completed */
> -        smp_store_release(dst, src);
> +        mr = qp->resp.mr;
> +        value = *(u64 *)payload_addr(pkt);
> +        iova = qp->resp.va + qp->resp.offset;
>
> -        /* decrease resp.resid to zero */
> -        qp->resp.resid -= sizeof(payload);
> +#if defined CONFIG_64BIT

Shouldn't need a #ifdef here

> +        err = rxe_mr_do_atomic_write(mr, iova, value);
> +        if (unlikely(err)) {
> +                if (err == -RXE_ERR_NOT_ALIGNED)
> +                        return RESPST_ERR_MISALIGNED_ATOMIC;
> +                else
> +                        return RESPST_ERR_RKEY_VIOLATION;

Again why not return the RESPST directly then the stub can return
RESPST_ERR_UNSUPPORTED_OPCODE?

Jason
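For reference, the alternate-version style Jason is pointing at is the
pattern the pre-patch rxe_resp.c code quoted above already follows: the
#ifdef selects between two complete definitions of the function rather
than branching inside one body. A minimal sketch, reusing the names from
the quoted code:

#ifdef CONFIG_64BIT
static enum resp_states do_atomic_write(struct rxe_qp *qp,
                                        struct rxe_pkt_info *pkt)
{
        /* the real implementation, compiled only on 64-bit kernels */
}
#else
static enum resp_states do_atomic_write(struct rxe_qp *qp,
                                        struct rxe_pkt_info *pkt)
{
        /* stub: the opcode cannot be supported on 32-bit kernels */
        return RESPST_ERR_UNSUPPORTED_OPCODE;
}
#endif /* CONFIG_64BIT */

Callers then invoke do_atomic_write() unconditionally and no #ifdef
appears inside any function body.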
On 1/17/23 09:11, Jason Gunthorpe wrote:
> On Mon, Jan 16, 2023 at 05:56:01PM -0600, Bob Pearson wrote:
>
>> +/* only implemented for 64 bit architectures */
>> +int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
>> +{
>> +#if defined CONFIG_64BIT
>
> #ifdef
>
> It is a little more typical style to provide an alternate version of
> the function when #ifdefing

I will do that.

>
>> +        u64 *va;
>> +
>> +        /* See IBA oA19-28 */
>> +        if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
>> +                rxe_dbg_mr(mr, "mr not in valid state");
>> +                return -EINVAL;
>> +        }
>> +
>> +        va = iova_to_vaddr(mr, iova, sizeof(value));
>> +        if (unlikely(!va)) {
>> +                rxe_dbg_mr(mr, "iova out of range");
>> +                return -ERANGE;
>> +        }
>> +
>> +        /* See IBA A19.4.2 */
>> +        if (unlikely((uintptr_t)va & 0x7 || iova & 0x7)) {
>> +                rxe_dbg_mr(mr, "misaligned address");
>> +                return -RXE_ERR_NOT_ALIGNED;
>> +        }
>> +
>> +        /* Do atomic write after all prior operations have completed */
>> +        smp_store_release(va, value);
>> +
>> +        return 0;
>> +#else
>> +        WARN_ON(1);
>> +        return -EINVAL;
>> +#endif
>> +}
>> +
>>  int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
>>  {
>>          struct rxe_sge *sge = &dma->sge[dma->cur_sge];
>> diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
>> index 1e38e5da1f4c..49298ff88d25 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_resp.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_resp.c
>> @@ -764,30 +764,40 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
>>          return RESPST_ACKNOWLEDGE;
>>  }
>>
>> -#ifdef CONFIG_64BIT
>> -static enum resp_states do_atomic_write(struct rxe_qp *qp,
>> -                                        struct rxe_pkt_info *pkt)
>> +static enum resp_states atomic_write_reply(struct rxe_qp *qp,
>> +                                           struct rxe_pkt_info *pkt)
>>  {
>> -        struct rxe_mr *mr = qp->resp.mr;
>> -        int payload = payload_size(pkt);
>> -        u64 src, *dst;
>> -
>> -        if (mr->state != RXE_MR_STATE_VALID)
>> -                return RESPST_ERR_RKEY_VIOLATION;
>> +        struct resp_res *res = qp->resp.res;
>> +        struct rxe_mr *mr;
>> +        u64 value;
>> +        u64 iova;
>> +        int err;
>>
>> -        memcpy(&src, payload_addr(pkt), payload);
>> +        if (!res) {
>> +                res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
>> +                qp->resp.res = res;
>> +        }
>>
>> -        dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
>> -        /* check vaddr is 8 bytes aligned. */
>> -        if (!dst || (uintptr_t)dst & 7)
>> -                return RESPST_ERR_MISALIGNED_ATOMIC;
>> +        if (res->replay)
>> +                return RESPST_ACKNOWLEDGE;
>>
>> -        /* Do atomic write after all prior operations have completed */
>> -        smp_store_release(dst, src);
>> +        mr = qp->resp.mr;
>> +        value = *(u64 *)payload_addr(pkt);
>> +        iova = qp->resp.va + qp->resp.offset;
>>
>> -        /* decrease resp.resid to zero */
>> -        qp->resp.resid -= sizeof(payload);
>> +#if defined CONFIG_64BIT
>
> Shouldn't need a #ifdef here

This avoids a new special error (i.e. NOT_64_bit) and makes it clear we
won't call the code in mr.

I really don't understand why Fujitsu did it all this way instead of just
using a spinlock for 32 bit architectures as a fallback. But if I want to
keep to the spirit of their implementation this is fairly clear I think.

Bob

>
>> +        err = rxe_mr_do_atomic_write(mr, iova, value);
>> +        if (unlikely(err)) {
>> +                if (err == -RXE_ERR_NOT_ALIGNED)
>> +                        return RESPST_ERR_MISALIGNED_ATOMIC;
>> +                else
>> +                        return RESPST_ERR_RKEY_VIOLATION;
>
> Again why not return the RESPST directly then the stub can return
> RESPST_ERR_UNSUPPORTED_OPCODE?
>
> Jason
On Tue, Jan 17, 2023 at 10:57:31AM -0600, Bob Pearson wrote:
> >> -        dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
> >> -        /* check vaddr is 8 bytes aligned. */
> >> -        if (!dst || (uintptr_t)dst & 7)
> >> -                return RESPST_ERR_MISALIGNED_ATOMIC;
> >> +        if (res->replay)
> >> +                return RESPST_ACKNOWLEDGE;
> >>
> >> -        /* Do atomic write after all prior operations have completed */
> >> -        smp_store_release(dst, src);
> >> +        mr = qp->resp.mr;
> >> +        value = *(u64 *)payload_addr(pkt);
> >> +        iova = qp->resp.va + qp->resp.offset;
> >>
> >> -        /* decrease resp.resid to zero */
> >> -        qp->resp.resid -= sizeof(payload);
> >> +#if defined CONFIG_64BIT
> >
> > Shouldn't need a #ifdef here
>
> This avoids a new special error (i.e. NOT_64_bit) and makes it clear we
> won't call the code in mr.

? That doesn't seem right

> I really don't understand why Fujitsu did it all this way instead of just
> using a spinlock for 32 bit architectures as a fallback. But if I want to
> keep to the spirit of their implementation this is fairly clear I think.

IIRC the IBA definition is that this is supposed to be coherent with
the host CPU, the spinlock version is for the non-atomic atomics which
only has to be coherent with the "hca"

So a spinlock will not provide coherency with userspace that may be
touching this same atomic memory.

Jason
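To make the coherency point concrete, here is a sketch of what the
hypothetical 32-bit spinlock fallback would look like (the lock and
helper names are invented for illustration; this is not rxe code):

static DEFINE_SPINLOCK(emulated_atomic_lock);

static void emulated_atomic_write64(u64 *va, u64 value)
{
        unsigned long flags;

        spin_lock_irqsave(&emulated_atomic_lock, flags);
        *va = value;    /* two separate 32-bit stores on a 32-bit CPU */
        spin_unlock_irqrestore(&emulated_atomic_lock, flags);
}

The lock serializes only against other takers of the same lock, i.e. the
software "hca". A userspace thread that has the same MR mapped can issue
a native 64-bit atomic load that never acquires the lock, so it can
observe a torn value between the two stores, which is exactly what the
IBA coherency requirement rules out.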
On 1/17/23 10:59, Jason Gunthorpe wrote:
> On Tue, Jan 17, 2023 at 10:57:31AM -0600, Bob Pearson wrote:
>
>>>> -        dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
>>>> -        /* check vaddr is 8 bytes aligned. */
>>>> -        if (!dst || (uintptr_t)dst & 7)
>>>> -                return RESPST_ERR_MISALIGNED_ATOMIC;
>>>> +        if (res->replay)
>>>> +                return RESPST_ACKNOWLEDGE;
>>>>
>>>> -        /* Do atomic write after all prior operations have completed */
>>>> -        smp_store_release(dst, src);
>>>> +        mr = qp->resp.mr;
>>>> +        value = *(u64 *)payload_addr(pkt);
>>>> +        iova = qp->resp.va + qp->resp.offset;
>>>>
>>>> -        /* decrease resp.resid to zero */
>>>> -        qp->resp.resid -= sizeof(payload);
>>>> +#if defined CONFIG_64BIT
>>>
>>> Shouldn't need a #ifdef here
>>
>> This avoids a new special error (i.e. NOT_64_bit) and makes it clear we
>> won't call the code in mr.
>
> ? That doesn't seem right

that was the -3 of the -1, -2, -3 that we just fixed. there are three
error paths out of this state and we need a way to get to them. The
#ifdef provides that third path.

>
>> I really don't understand why Fujitsu did it all this way instead of just
>> using a spinlock for 32 bit architectures as a fallback. But if I want to
>> keep to the spirit of their implementation this is fairly clear I think.
>
> IIRC the IBA definition is that this is supposed to be coherent with
> the host CPU, the spinlock version is for the non-atomic atomics which
> only has to be coherent with the "hca"
>
> So a spinlock will not provide coherency with userspace that may be
> touching this same atomic memory.

Thanks. That makes sense.

Bob

>
> Jason
On Tue, Jan 17, 2023 at 11:04:29AM -0600, Bob Pearson wrote:
> On 1/17/23 10:59, Jason Gunthorpe wrote:
> > On Tue, Jan 17, 2023 at 10:57:31AM -0600, Bob Pearson wrote:
> >
> >>>> -        dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
> >>>> -        /* check vaddr is 8 bytes aligned. */
> >>>> -        if (!dst || (uintptr_t)dst & 7)
> >>>> -                return RESPST_ERR_MISALIGNED_ATOMIC;
> >>>> +        if (res->replay)
> >>>> +                return RESPST_ACKNOWLEDGE;
> >>>>
> >>>> -        /* Do atomic write after all prior operations have completed */
> >>>> -        smp_store_release(dst, src);
> >>>> +        mr = qp->resp.mr;
> >>>> +        value = *(u64 *)payload_addr(pkt);
> >>>> +        iova = qp->resp.va + qp->resp.offset;
> >>>>
> >>>> -        /* decrease resp.resid to zero */
> >>>> -        qp->resp.resid -= sizeof(payload);
> >>>> +#if defined CONFIG_64BIT
> >>>
> >>> Shouldn't need a #ifdef here
> >>
> >> This avoids a new special error (i.e. NOT_64_bit) and makes it clear we
> >> won't call the code in mr.
> >
> > ? That doesn't seem right
>
> that was the -3 of the -1, -2, -3 that we just fixed. there are three
> error paths out of this state and we need a way to get to them. The
> #ifdef provides that third path.

I feel like it should be solvable without this ifdef though

Jason
On 1/17/23 12:05, Jason Gunthorpe wrote:
> On Tue, Jan 17, 2023 at 11:04:29AM -0600, Bob Pearson wrote:
>> On 1/17/23 10:59, Jason Gunthorpe wrote:
>>> On Tue, Jan 17, 2023 at 10:57:31AM -0600, Bob Pearson wrote:
>>>
>>>>>> -        dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
>>>>>> -        /* check vaddr is 8 bytes aligned. */
>>>>>> -        if (!dst || (uintptr_t)dst & 7)
>>>>>> -                return RESPST_ERR_MISALIGNED_ATOMIC;
>>>>>> +        if (res->replay)
>>>>>> +                return RESPST_ACKNOWLEDGE;
>>>>>>
>>>>>> -        /* Do atomic write after all prior operations have completed */
>>>>>> -        smp_store_release(dst, src);
>>>>>> +        mr = qp->resp.mr;
>>>>>> +        value = *(u64 *)payload_addr(pkt);
>>>>>> +        iova = qp->resp.va + qp->resp.offset;
>>>>>>
>>>>>> -        /* decrease resp.resid to zero */
>>>>>> -        qp->resp.resid -= sizeof(payload);
>>>>>> +#if defined CONFIG_64BIT
>>>>>
>>>>> Shouldn't need a #ifdef here
>>>>
>>>> This avoids a new special error (i.e. NOT_64_bit) and makes it clear we
>>>> won't call the code in mr.
>>>
>>> ? That doesn't seem right
>>
>> that was the -3 of the -1, -2, -3 that we just fixed. there are three
>> error paths out of this state and we need a way to get to them. The
>> #ifdef provides that third path.
>
> I feel like it should be solvable without this ifdef though
>
> Jason

You could get rid of the ifdef in the atomic_write_reply() routine but then
the rxe_mr_do_atomic_write() routine would have to have a second version in
the #else case that would have to return something different so that the
third exit could be taken i.e. whatever replaces the original -3. I really
think this is simpler.

Bob
On Tue, Jan 17, 2023 at 02:41:53PM -0600, Bob Pearson wrote:
> On 1/17/23 12:05, Jason Gunthorpe wrote:
> > On Tue, Jan 17, 2023 at 11:04:29AM -0600, Bob Pearson wrote:
> >> On 1/17/23 10:59, Jason Gunthorpe wrote:
> >>> On Tue, Jan 17, 2023 at 10:57:31AM -0600, Bob Pearson wrote:
> >>>
> >>>>>> -        dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
> >>>>>> -        /* check vaddr is 8 bytes aligned. */
> >>>>>> -        if (!dst || (uintptr_t)dst & 7)
> >>>>>> -                return RESPST_ERR_MISALIGNED_ATOMIC;
> >>>>>> +        if (res->replay)
> >>>>>> +                return RESPST_ACKNOWLEDGE;
> >>>>>>
> >>>>>> -        /* Do atomic write after all prior operations have completed */
> >>>>>> -        smp_store_release(dst, src);
> >>>>>> +        mr = qp->resp.mr;
> >>>>>> +        value = *(u64 *)payload_addr(pkt);
> >>>>>> +        iova = qp->resp.va + qp->resp.offset;
> >>>>>>
> >>>>>> -        /* decrease resp.resid to zero */
> >>>>>> -        qp->resp.resid -= sizeof(payload);
> >>>>>> +#if defined CONFIG_64BIT
> >>>>>
> >>>>> Shouldn't need a #ifdef here
> >>>>
> >>>> This avoids a new special error (i.e. NOT_64_bit) and makes it clear we
> >>>> won't call the code in mr.
> >>>
> >>> ? That doesn't seem right
> >>
> >> that was the -3 of the -1, -2, -3 that we just fixed. there are three
> >> error paths out of this state and we need a way to get to them. The
> >> #ifdef provides that third path.
> >
> > I feel like it should be solvable without this ifdef though
> >
> > Jason
>
> You could get rid of the ifdef in the atomic_write_reply() routine but then
> the rxe_mr_do_atomic_write() routine would have to have a second version in
> the #else case that would have to return something different so that the
> third exit could be taken i.e. whatever replaces the original -3. I really
> think this is simpler.

You really should just return the RESPST_* from these functions.

Jason
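What Jason is asking for would look roughly like the sketch below (an
illustration of the suggestion, not necessarily the code that was
finally merged): rxe_mr_do_atomic_write() hands back a RESPST_* state
directly, with 0 meaning success, so atomic_write_reply() needs no error
translation and the 32-bit stub simply picks its own state:

int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
{
#ifdef CONFIG_64BIT
        u64 *va;

        /* See IBA oA19-28 */
        if (unlikely(mr->state != RXE_MR_STATE_VALID))
                return RESPST_ERR_RKEY_VIOLATION;

        va = iova_to_vaddr(mr, iova, sizeof(value));
        if (unlikely(!va))
                return RESPST_ERR_RKEY_VIOLATION;

        /* See IBA A19.4.2 */
        if (unlikely((uintptr_t)va & 0x7 || iova & 0x7))
                return RESPST_ERR_MISALIGNED_ATOMIC;

        /* Do atomic write after all prior operations have completed */
        smp_store_release(va, value);

        return 0;
#else
        /* atomic write is not supported on 32-bit architectures */
        return RESPST_ERR_UNSUPPORTED_OPCODE;
#endif
}

The caller in atomic_write_reply() then collapses to:

        err = rxe_mr_do_atomic_write(mr, iova, value);
        if (err)
                return err;     /* already a RESPST_* state */

which removes both the #ifdef and the -RXE_ERR_NOT_ALIGNED special case
from rxe_resp.c.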
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index bcb1bbcf50df..b1dda0cf891b 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -74,6 +74,7 @@ int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
 void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length);
 int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
                         u64 compare, u64 swap_add, u64 *orig_val);
+int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value);
 struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
                          enum rxe_mr_lookup_type type);
 int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 15a8d44daa35..10484f671977 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -565,6 +565,40 @@ int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
 	return 0;
 }
 
+/* only implemented for 64 bit architectures */
+int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
+{
+#if defined CONFIG_64BIT
+	u64 *va;
+
+	/* See IBA oA19-28 */
+	if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
+		rxe_dbg_mr(mr, "mr not in valid state");
+		return -EINVAL;
+	}
+
+	va = iova_to_vaddr(mr, iova, sizeof(value));
+	if (unlikely(!va)) {
+		rxe_dbg_mr(mr, "iova out of range");
+		return -ERANGE;
+	}
+
+	/* See IBA A19.4.2 */
+	if (unlikely((uintptr_t)va & 0x7 || iova & 0x7)) {
+		rxe_dbg_mr(mr, "misaligned address");
+		return -RXE_ERR_NOT_ALIGNED;
+	}
+
+	/* Do atomic write after all prior operations have completed */
+	smp_store_release(va, value);
+
+	return 0;
+#else
+	WARN_ON(1);
+	return -EINVAL;
+#endif
+}
+
 int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
 {
 	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 1e38e5da1f4c..49298ff88d25 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -764,30 +764,40 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
 	return RESPST_ACKNOWLEDGE;
 }
 
-#ifdef CONFIG_64BIT
-static enum resp_states do_atomic_write(struct rxe_qp *qp,
-					struct rxe_pkt_info *pkt)
+static enum resp_states atomic_write_reply(struct rxe_qp *qp,
+					   struct rxe_pkt_info *pkt)
 {
-	struct rxe_mr *mr = qp->resp.mr;
-	int payload = payload_size(pkt);
-	u64 src, *dst;
-
-	if (mr->state != RXE_MR_STATE_VALID)
-		return RESPST_ERR_RKEY_VIOLATION;
+	struct resp_res *res = qp->resp.res;
+	struct rxe_mr *mr;
+	u64 value;
+	u64 iova;
+	int err;
 
-	memcpy(&src, payload_addr(pkt), payload);
+	if (!res) {
+		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
+		qp->resp.res = res;
+	}
 
-	dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
-	/* check vaddr is 8 bytes aligned. */
-	if (!dst || (uintptr_t)dst & 7)
-		return RESPST_ERR_MISALIGNED_ATOMIC;
+	if (res->replay)
+		return RESPST_ACKNOWLEDGE;
 
-	/* Do atomic write after all prior operations have completed */
-	smp_store_release(dst, src);
+	mr = qp->resp.mr;
+	value = *(u64 *)payload_addr(pkt);
+	iova = qp->resp.va + qp->resp.offset;
 
-	/* decrease resp.resid to zero */
-	qp->resp.resid -= sizeof(payload);
+#if defined CONFIG_64BIT
+	err = rxe_mr_do_atomic_write(mr, iova, value);
+	if (unlikely(err)) {
+		if (err == -RXE_ERR_NOT_ALIGNED)
+			return RESPST_ERR_MISALIGNED_ATOMIC;
+		else
+			return RESPST_ERR_RKEY_VIOLATION;
+	}
+#else
+	return RESPST_ERR_UNSUPPORTED_OPCODE;
+#endif
+	qp->resp.resid = 0;
 	qp->resp.msn++;
 
 	/* next expected psn, read handles this separately */
@@ -796,29 +806,8 @@ static enum resp_states do_atomic_write(struct rxe_qp *qp,
 	qp->resp.opcode = pkt->opcode;
 	qp->resp.status = IB_WC_SUCCESS;
 
-	return RESPST_ACKNOWLEDGE;
-}
-#else
-static enum resp_states do_atomic_write(struct rxe_qp *qp,
-					struct rxe_pkt_info *pkt)
-{
-	return RESPST_ERR_UNSUPPORTED_OPCODE;
-}
-#endif /* CONFIG_64BIT */
-static enum resp_states atomic_write_reply(struct rxe_qp *qp,
-					   struct rxe_pkt_info *pkt)
-{
-	struct resp_res *res = qp->resp.res;
-
-	if (!res) {
-		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
-		qp->resp.res = res;
-	}
-
-	if (res->replay)
-		return RESPST_ACKNOWLEDGE;
-
-	return do_atomic_write(qp, pkt);
+	return RESPST_ACKNOWLEDGE;
 }
 
 static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
Isolate mr specific code from atomic_write_reply() in rxe_resp.c into
a subroutine rxe_mr_do_atomic_write() in rxe_mr.c. Check length for
atomic write operation. Make iova_to_vaddr() static.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h  |  1 +
 drivers/infiniband/sw/rxe/rxe_mr.c   | 34 ++++++++++++++
 drivers/infiniband/sw/rxe/rxe_resp.c | 69 ++++++++++++----------------
 3 files changed, 64 insertions(+), 40 deletions(-)