diff mbox

[rdma-core,v3,8/9] libbnxt_re: Add support for atomic operations

Message ID 1489574253-20300-9-git-send-email-devesh.sharma@broadcom.com (mailing list archive)
State Changes Requested
Headers show

Commit Message

Devesh Sharma March 15, 2017, 10:37 a.m. UTC
This patch adds support for compare-and-swap and fetch-and-add
atomic operations in user library.

v1->v2
 -- Fixed the missing "break"
 -- Changed macros to inline function

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
---
 providers/bnxt_re/bnxt_re-abi.h |  3 ++-
 providers/bnxt_re/main.h        |  8 +++++-
 providers/bnxt_re/memory.h      | 10 +++++++
 providers/bnxt_re/verbs.c       | 58 +++++++++++++++++++++++++++++++++++------
 4 files changed, 69 insertions(+), 10 deletions(-)

Comments

Leon Romanovsky March 15, 2017, 7:15 p.m. UTC | #1
On Wed, Mar 15, 2017 at 06:37:32AM -0400, Devesh Sharma wrote:
> This patch adds support for compare-and-swap and fetch-and-add
> atomic operations in user library.
>
> v1->v2
>  -- Fixed the missing "break"
>  -- Changed macros to inline function
>
> Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
> Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
> Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
> ---
>  providers/bnxt_re/bnxt_re-abi.h |  3 ++-
>  providers/bnxt_re/main.h        |  8 +++++-
>  providers/bnxt_re/memory.h      | 10 +++++++
>  providers/bnxt_re/verbs.c       | 58 +++++++++++++++++++++++++++++++++++------
>  4 files changed, 69 insertions(+), 10 deletions(-)
>
> diff --git a/providers/bnxt_re/bnxt_re-abi.h b/providers/bnxt_re/bnxt_re-abi.h
> index 581e1b7..557221b 100644
> --- a/providers/bnxt_re/bnxt_re-abi.h
> +++ b/providers/bnxt_re/bnxt_re-abi.h
> @@ -54,7 +54,8 @@ enum bnxt_re_wr_opcode {
>  	BNXT_RE_WR_OPCD_ATOMIC_FA	= 0x0B,
>  	BNXT_RE_WR_OPCD_LOC_INVAL	= 0x0C,
>  	BNXT_RE_WR_OPCD_BIND		= 0x0E,
> -	BNXT_RE_WR_OPCD_RECV		= 0x80
> +	BNXT_RE_WR_OPCD_RECV		= 0x80,
> +	BNXT_RE_WR_OPCD_INVAL		= 0xFF
>  };
>
>  enum bnxt_re_wr_flags {
> diff --git a/providers/bnxt_re/main.h b/providers/bnxt_re/main.h
> index 9d99c64..a417328 100644
> --- a/providers/bnxt_re/main.h
> +++ b/providers/bnxt_re/main.h
> @@ -236,9 +236,15 @@ static inline uint8_t bnxt_re_ibv_to_bnxt_wr_opcd(uint8_t ibv_opcd)
>  	case IBV_WR_RDMA_READ:
>  		bnxt_opcd = BNXT_RE_WR_OPCD_RDMA_READ;
>  		break;
> +	case IBV_WR_ATOMIC_CMP_AND_SWP:
> +		bnxt_opcd = BNXT_RE_WR_OPCD_ATOMIC_CS;
> +		break;
> +	case IBV_WR_ATOMIC_FETCH_AND_ADD:
> +		bnxt_opcd = BNXT_RE_WR_OPCD_ATOMIC_FA;
> +		break;
>  		/* TODO: Add other opcodes */
>  	default:
> -		bnxt_opcd = 0xFF;
> +		bnxt_opcd = BNXT_RE_WR_OPCD_INVAL;
>  		break;
>  	};
>
> diff --git a/providers/bnxt_re/memory.h b/providers/bnxt_re/memory.h
> index debb31a..d7e6a92 100644
> --- a/providers/bnxt_re/memory.h
> +++ b/providers/bnxt_re/memory.h
> @@ -83,6 +83,16 @@ static inline void iowrite32(__u32 *dst, uint32_t *src)
>  	*(volatile __u32 *)dst = *src;
>  }
>
> +static inline __u32 upper_32_bits(uint64_t n)
> +{
> +	return (__u32)((n >> 16) >> 16);
> +}
> +
> +static inline __u32 lower_32_bits(uint64_t n)
> +{
> +	return (__u32)(n & 0xFFFFFFFFUL);
> +}
> +
>  /* Basic queue operation */
>  static inline uint32_t bnxt_re_is_que_full(struct bnxt_re_queue *que)
>  {
> diff --git a/providers/bnxt_re/verbs.c b/providers/bnxt_re/verbs.c
> index e60010d..85d77cd 100644
> --- a/providers/bnxt_re/verbs.c
> +++ b/providers/bnxt_re/verbs.c
> @@ -1068,6 +1068,9 @@ static int bnxt_re_build_send_sqe(struct bnxt_re_qp *qp, void *wqe,
>
>  	/* Fill Header */
>  	opcode = bnxt_re_ibv_to_bnxt_wr_opcd(wr->opcode);
> +	if (opcode == BNXT_RE_WR_OPCD_INVAL)
> +		return -EINVAL;
> +
>  	hdr->rsv_ws_fl_wt |= (opcode & BNXT_RE_HDR_WT_MASK);
>
>  	if (is_inline) {
> @@ -1116,6 +1119,44 @@ static int bnxt_re_build_rdma_sqe(struct bnxt_re_qp *qp, void *wqe,
>  	return len;
>  }
>
> +static int bnxt_re_build_cns_sqe(struct bnxt_re_qp *qp, void *wqe,
> +				 struct ibv_send_wr *wr)
> +{
> +	struct bnxt_re_bsqe *hdr = wqe;
> +	struct bnxt_re_atomic *sqe = ((void *)wqe +
> +				      sizeof(struct bnxt_re_bsqe));
> +	int len;
> +
> +	len = bnxt_re_build_send_sqe(qp, wqe, wr, false);
> +	hdr->key_immd = wr->wr.atomic.rkey;
> +	sqe->rva_lo = lower_32_bits(wr->wr.atomic.remote_addr);
> +	sqe->rva_hi = upper_32_bits(wr->wr.atomic.remote_addr);
> +	sqe->cmp_dt_lo = lower_32_bits(wr->wr.atomic.compare_add);
> +	sqe->cmp_dt_hi = upper_32_bits(wr->wr.atomic.compare_add);
> +	sqe->swp_dt_lo = lower_32_bits(wr->wr.atomic.swap);
> +	sqe->swp_dt_hi = upper_32_bits(wr->wr.atomic.swap);
> +

Sorry for my question, but why do you need to separate uint64_t variable
to two __u32? Why can't you declare one __u64?

> +	return len;
> +}
> +
> +static int bnxt_re_build_fna_sqe(struct bnxt_re_qp *qp, void *wqe,
> +				 struct ibv_send_wr *wr)
> +{
> +	struct bnxt_re_bsqe *hdr = wqe;
> +	struct bnxt_re_atomic *sqe = ((void *)wqe +
> +				      sizeof(struct bnxt_re_bsqe));
> +	int len;
> +
> +	len = bnxt_re_build_send_sqe(qp, wqe, wr, false);
> +	hdr->key_immd = wr->wr.atomic.rkey;
> +	sqe->rva_lo = lower_32_bits(wr->wr.atomic.remote_addr);
> +	sqe->rva_hi = upper_32_bits(wr->wr.atomic.remote_addr);
> +	sqe->cmp_dt_lo = lower_32_bits(wr->wr.atomic.compare_add);
> +	sqe->cmp_dt_hi = upper_32_bits(wr->wr.atomic.compare_add);
> +
> +	return len;
> +}
> +
>  int bnxt_re_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
>  		      struct ibv_send_wr **bad)
>  {
> @@ -1169,27 +1210,28 @@ int bnxt_re_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
>  			else
>  				bytes = bnxt_re_build_send_sqe(qp, sqe, wr,
>  							       is_inline);
> -			if (bytes < 0)
> -				ret = (bytes == -EINVAL) ? EINVAL : ENOMEM;
>  			break;
>  		case IBV_WR_RDMA_WRITE_WITH_IMM:
>  			hdr->key_immd = wr->imm_data;
>  		case IBV_WR_RDMA_WRITE:
>  			bytes = bnxt_re_build_rdma_sqe(qp, sqe, wr, is_inline);
> -			if (bytes < 0)
> -				ret = ENOMEM;
>  			break;
>  		case IBV_WR_RDMA_READ:
>  			bytes = bnxt_re_build_rdma_sqe(qp, sqe, wr, false);
> -			if (bytes < 0)
> -				ret = ENOMEM;
> +			break;
> +		case IBV_WR_ATOMIC_CMP_AND_SWP:
> +			bytes = bnxt_re_build_cns_sqe(qp, sqe, wr);
> +			break;
> +		case IBV_WR_ATOMIC_FETCH_AND_ADD:
> +			bytes = bnxt_re_build_fna_sqe(qp, sqe, wr);
>  			break;
>  		default:
> -			ret = EINVAL;
> +			bytes = -EINVAL;
>  			break;
>  		}
>
> -		if (ret) {
> +		if (bytes < 0) {
> +			ret = (bytes == -EINVAL) ? EINVAL : ENOMEM;
>  			*bad = wr;
>  			break;
>  		}
> --
> 1.8.3.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Devesh Sharma March 16, 2017, 3:54 p.m. UTC | #2
On Thu, Mar 16, 2017 at 12:45 AM, Leon Romanovsky <leon@kernel.org> wrote:
> On Wed, Mar 15, 2017 at 06:37:32AM -0400, Devesh Sharma wrote:
>> This patch adds support for compare-and-swap and fetch-and-add
>> atomic operations in user library.
>>
>> v1->v2
>>  -- Fixed the missing "break"
>>  -- Changed macros to inline function
>>
>> Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
>> Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
>> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
>> Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
>> ---
>>  providers/bnxt_re/bnxt_re-abi.h |  3 ++-
>>  providers/bnxt_re/main.h        |  8 +++++-
>>  providers/bnxt_re/memory.h      | 10 +++++++
>>  providers/bnxt_re/verbs.c       | 58 +++++++++++++++++++++++++++++++++++------
>>  4 files changed, 69 insertions(+), 10 deletions(-)
>>
>> diff --git a/providers/bnxt_re/bnxt_re-abi.h b/providers/bnxt_re/bnxt_re-abi.h
>> index 581e1b7..557221b 100644
>> --- a/providers/bnxt_re/bnxt_re-abi.h
>> +++ b/providers/bnxt_re/bnxt_re-abi.h
>> @@ -54,7 +54,8 @@ enum bnxt_re_wr_opcode {
>>       BNXT_RE_WR_OPCD_ATOMIC_FA       = 0x0B,
>>       BNXT_RE_WR_OPCD_LOC_INVAL       = 0x0C,
>>       BNXT_RE_WR_OPCD_BIND            = 0x0E,
>> -     BNXT_RE_WR_OPCD_RECV            = 0x80
>> +     BNXT_RE_WR_OPCD_RECV            = 0x80,
>> +     BNXT_RE_WR_OPCD_INVAL           = 0xFF
>>  };
>>
>>  enum bnxt_re_wr_flags {
>> diff --git a/providers/bnxt_re/main.h b/providers/bnxt_re/main.h
>> index 9d99c64..a417328 100644
>> --- a/providers/bnxt_re/main.h
>> +++ b/providers/bnxt_re/main.h
>> @@ -236,9 +236,15 @@ static inline uint8_t bnxt_re_ibv_to_bnxt_wr_opcd(uint8_t ibv_opcd)
>>       case IBV_WR_RDMA_READ:
>>               bnxt_opcd = BNXT_RE_WR_OPCD_RDMA_READ;
>>               break;
>> +     case IBV_WR_ATOMIC_CMP_AND_SWP:
>> +             bnxt_opcd = BNXT_RE_WR_OPCD_ATOMIC_CS;
>> +             break;
>> +     case IBV_WR_ATOMIC_FETCH_AND_ADD:
>> +             bnxt_opcd = BNXT_RE_WR_OPCD_ATOMIC_FA;
>> +             break;
>>               /* TODO: Add other opcodes */
>>       default:
>> -             bnxt_opcd = 0xFF;
>> +             bnxt_opcd = BNXT_RE_WR_OPCD_INVAL;
>>               break;
>>       };
>>
>> diff --git a/providers/bnxt_re/memory.h b/providers/bnxt_re/memory.h
>> index debb31a..d7e6a92 100644
>> --- a/providers/bnxt_re/memory.h
>> +++ b/providers/bnxt_re/memory.h
>> @@ -83,6 +83,16 @@ static inline void iowrite32(__u32 *dst, uint32_t *src)
>>       *(volatile __u32 *)dst = *src;
>>  }
>>
>> +static inline __u32 upper_32_bits(uint64_t n)
>> +{
>> +     return (__u32)((n >> 16) >> 16);
>> +}
>> +
>> +static inline __u32 lower_32_bits(uint64_t n)
>> +{
>> +     return (__u32)(n & 0xFFFFFFFFUL);
>> +}
>> +
>>  /* Basic queue operation */
>>  static inline uint32_t bnxt_re_is_que_full(struct bnxt_re_queue *que)
>>  {
>> diff --git a/providers/bnxt_re/verbs.c b/providers/bnxt_re/verbs.c
>> index e60010d..85d77cd 100644
>> --- a/providers/bnxt_re/verbs.c
>> +++ b/providers/bnxt_re/verbs.c
>> @@ -1068,6 +1068,9 @@ static int bnxt_re_build_send_sqe(struct bnxt_re_qp *qp, void *wqe,
>>
>>       /* Fill Header */
>>       opcode = bnxt_re_ibv_to_bnxt_wr_opcd(wr->opcode);
>> +     if (opcode == BNXT_RE_WR_OPCD_INVAL)
>> +             return -EINVAL;
>> +
>>       hdr->rsv_ws_fl_wt |= (opcode & BNXT_RE_HDR_WT_MASK);
>>
>>       if (is_inline) {
>> @@ -1116,6 +1119,44 @@ static int bnxt_re_build_rdma_sqe(struct bnxt_re_qp *qp, void *wqe,
>>       return len;
>>  }
>>
>> +static int bnxt_re_build_cns_sqe(struct bnxt_re_qp *qp, void *wqe,
>> +                              struct ibv_send_wr *wr)
>> +{
>> +     struct bnxt_re_bsqe *hdr = wqe;
>> +     struct bnxt_re_atomic *sqe = ((void *)wqe +
>> +                                   sizeof(struct bnxt_re_bsqe));
>> +     int len;
>> +
>> +     len = bnxt_re_build_send_sqe(qp, wqe, wr, false);
>> +     hdr->key_immd = wr->wr.atomic.rkey;
>> +     sqe->rva_lo = lower_32_bits(wr->wr.atomic.remote_addr);
>> +     sqe->rva_hi = upper_32_bits(wr->wr.atomic.remote_addr);
>> +     sqe->cmp_dt_lo = lower_32_bits(wr->wr.atomic.compare_add);
>> +     sqe->cmp_dt_hi = upper_32_bits(wr->wr.atomic.compare_add);
>> +     sqe->swp_dt_lo = lower_32_bits(wr->wr.atomic.swap);
>> +     sqe->swp_dt_hi = upper_32_bits(wr->wr.atomic.swap);
>> +
>
> Sorry for my question, but why do you need to separate uint64_t variable
> to two __u32? Why can't you declare one __u64?

It may help 32-bit CPUs during byte order conversion.

>
>> +     return len;
>> +}
>> +
>> +static int bnxt_re_build_fna_sqe(struct bnxt_re_qp *qp, void *wqe,
>> +                              struct ibv_send_wr *wr)
>> +{
>> +     struct bnxt_re_bsqe *hdr = wqe;
>> +     struct bnxt_re_atomic *sqe = ((void *)wqe +
>> +                                   sizeof(struct bnxt_re_bsqe));
>> +     int len;
>> +
>> +     len = bnxt_re_build_send_sqe(qp, wqe, wr, false);
>> +     hdr->key_immd = wr->wr.atomic.rkey;
>> +     sqe->rva_lo = lower_32_bits(wr->wr.atomic.remote_addr);
>> +     sqe->rva_hi = upper_32_bits(wr->wr.atomic.remote_addr);
>> +     sqe->cmp_dt_lo = lower_32_bits(wr->wr.atomic.compare_add);
>> +     sqe->cmp_dt_hi = upper_32_bits(wr->wr.atomic.compare_add);
>> +
>> +     return len;
>> +}
>> +
>>  int bnxt_re_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
>>                     struct ibv_send_wr **bad)
>>  {
>> @@ -1169,27 +1210,28 @@ int bnxt_re_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
>>                       else
>>                               bytes = bnxt_re_build_send_sqe(qp, sqe, wr,
>>                                                              is_inline);
>> -                     if (bytes < 0)
>> -                             ret = (bytes == -EINVAL) ? EINVAL : ENOMEM;
>>                       break;
>>               case IBV_WR_RDMA_WRITE_WITH_IMM:
>>                       hdr->key_immd = wr->imm_data;
>>               case IBV_WR_RDMA_WRITE:
>>                       bytes = bnxt_re_build_rdma_sqe(qp, sqe, wr, is_inline);
>> -                     if (bytes < 0)
>> -                             ret = ENOMEM;
>>                       break;
>>               case IBV_WR_RDMA_READ:
>>                       bytes = bnxt_re_build_rdma_sqe(qp, sqe, wr, false);
>> -                     if (bytes < 0)
>> -                             ret = ENOMEM;
>> +                     break;
>> +             case IBV_WR_ATOMIC_CMP_AND_SWP:
>> +                     bytes = bnxt_re_build_cns_sqe(qp, sqe, wr);
>> +                     break;
>> +             case IBV_WR_ATOMIC_FETCH_AND_ADD:
>> +                     bytes = bnxt_re_build_fna_sqe(qp, sqe, wr);
>>                       break;
>>               default:
>> -                     ret = EINVAL;
>> +                     bytes = -EINVAL;
>>                       break;
>>               }
>>
>> -             if (ret) {
>> +             if (bytes < 0) {
>> +                     ret = (bytes == -EINVAL) ? EINVAL : ENOMEM;
>>                       *bad = wr;
>>                       break;
>>               }
>> --
>> 1.8.3.1
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bart Van Assche March 16, 2017, 4:07 p.m. UTC | #3
On Thu, 2017-03-16 at 21:24 +0530, Devesh Sharma wrote:
> On Thu, Mar 16, 2017 at 12:45 AM, Leon Romanovsky <leon@kernel.org> wrote:
> > On Wed, Mar 15, 2017 at 06:37:32AM -0400, Devesh Sharma wrote:
> > > @@ -1116,6 +1119,44 @@ static int bnxt_re_build_rdma_sqe(struct bnxt_re_qp *qp, void *wqe,
> > >       return len;
> > >  }
> > > 
> > > +static int bnxt_re_build_cns_sqe(struct bnxt_re_qp *qp, void *wqe,
> > > +                              struct ibv_send_wr *wr)
> > > +{
> > > +     struct bnxt_re_bsqe *hdr = wqe;
> > > +     struct bnxt_re_atomic *sqe = ((void *)wqe +
> > > +                                   sizeof(struct bnxt_re_bsqe));
> > > +     int len;
> > > +
> > > +     len = bnxt_re_build_send_sqe(qp, wqe, wr, false);
> > > +     hdr->key_immd = wr->wr.atomic.rkey;
> > > +     sqe->rva_lo = lower_32_bits(wr->wr.atomic.remote_addr);
> > > +     sqe->rva_hi = upper_32_bits(wr->wr.atomic.remote_addr);
> > > +     sqe->cmp_dt_lo = lower_32_bits(wr->wr.atomic.compare_add);
> > > +     sqe->cmp_dt_hi = upper_32_bits(wr->wr.atomic.compare_add);
> > > +     sqe->swp_dt_lo = lower_32_bits(wr->wr.atomic.swap);
> > > +     sqe->swp_dt_hi = upper_32_bits(wr->wr.atomic.swap);
> > > +
> > 
> > Sorry for my question, but why do you need to separate uint64_t variable
> > to two __u32? Why can't you declare one __u64?
> 
> It may help 32 bit CPU during byte order conversion.

Does that mean that you consider optimizing for 32-bit CPUs more important than
optimizing for 64-bit CPUs? Does this also mean that you expect that the macros
you introduced in your driver to be more efficient than the glibc endianness
conversion macros? Sorry but this sounds like a weird explanation to me. I agree
with Leon that it would be a significant improvement if 64-bit data types would
be used instead of using numerous lower_32_bits() / upper_32_bits() calls.

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe March 16, 2017, 4:40 p.m. UTC | #4
On Thu, Mar 16, 2017 at 04:07:38PM +0000, Bart Van Assche wrote:
 
> Does that mean that you consider optimizing for 32-bit CPUs more important than
> optimizing for 64-bit CPUs? Does this also mean that you expect that the macros
> you introduced in your driver to be more efficient than the glibc endianness
> conversion macros? Sorry but this sounds like a weird explanation to me. I agree
> with Leon that it would be a significant improvement if 64-bit data types would
> be used instead of using numerous lower_32_bits() / upper_32_bits() calls.

I actually think this is all just wrong.

For instance looking at bnxt_re_atomic I see it is written to a
wqe/sqe buf, which come from here:

                sqe = (void *)(sq->va + (sq->tail * sq->stride));

Which suggests it is DMA'd from the NIC, and I suspect it is 64 bit
aligned.

The driver does a whole struct 64 bit by 64 bit byteswap on every sqe:

                bnxt_re_host_to_le64((uint64_t *)sqe, sq->stride);

Which means if we look at BE then this struct:

struct bnxt_re_atomic {
        __u32 rva_lo;
        __u32 rva_hi;
        __u32 swp_dt_lo;
        __u32 swp_dt_hi;
        __u32 cmp_dt_lo;
        __u32 cmp_dt_hi;
};

becomes

struct bnxt_re_atomic {
        __le32 rva_hi;
        __le32 rva_lo;
        __le32 swp_dt_hi;
        __le32 swp_dt_lo;
        __le32 cmp_dt_hi;
        __le32 cmp_dt_lo;
};

Which doesn't look like it will work right to me.

I think Bart is basically right, you need to elimiante all of the
calls to bnxt_re_host_to_le64 and related, directly use __leXX in your
DMA structures and byte swap when storing.

For instance, if instead you do:

struct bnxt_re_atomic {
        __le64 rva;
        __le64 swp_dt;
        __le64 cmp_dt;
};

Then things will just work out properly and you will end up with the
same memory image for DMA on BE or LE platforms.

Does the kernel driver have the same problem?

Also, please use the kernel ABI file directly (see how mw_pvrdma-abi.h
makes this happen) as much as possible, drop the copies out of
providers/bnxt_re/bnxt_re-abi.h

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/providers/bnxt_re/bnxt_re-abi.h b/providers/bnxt_re/bnxt_re-abi.h
index 581e1b7..557221b 100644
--- a/providers/bnxt_re/bnxt_re-abi.h
+++ b/providers/bnxt_re/bnxt_re-abi.h
@@ -54,7 +54,8 @@  enum bnxt_re_wr_opcode {
 	BNXT_RE_WR_OPCD_ATOMIC_FA	= 0x0B,
 	BNXT_RE_WR_OPCD_LOC_INVAL	= 0x0C,
 	BNXT_RE_WR_OPCD_BIND		= 0x0E,
-	BNXT_RE_WR_OPCD_RECV		= 0x80
+	BNXT_RE_WR_OPCD_RECV		= 0x80,
+	BNXT_RE_WR_OPCD_INVAL		= 0xFF
 };
 
 enum bnxt_re_wr_flags {
diff --git a/providers/bnxt_re/main.h b/providers/bnxt_re/main.h
index 9d99c64..a417328 100644
--- a/providers/bnxt_re/main.h
+++ b/providers/bnxt_re/main.h
@@ -236,9 +236,15 @@  static inline uint8_t bnxt_re_ibv_to_bnxt_wr_opcd(uint8_t ibv_opcd)
 	case IBV_WR_RDMA_READ:
 		bnxt_opcd = BNXT_RE_WR_OPCD_RDMA_READ;
 		break;
+	case IBV_WR_ATOMIC_CMP_AND_SWP:
+		bnxt_opcd = BNXT_RE_WR_OPCD_ATOMIC_CS;
+		break;
+	case IBV_WR_ATOMIC_FETCH_AND_ADD:
+		bnxt_opcd = BNXT_RE_WR_OPCD_ATOMIC_FA;
+		break;
 		/* TODO: Add other opcodes */
 	default:
-		bnxt_opcd = 0xFF;
+		bnxt_opcd = BNXT_RE_WR_OPCD_INVAL;
 		break;
 	};
 
diff --git a/providers/bnxt_re/memory.h b/providers/bnxt_re/memory.h
index debb31a..d7e6a92 100644
--- a/providers/bnxt_re/memory.h
+++ b/providers/bnxt_re/memory.h
@@ -83,6 +83,16 @@  static inline void iowrite32(__u32 *dst, uint32_t *src)
 	*(volatile __u32 *)dst = *src;
 }
 
+static inline __u32 upper_32_bits(uint64_t n)
+{
+	return (__u32)((n >> 16) >> 16);
+}
+
+static inline __u32 lower_32_bits(uint64_t n)
+{
+	return (__u32)(n & 0xFFFFFFFFUL);
+}
+
 /* Basic queue operation */
 static inline uint32_t bnxt_re_is_que_full(struct bnxt_re_queue *que)
 {
diff --git a/providers/bnxt_re/verbs.c b/providers/bnxt_re/verbs.c
index e60010d..85d77cd 100644
--- a/providers/bnxt_re/verbs.c
+++ b/providers/bnxt_re/verbs.c
@@ -1068,6 +1068,9 @@  static int bnxt_re_build_send_sqe(struct bnxt_re_qp *qp, void *wqe,
 
 	/* Fill Header */
 	opcode = bnxt_re_ibv_to_bnxt_wr_opcd(wr->opcode);
+	if (opcode == BNXT_RE_WR_OPCD_INVAL)
+		return -EINVAL;
+
 	hdr->rsv_ws_fl_wt |= (opcode & BNXT_RE_HDR_WT_MASK);
 
 	if (is_inline) {
@@ -1116,6 +1119,44 @@  static int bnxt_re_build_rdma_sqe(struct bnxt_re_qp *qp, void *wqe,
 	return len;
 }
 
+static int bnxt_re_build_cns_sqe(struct bnxt_re_qp *qp, void *wqe,
+				 struct ibv_send_wr *wr)
+{
+	struct bnxt_re_bsqe *hdr = wqe;
+	struct bnxt_re_atomic *sqe = ((void *)wqe +
+				      sizeof(struct bnxt_re_bsqe));
+	int len;
+
+	len = bnxt_re_build_send_sqe(qp, wqe, wr, false);
+	hdr->key_immd = wr->wr.atomic.rkey;
+	sqe->rva_lo = lower_32_bits(wr->wr.atomic.remote_addr);
+	sqe->rva_hi = upper_32_bits(wr->wr.atomic.remote_addr);
+	sqe->cmp_dt_lo = lower_32_bits(wr->wr.atomic.compare_add);
+	sqe->cmp_dt_hi = upper_32_bits(wr->wr.atomic.compare_add);
+	sqe->swp_dt_lo = lower_32_bits(wr->wr.atomic.swap);
+	sqe->swp_dt_hi = upper_32_bits(wr->wr.atomic.swap);
+
+	return len;
+}
+
+static int bnxt_re_build_fna_sqe(struct bnxt_re_qp *qp, void *wqe,
+				 struct ibv_send_wr *wr)
+{
+	struct bnxt_re_bsqe *hdr = wqe;
+	struct bnxt_re_atomic *sqe = ((void *)wqe +
+				      sizeof(struct bnxt_re_bsqe));
+	int len;
+
+	len = bnxt_re_build_send_sqe(qp, wqe, wr, false);
+	hdr->key_immd = wr->wr.atomic.rkey;
+	sqe->rva_lo = lower_32_bits(wr->wr.atomic.remote_addr);
+	sqe->rva_hi = upper_32_bits(wr->wr.atomic.remote_addr);
+	sqe->cmp_dt_lo = lower_32_bits(wr->wr.atomic.compare_add);
+	sqe->cmp_dt_hi = upper_32_bits(wr->wr.atomic.compare_add);
+
+	return len;
+}
+
 int bnxt_re_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 		      struct ibv_send_wr **bad)
 {
@@ -1169,27 +1210,28 @@  int bnxt_re_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 			else
 				bytes = bnxt_re_build_send_sqe(qp, sqe, wr,
 							       is_inline);
-			if (bytes < 0)
-				ret = (bytes == -EINVAL) ? EINVAL : ENOMEM;
 			break;
 		case IBV_WR_RDMA_WRITE_WITH_IMM:
 			hdr->key_immd = wr->imm_data;
 		case IBV_WR_RDMA_WRITE:
 			bytes = bnxt_re_build_rdma_sqe(qp, sqe, wr, is_inline);
-			if (bytes < 0)
-				ret = ENOMEM;
 			break;
 		case IBV_WR_RDMA_READ:
 			bytes = bnxt_re_build_rdma_sqe(qp, sqe, wr, false);
-			if (bytes < 0)
-				ret = ENOMEM;
+			break;
+		case IBV_WR_ATOMIC_CMP_AND_SWP:
+			bytes = bnxt_re_build_cns_sqe(qp, sqe, wr);
+			break;
+		case IBV_WR_ATOMIC_FETCH_AND_ADD:
+			bytes = bnxt_re_build_fna_sqe(qp, sqe, wr);
 			break;
 		default:
-			ret = EINVAL;
+			bytes = -EINVAL;
 			break;
 		}
 
-		if (ret) {
+		if (bytes < 0) {
+			ret = (bytes == -EINVAL) ? EINVAL : ENOMEM;
 			*bad = wr;
 			break;
 		}