diff mbox

[rdma-core,3/8] mlx4: Add sparse annotations

Message ID 1499894262-10761-4-git-send-email-jgunthorpe@obsidianresearch.com (mailing list archive)
State Superseded
Headers show

Commit Message

Jason Gunthorpe July 12, 2017, 9:17 p.m. UTC
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
---
 CMakeLists.txt          |  4 +--
 providers/mlx4/cq.c     |  8 +++--
 providers/mlx4/dbrec.c  |  6 ++--
 providers/mlx4/mlx4.h   | 16 +++++-----
 providers/mlx4/mlx4dv.h | 84 ++++++++++++++++++++++++-------------------------
 providers/mlx4/qp.c     |  4 +--
 providers/mlx4/verbs.c  | 11 ++-----
 7 files changed, 64 insertions(+), 69 deletions(-)

Comments

Leon Romanovsky July 13, 2017, 7:23 a.m. UTC | #1
On Wed, Jul 12, 2017 at 03:17:37PM -0600, Jason Gunthorpe wrote:
> Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
> ---
>  CMakeLists.txt          |  4 +--
>  providers/mlx4/cq.c     |  8 +++--
>  providers/mlx4/dbrec.c  |  6 ++--
>  providers/mlx4/mlx4.h   | 16 +++++-----
>  providers/mlx4/mlx4dv.h | 84 ++++++++++++++++++++++++-------------------------
>  providers/mlx4/qp.c     |  4 +--
>  providers/mlx4/verbs.c  | 11 ++-----
>  7 files changed, 64 insertions(+), 69 deletions(-)
>
> diff --git a/CMakeLists.txt b/CMakeLists.txt
> index 16196205035f61..1f319390f2e05c 100644
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -414,8 +414,8 @@ add_subdirectory(providers/cxgb3) # NO SPARSE
>  add_subdirectory(providers/cxgb4) # NO SPARSE
>  add_subdirectory(providers/hns) # NO SPARSE
>  add_subdirectory(providers/i40iw) # NO SPARSE
> -add_subdirectory(providers/mlx4) # NO SPARSE
> -add_subdirectory(providers/mlx4/man) # NO SPARSE
> +add_subdirectory(providers/mlx4)
> +add_subdirectory(providers/mlx4/man)
>  add_subdirectory(providers/mlx5) # NO SPARSE
>  add_subdirectory(providers/mlx5/man) # NO SPARSE
>  add_subdirectory(providers/mthca) # NO SPARSE
> diff --git a/providers/mlx4/cq.c b/providers/mlx4/cq.c
> index 50adebb82237f2..afc0e3b8c8eeb1 100644
> --- a/providers/mlx4/cq.c
> +++ b/providers/mlx4/cq.c
> @@ -287,7 +287,7 @@ static inline int mlx4_parse_cqe(struct mlx4_cq *cq,
>  		case MLX4_RECV_OPCODE_SEND_INVAL:
>  			wc->opcode   = IBV_WC_RECV;
>  			wc->wc_flags |= IBV_WC_WITH_INV;
> -			wc->imm_data = be32toh(cqe->immed_rss_invalid);
> +			wc->invalidated_rkey = be32toh(cqe->immed_rss_invalid);
>  			break;
>  		case MLX4_RECV_OPCODE_SEND:
>  			wc->opcode   = IBV_WC_RECV;
> @@ -550,13 +550,15 @@ static uint32_t mlx4_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq)
>  	return ecqe->vendor_err;
>  }
>
> -static uint32_t mlx4_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq)
> +static __be32 mlx4_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq)
>  {
>  	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
>
>  	switch (mlx4dv_get_cqe_opcode(cq->cqe)) {
>  	case MLX4_RECV_OPCODE_SEND_INVAL:
> -		return be32toh(cq->cqe->immed_rss_invalid);
> +		/* This is returning invalidate_rkey which is in host order, see
> +		 * ibv_wc_read_invalidated_rkey */
> +		return (__force __be32)be32toh(cq->cqe->immed_rss_invalid);

Jason,
It is insane construction, convert to host-> force to be32 -> use as uint32_t.

>  	default:
>  		return cq->cqe->immed_rss_invalid;
>  	}
> diff --git a/providers/mlx4/dbrec.c b/providers/mlx4/dbrec.c
> index 3e875738fa61d8..84b01770dcb2c5 100644
> --- a/providers/mlx4/dbrec.c
> +++ b/providers/mlx4/dbrec.c
> @@ -84,7 +84,7 @@ static struct mlx4_db_page *__add_page(struct mlx4_context *context,
>  	return page;
>  }
>
> -uint32_t *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type)
> +__be32 *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type)
>  {
>  	struct mlx4_db_page *page;
>  	uint32_t *db = NULL;
> @@ -113,10 +113,10 @@ found:
>  out:
>  	pthread_mutex_unlock(&context->db_list_mutex);
>
> -	return db;
> +	return (__force __be32 *)db;
>  }

I see that librdmacm/rsocket.c full of these __force annotations.
I would be very happy to see it fixed rather suppressed, but don't know
if it is realistic goal or not.

Thanks
Christoph Hellwig July 13, 2017, 8:07 a.m. UTC | #2
> >  	case MLX4_RECV_OPCODE_SEND_INVAL:
> > -		return be32toh(cq->cqe->immed_rss_invalid);
> > +		/* This is returning invalidate_rkey which is in host order, see
> > +		 * ibv_wc_read_invalidated_rkey */
> > +		return (__force __be32)be32toh(cq->cqe->immed_rss_invalid);
> 
> Jason,
> It is insane construction, convert to host-> force to be32 -> use as uint32_t.

Yes, this doesn't make sense to me - we are swapping it but pretending
it's native?  There's certainly something very odd going on here.

It seems like the ->read_imm_data needs to be split and/or have a
prototype change.  The sad part is that it is an exported ABI, although
one that is almost undocumented.

> >  }
> >
> > -uint32_t *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type)
> > +__be32 *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type)
> >  {
> >  	struct mlx4_db_page *page;
> >  	uint32_t *db = NULL;
> > @@ -113,10 +113,10 @@ found:
> >  out:
> >  	pthread_mutex_unlock(&context->db_list_mutex);
> >
> > -	return db;
> > +	return (__force __be32 *)db;
> >  }
> 
> I see that librdmacm/rsocket.c full of these __force annotations.
> I would be very happy to see it fixed rather suppressed, but don't know
> if it is realistic goal or not.

The db local variable in mlx4_alloc_db should be easily changed to a
__be32 pointer - it is derived from a void pointer using pointer
arithmetics.

In general a __foce for endian annotations is a GIANT WARNING sign.
Don't ever add one without a long discussion first, and even then only
in a well documented helper and not randomly all over code.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe July 13, 2017, 5:16 p.m. UTC | #3
On Thu, Jul 13, 2017 at 01:07:52AM -0700, Christoph Hellwig wrote:
> > >  	case MLX4_RECV_OPCODE_SEND_INVAL:
> > > -		return be32toh(cq->cqe->immed_rss_invalid);
> > > +		/* This is returning invalidate_rkey which is in host order, see
> > > +		 * ibv_wc_read_invalidated_rkey */
> > > +		return (__force __be32)be32toh(cq->cqe->immed_rss_invalid);
> > 
> > Jason,
> > It is insane construction, convert to host-> force to be32 -> use as uint32_t.
> 
> Yes, this doesn't make sense to me - we are swapping it but pretending
> it's native?  There's certainly something very odd going on here.

Yes, it is odd.

mlx4_cq_read_wc_imm_data is an ABI entry point, we cannot change it.

The ABI defines it to return a be32 when the wc has immediate data and
a u32 when the wc has an remote invalidated rkey - hence the madness.

My approach is to retain this ABI, and put two user facing wrappers
around the function call that return the correct types depending on
the expected data accessed. Since the function is called '*_imm_data'
I choose to return a be32 to match the choice of name.

> It seems like the ->read_imm_data needs to be split and/or have a
> prototype change.  The sad part is that it is an exported ABI, although
> one that is almost undocumented.

Even if we split it, the existing version must retain the same
behavior and the only way to get such insane behaviar and be sparse
clean is with force right here.

Since we are stuck with that I can't really justify adding another
entry point just for invalidated rkey.. I can't really get rid of the
ifdef CHECKER in the public header with this approach.

> > I see that librdmacm/rsocket.c full of these __force annotations.
> > I would be very happy to see it fixed rather suppressed, but don't know
> > if it is realistic goal or not.
> 
> The db local variable in mlx4_alloc_db should be easily changed to a
> __be32 pointer - it is derived from a void pointer using pointer
> arithmetics.

Yeah, OK, I thought I tried that, but yep, it is OK. I will update
both places.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 16196205035f61..1f319390f2e05c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -414,8 +414,8 @@  add_subdirectory(providers/cxgb3) # NO SPARSE
 add_subdirectory(providers/cxgb4) # NO SPARSE
 add_subdirectory(providers/hns) # NO SPARSE
 add_subdirectory(providers/i40iw) # NO SPARSE
-add_subdirectory(providers/mlx4) # NO SPARSE
-add_subdirectory(providers/mlx4/man) # NO SPARSE
+add_subdirectory(providers/mlx4)
+add_subdirectory(providers/mlx4/man)
 add_subdirectory(providers/mlx5) # NO SPARSE
 add_subdirectory(providers/mlx5/man) # NO SPARSE
 add_subdirectory(providers/mthca) # NO SPARSE
diff --git a/providers/mlx4/cq.c b/providers/mlx4/cq.c
index 50adebb82237f2..afc0e3b8c8eeb1 100644
--- a/providers/mlx4/cq.c
+++ b/providers/mlx4/cq.c
@@ -287,7 +287,7 @@  static inline int mlx4_parse_cqe(struct mlx4_cq *cq,
 		case MLX4_RECV_OPCODE_SEND_INVAL:
 			wc->opcode   = IBV_WC_RECV;
 			wc->wc_flags |= IBV_WC_WITH_INV;
-			wc->imm_data = be32toh(cqe->immed_rss_invalid);
+			wc->invalidated_rkey = be32toh(cqe->immed_rss_invalid);
 			break;
 		case MLX4_RECV_OPCODE_SEND:
 			wc->opcode   = IBV_WC_RECV;
@@ -550,13 +550,15 @@  static uint32_t mlx4_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq)
 	return ecqe->vendor_err;
 }
 
-static uint32_t mlx4_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq)
+static __be32 mlx4_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq)
 {
 	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
 
 	switch (mlx4dv_get_cqe_opcode(cq->cqe)) {
 	case MLX4_RECV_OPCODE_SEND_INVAL:
-		return be32toh(cq->cqe->immed_rss_invalid);
+		/* This is returning invalidate_rkey which is in host order, see
+		 * ibv_wc_read_invalidated_rkey */
+		return (__force __be32)be32toh(cq->cqe->immed_rss_invalid);
 	default:
 		return cq->cqe->immed_rss_invalid;
 	}
diff --git a/providers/mlx4/dbrec.c b/providers/mlx4/dbrec.c
index 3e875738fa61d8..84b01770dcb2c5 100644
--- a/providers/mlx4/dbrec.c
+++ b/providers/mlx4/dbrec.c
@@ -84,7 +84,7 @@  static struct mlx4_db_page *__add_page(struct mlx4_context *context,
 	return page;
 }
 
-uint32_t *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type)
+__be32 *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type)
 {
 	struct mlx4_db_page *page;
 	uint32_t *db = NULL;
@@ -113,10 +113,10 @@  found:
 out:
 	pthread_mutex_unlock(&context->db_list_mutex);
 
-	return db;
+	return (__force __be32 *)db;
 }
 
-void mlx4_free_db(struct mlx4_context *context, enum mlx4_db_type type, uint32_t *db)
+void mlx4_free_db(struct mlx4_context *context, enum mlx4_db_type type, __be32 *db)
 {
 	struct mlx4_db_page *page;
 	uintptr_t ps = to_mdev(context->ibv_ctx.device)->page_size;
diff --git a/providers/mlx4/mlx4.h b/providers/mlx4/mlx4.h
index f38feedddf4705..f83215933e9dce 100644
--- a/providers/mlx4/mlx4.h
+++ b/providers/mlx4/mlx4.h
@@ -159,8 +159,8 @@  struct mlx4_cq {
 	pthread_spinlock_t		lock;
 	uint32_t			cqn;
 	uint32_t			cons_index;
-	uint32_t		       *set_ci_db;
-	uint32_t		       *arm_db;
+	__be32			       *set_ci_db;
+	__be32			       *arm_db;
 	int				arm_sn;
 	int				cqe_size;
 	struct mlx4_qp			*cur_qp;
@@ -179,7 +179,7 @@  struct mlx4_srq {
 	int				wqe_shift;
 	int				head;
 	int				tail;
-	uint32_t		       *db;
+	__be32			       *db;
 	uint16_t			counter;
 	uint8_t				ext_srq;
 };
@@ -202,12 +202,12 @@  struct mlx4_qp {
 	int				max_inline_data;
 	int				buf_size;
 
-	uint32_t			doorbell_qpn;
-	uint32_t			sq_signal_bits;
+	__be32				doorbell_qpn;
+	__be32				sq_signal_bits;
 	int				sq_spare_wqes;
 	struct mlx4_wq			sq;
 
-	uint32_t		       *db;
+	__be32			       *db;
 	struct mlx4_wq			rq;
 
 	uint8_t				link_layer;
@@ -292,8 +292,8 @@  static inline int cleanup_on_fatal(int ret)
 int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size);
 void mlx4_free_buf(struct mlx4_buf *buf);
 
-uint32_t *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type);
-void mlx4_free_db(struct mlx4_context *context, enum mlx4_db_type type, uint32_t *db);
+__be32 *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type);
+void mlx4_free_db(struct mlx4_context *context, enum mlx4_db_type type, __be32 *db);
 
 int mlx4_query_device(struct ibv_context *context,
 		       struct ibv_device_attr *attr);
diff --git a/providers/mlx4/mlx4dv.h b/providers/mlx4/mlx4dv.h
index ea16ce8c85dcd4..16045ef223e746 100644
--- a/providers/mlx4/mlx4dv.h
+++ b/providers/mlx4/mlx4dv.h
@@ -125,20 +125,20 @@  enum mlx4_cqe_status {
 };
 
 struct mlx4_cqe {
-	uint32_t	vlan_my_qpn;
-	uint32_t	immed_rss_invalid;
-	uint32_t	g_mlpath_rqpn;
+	__be32		vlan_my_qpn;
+	__be32		immed_rss_invalid;
+	__be32		g_mlpath_rqpn;
 	union {
 		struct {
-			uint16_t	sl_vid;
-			uint16_t	rlid;
+			__be16	sl_vid;
+			__be16	rlid;
 		};
-		uint32_t ts_47_16;
+		__be32	ts_47_16;
 	};
-	uint32_t	status;
-	uint32_t	byte_cnt;
-	uint16_t	wqe_index;
-	uint16_t	checksum;
+	__be32		status;
+	__be32		byte_cnt;
+	__be16		wqe_index;
+	__be16		checksum;
 	uint8_t		reserved3;
 	uint8_t		ts_15_8;
 	uint8_t		ts_7_0;
@@ -146,9 +146,9 @@  struct mlx4_cqe {
 };
 
 struct mlx4dv_qp {
-	uint32_t		*rdb;
+	__be32		       *rdb;
 	uint32_t		*sdb;
-	uint32_t		doorbell_qpn;
+	__be32			doorbell_qpn;
 	struct {
 		uint32_t	wqe_cnt;
 		int		wqe_shift;
@@ -173,8 +173,8 @@  struct mlx4dv_cq {
 	} buf;
 	uint32_t			cqe_cnt;
 	uint32_t			cqn;
-	uint32_t		       *set_ci_db;
-	uint32_t		       *arm_db;
+	__be32			       *set_ci_db;
+	__be32			       *arm_db;
 	int				arm_sn;
 	int				cqe_size;
 	uint64_t			comp_mask;
@@ -187,7 +187,7 @@  struct mlx4dv_srq {
 	int				wqe_shift;
 	int				head;
 	int				tail;
-	uint32_t		       *db;
+	__be32			       *db;
 	uint64_t			comp_mask;
 };
 
@@ -284,28 +284,28 @@  enum {
 
 struct mlx4_wqe_local_inval_seg {
 	uint64_t		reserved1;
-	uint32_t		mem_key;
+	__be32			mem_key;
 	uint32_t		reserved2;
 	uint64_t		reserved3[2];
 };
 
 struct mlx4_wqe_bind_seg {
-	uint32_t		flags1;
-	uint32_t		flags2;
-	uint32_t		new_rkey;
-	uint32_t		lkey;
-	uint64_t		addr;
-	uint64_t		length;
+	__be32			flags1;
+	__be32			flags2;
+	__be32			new_rkey;
+	__be32			lkey;
+	__be64			addr;
+	__be64			length;
 };
 
 struct mlx4_wqe_ctrl_seg {
-	uint32_t		owner_opcode;
+	__be32			owner_opcode;
 	union {
 		struct {
 			uint8_t			reserved[3];
 			uint8_t			fence_size;
 		};
-		uint32_t	bf_qpn;
+		__be32		bf_qpn;
 	};
 	/*
 	 * High 24 bits are SRC remote buffer; low 8 bits are flags:
@@ -316,61 +316,61 @@  struct mlx4_wqe_ctrl_seg {
 	 * [1]   SE (solicited event)
 	 * [0]   FL (force loopback)
 	 */
-	uint32_t		srcrb_flags;
+	__be32			srcrb_flags;
 	/*
 	 * imm is immediate data for send/RDMA write w/ immediate;
 	 * also invalidation key for send with invalidate; input
 	 * modifier for WQEs on CCQs.
 	 */
-	uint32_t		imm;
+	__be32			imm;
 };
 
 struct mlx4_av {
-	uint32_t			port_pd;
+	__be32				port_pd;
 	uint8_t				reserved1;
 	uint8_t				g_slid;
-	uint16_t			dlid;
+	__be16				dlid;
 	uint8_t				reserved2;
 	uint8_t				gid_index;
 	uint8_t				stat_rate;
 	uint8_t				hop_limit;
-	uint32_t			sl_tclass_flowlabel;
+	__be32				sl_tclass_flowlabel;
 	uint8_t				dgid[16];
 };
 
 struct mlx4_wqe_datagram_seg {
 	struct mlx4_av		av;
-	uint32_t		dqpn;
-	uint32_t		qkey;
-	uint16_t		vlan;
+	__be32			dqpn;
+	__be32			qkey;
+	__be16			vlan;
 	uint8_t			mac[6];
 };
 
 struct mlx4_wqe_data_seg {
-	uint32_t		byte_count;
-	uint32_t		lkey;
-	uint64_t		addr;
+	__be32			byte_count;
+	__be32			lkey;
+	__be64			addr;
 };
 
 struct mlx4_wqe_inline_seg {
-	uint32_t		byte_count;
+	__be32			byte_count;
 };
 
 struct mlx4_wqe_srq_next_seg {
 	uint16_t		reserved1;
-	uint16_t		next_wqe_index;
+	__be16			next_wqe_index;
 	uint32_t		reserved2[3];
 };
 
 struct mlx4_wqe_raddr_seg {
-	uint64_t		raddr;
-	uint32_t		rkey;
-	uint32_t		reserved;
+	__be64			raddr;
+	__be32			rkey;
+	__be32			reserved;
 };
 
 struct mlx4_wqe_atomic_seg {
-	uint64_t		swap_add;
-	uint64_t		compare;
+	__be64			swap_add;
+	__be64			compare;
 };
 
 /*
diff --git a/providers/mlx4/qp.c b/providers/mlx4/qp.c
index 1be100b9c2abf7..6d5986ec1c2958 100644
--- a/providers/mlx4/qp.c
+++ b/providers/mlx4/qp.c
@@ -304,7 +304,7 @@  int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 			case IBV_WR_LOCAL_INV:
 				ctrl->srcrb_flags |=
 					htobe32(MLX4_WQE_CTRL_STRONG_ORDER);
-				set_local_inv_seg(wqe, wr->imm_data);
+				set_local_inv_seg(wqe, wr->invalidate_rkey);
 				wqe  += sizeof
 					(struct mlx4_wqe_local_inval_seg);
 				size += sizeof
@@ -320,7 +320,7 @@  int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 					(struct mlx4_wqe_bind_seg) / 16;
 				break;
 			case IBV_WR_SEND_WITH_INV:
-				ctrl->imm = htobe32(wr->imm_data);
+				ctrl->imm = htobe32(wr->invalidate_rkey);
 				break;
 
 			default:
diff --git a/providers/mlx4/verbs.c b/providers/mlx4/verbs.c
index 548ac2ac2864f0..6a240c5146092f 100644
--- a/providers/mlx4/verbs.c
+++ b/providers/mlx4/verbs.c
@@ -104,7 +104,7 @@  int mlx4_query_device_ex(struct ibv_context *context,
 
 static int mlx4_read_clock(struct ibv_context *context, uint64_t *cycles)
 {
-	unsigned int clockhi, clocklo, clockhi1;
+	uint32_t clockhi, clocklo, clockhi1;
 	int i;
 	struct mlx4_context *ctx = to_mctx(context);
 
@@ -1123,14 +1123,7 @@  int mlx4_destroy_qp(struct ibv_qp *ibqp)
 
 static int link_local_gid(const union ibv_gid *gid)
 {
-	uint32_t *tmp = (uint32_t *)gid->raw;
-	uint32_t hi = tmp[0];
-	uint32_t lo = tmp[1];
-
-	if (hi == htobe32(0xfe800000) && lo == 0)
-		return 1;
-
-	return 0;
+	return gid->global.subnet_prefix == htobe64(0xfe80000000000000ULL);
 }
 
 static int is_multicast_gid(const union ibv_gid *gid)