diff mbox

[for-rc,v2,1/6] IB/hfi1: Fix handling of FECN marked multicast packet

Message ID 20180501123532.24520.80857.stgit@scvm10.sc.intel.com (mailing list archive)
State Accepted
Headers show

Commit Message

Dennis Dalessandro May 1, 2018, 12:35 p.m. UTC
From: Mike Marciniszyn <mike.marciniszyn@intel.com>

The code for handling a marked UD packet unconditionally returns the
dlid in the header of the FECN marked packet.  This is not correct
for multicast packets where the DLID is in the multicast range.

The subsequent attempt to send the CNP with the multicast lid will
cause the chip to halt the ack send context because the source
lid doesn't match the chip programming.   The send context will
be halted and flush any other pending packets in the pio ring causing
the CNP to not be sent.

A part of investigating the fix, it was determined that the 16B work
broke the FECN routine badly with inconsistent use of 16 bit and 32 bits
types for lids and pkeys.  Since the port's source lid was correctly 32
bits the type mixmatches need to be dealt with at the same time as
fixing the CNP header issue.

Fix these issues by:
- Using the ports lid for as the SLID for responding to FECN marked UD
  packets
- Insure pkey is always 16 bit in this and subordinate routines
- Insure lids are 32 bits in this and subordinate routines

Cc: <stable@vger.kernel.org> # 4.14.x
Fixes: 88733e3b8450 ("IB/hfi1: Add 16B UD support")
Reviewed-by: Don Hiatt <don.hiatt@intel.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
---
 drivers/infiniband/hw/hfi1/driver.c |   19 +++++++++++++++----
 drivers/infiniband/hw/hfi1/hfi.h    |    8 ++++----
 drivers/infiniband/hw/hfi1/ud.c     |    6 +++---
 3 files changed, 22 insertions(+), 11 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Jason Gunthorpe May 1, 2018, 6:05 p.m. UTC | #1
On Tue, May 01, 2018 at 05:35:36AM -0700, Dennis Dalessandro wrote:
> @@ -719,7 +719,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
>  
>  	lrh0 |= (sc5 & 0xf) << 12 | sl << 4;
>  
> -	bth0 = pkey | (IB_OPCODE_CNP << 24);
> +	bth0 = (u32)pkey | (IB_OPCODE_CNP << 24);

What do you think this cast does??

void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
                    u32 remote_qpn, u32 pkey, u32 slid, u32 dlid,
                    u8 sc5, const struct ib_grh *old_grh)
{


Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marciniszyn, Mike May 1, 2018, 9:58 p.m. UTC | #2
> Subject: Re: [PATCH for-rc v2 1/6] IB/hfi1: Fix handling of FECN marked
> multicast packet
> 
> On Tue, May 01, 2018 at 05:35:36AM -0700, Dennis Dalessandro wrote:
> > @@ -719,7 +719,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct
> rvt_qp *qp, u32 remote_qpn,
> >
> >  	lrh0 |= (sc5 & 0xf) << 12 | sl << 4;
> >
> > -	bth0 = pkey | (IB_OPCODE_CNP << 24);
> > +	bth0 = (u32)pkey | (IB_OPCODE_CNP << 24);
> 

I'm verifying the fix w/o the cast...

But I see Doug pulled the fix?

Mike


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Doug Ledford May 2, 2018, 2:24 a.m. UTC | #3
On Tue, 2018-05-01 at 21:58 +0000, Marciniszyn, Mike wrote:
> > Subject: Re: [PATCH for-rc v2 1/6] IB/hfi1: Fix handling of FECN marked
> > multicast packet
> > 
> > On Tue, May 01, 2018 at 05:35:36AM -0700, Dennis Dalessandro wrote:
> > > @@ -719,7 +719,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct
> > 
> > rvt_qp *qp, u32 remote_qpn,
> > > 
> > >  	lrh0 |= (sc5 & 0xf) << 12 | sl << 4;
> > > 
> > > -	bth0 = pkey | (IB_OPCODE_CNP << 24);
> > > +	bth0 = (u32)pkey | (IB_OPCODE_CNP << 24);
> 
> I'm verifying the fix w/o the cast...
> 
> But I see Doug pulled the fix?

It's in my wip branch, so until I merge it into the official for-rc
branch, I can still fix up minor things like this.
diff mbox

Patch

diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 46d1475..bd837a0 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -433,31 +433,43 @@  void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
 			       bool do_cnp)
 {
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct ib_other_headers *ohdr = pkt->ohdr;
 	struct ib_grh *grh = pkt->grh;
 	u32 rqpn = 0, bth1;
-	u16 pkey, rlid, dlid = ib_get_dlid(pkt->hdr);
+	u16 pkey;
+	u32 rlid, slid, dlid = 0;
 	u8 hdr_type, sc, svc_type;
 	bool is_mcast = false;
 
+	/* can be called from prescan */
 	if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
 		is_mcast = hfi1_is_16B_mcast(dlid);
 		pkey = hfi1_16B_get_pkey(pkt->hdr);
 		sc = hfi1_16B_get_sc(pkt->hdr);
+		dlid = hfi1_16B_get_dlid(pkt->hdr);
+		slid = hfi1_16B_get_slid(pkt->hdr);
 		hdr_type = HFI1_PKT_TYPE_16B;
 	} else {
 		is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
 			   (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
 		pkey = ib_bth_get_pkey(ohdr);
 		sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf);
+		dlid = ib_get_dlid(pkt->hdr);
+		slid = ib_get_slid(pkt->hdr);
 		hdr_type = HFI1_PKT_TYPE_9B;
 	}
 
 	switch (qp->ibqp.qp_type) {
+	case IB_QPT_UD:
+		dlid = ppd->lid;
+		rlid = slid;
+		rqpn = ib_get_sqpn(pkt->ohdr);
+		svc_type = IB_CC_SVCTYPE_UD;
+		break;
 	case IB_QPT_SMI:
 	case IB_QPT_GSI:
-	case IB_QPT_UD:
-		rlid = ib_get_slid(pkt->hdr);
+		rlid = slid;
 		rqpn = ib_get_sqpn(pkt->ohdr);
 		svc_type = IB_CC_SVCTYPE_UD;
 		break;
@@ -482,7 +494,6 @@  void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
 					      dlid, rlid, sc, grh);
 
 	if (!is_mcast && (bth1 & IB_BECN_SMASK)) {
-		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 		u32 lqpn = bth1 & RVT_QPN_MASK;
 		u8 sl = ibp->sc_to_sl[sc];
 
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 32c4826..cac2c62 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1537,13 +1537,13 @@  static inline u32 egress_cycles(u32 len, u32 rate)
 void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn,
 		  u32 rqpn, u8 svc_type);
 void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
-		u32 pkey, u32 slid, u32 dlid, u8 sc5,
+		u16 pkey, u32 slid, u32 dlid, u8 sc5,
 		const struct ib_grh *old_grh);
 void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
-		    u32 remote_qpn, u32 pkey, u32 slid, u32 dlid,
+		    u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
 		    u8 sc5, const struct ib_grh *old_grh);
 typedef void (*hfi1_handle_cnp)(struct hfi1_ibport *ibp, struct rvt_qp *qp,
-				u32 remote_qpn, u32 pkey, u32 slid, u32 dlid,
+				u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
 				u8 sc5, const struct ib_grh *old_grh);
 
 #define PKEY_CHECK_INVALID -1
@@ -2437,7 +2437,7 @@  static inline void hfi1_make_16b_hdr(struct hfi1_16b_header *hdr,
 		((slid >> OPA_16B_SLID_SHIFT) << OPA_16B_SLID_HIGH_SHIFT);
 	lrh2 = (lrh2 & ~OPA_16B_DLID_MASK) |
 		((dlid >> OPA_16B_DLID_SHIFT) << OPA_16B_DLID_HIGH_SHIFT);
-	lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | (pkey << OPA_16B_PKEY_SHIFT);
+	lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | ((u32)pkey << OPA_16B_PKEY_SHIFT);
 	lrh2 = (lrh2 & ~OPA_16B_L4_MASK) | l4;
 
 	hdr->lrh[0] = lrh0;
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index bcf3b0b..49a38a6 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -628,7 +628,7 @@  int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey)
 }
 
 void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
-		    u32 remote_qpn, u32 pkey, u32 slid, u32 dlid,
+		    u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
 		    u8 sc5, const struct ib_grh *old_grh)
 {
 	u64 pbc, pbc_flags = 0;
@@ -687,7 +687,7 @@  void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
 }
 
 void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
-		u32 pkey, u32 slid, u32 dlid, u8 sc5,
+		u16 pkey, u32 slid, u32 dlid, u8 sc5,
 		const struct ib_grh *old_grh)
 {
 	u64 pbc, pbc_flags = 0;
@@ -719,7 +719,7 @@  void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
 
 	lrh0 |= (sc5 & 0xf) << 12 | sl << 4;
 
-	bth0 = pkey | (IB_OPCODE_CNP << 24);
+	bth0 = (u32)pkey | (IB_OPCODE_CNP << 24);
 	ohdr->bth[0] = cpu_to_be32(bth0);
 
 	ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT));