From patchwork Tue Dec 28 15:28:39 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mike Marciniszyn X-Patchwork-Id: 436361 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id oBSFc2Ml018694 for ; Tue, 28 Dec 2010 15:38:03 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752412Ab0L1PiB (ORCPT ); Tue, 28 Dec 2010 10:38:01 -0500 Received: from [198.186.4.11] ([198.186.4.11]:37311 "EHLO kop-dev-sles11-04.qlogic.org" rhost-flags-FAIL-FAIL-OK-FAIL) by vger.kernel.org with ESMTP id S1751646Ab0L1PiA (ORCPT ); Tue, 28 Dec 2010 10:38:00 -0500 X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter1.kernel.org [140.211.167.41]); Tue, 28 Dec 2010 15:38:03 +0000 (UTC) X-Greylist: delayed 668 seconds by postgrey-1.27 at vger.kernel.org; Tue, 28 Dec 2010 10:38:00 EST Received: from kop-dev-sles11-04.qlogic.org (localhost [127.0.0.1]) by kop-dev-sles11-04.qlogic.org (Postfix) with ESMTP id D6A5C27E2DC; Tue, 28 Dec 2010 10:28:39 -0500 (EST) Subject: [PATCH 21/24] IB/qib: Issue pre-emptive NAKs on eager buffer overflow To: Roland Dreier From: Mike Marciniszyn Cc: linux-rdma@vger.kernel.org Date: Tue, 28 Dec 2010 10:28:39 -0500 Message-ID: <20101228152839.19960.42620.stgit@kop-dev-sles11-04.qlogic.org> In-Reply-To: <20101228152648.19960.84006.stgit@kop-dev-sles11-04.qlogic.org> References: <20101228152648.19960.84006.stgit@kop-dev-sles11-04.qlogic.org> User-Agent: StGit/0.15 MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 816a6bd..23e584f 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -289,14 +289,147 @@ static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail) * Returns 1 if error was a CRC, else 0. * Needed for some chip's synthesized error counters. */ -static u32 qib_rcv_hdrerr(struct qib_pportdata *ppd, u32 ctxt, - u32 eflags, u32 l, u32 etail, __le32 *rhf_addr, - struct qib_message_header *hdr) +static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, + u32 ctxt, u32 eflags, u32 l, u32 etail, + __le32 *rhf_addr, struct qib_message_header *rhdr) { u32 ret = 0; if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR)) ret = 1; + else if (eflags == QLOGIC_IB_RHF_H_TIDERR) { + /* For TIDERR and RC QPs premptively schedule a NAK */ + struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr; + struct qib_other_headers *ohdr = NULL; + struct qib_ibport *ibp = &ppd->ibport_data; + struct qib_qp *qp = NULL; + u32 tlen = qib_hdrget_length_in_bytes(rhf_addr); + u16 lid = be16_to_cpu(hdr->lrh[1]); + int lnh = be16_to_cpu(hdr->lrh[0]) & 3; + u32 qp_num; + u32 opcode; + u32 psn; + int diff; + unsigned long flags; + + /* Sanity check packet */ + if (tlen < 24) + goto drop; + + if (lid < QIB_MULTICAST_LID_BASE) { + lid &= ~((1 << ppd->lmc) - 1); + if (unlikely(lid != ppd->lid)) + goto drop; + } + + /* Check for GRH */ + if (lnh == QIB_LRH_BTH) + ohdr = &hdr->u.oth; + else if (lnh == QIB_LRH_GRH) { + u32 vtf; + + ohdr = &hdr->u.l.oth; + if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) + goto drop; + vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow); + if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) + goto drop; + } else + goto drop; + + /* Get opcode and PSN from packet */ + opcode = be32_to_cpu(ohdr->bth[0]); + opcode >>= 24; + psn = be32_to_cpu(ohdr->bth[2]); + + /* Get the destination QP number. */ + qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK; + if (qp_num != QIB_MULTICAST_QPN) { + int ruc_res; + qp = qib_lookup_qpn(ibp, qp_num); + if (!qp) + goto drop; + + /* + * Handle only RC QPs - for other QP types drop error + * packet. + */ + spin_lock(&qp->r_lock); + + /* Check for valid receive state. */ + if (!(ib_qib_state_ops[qp->state] & + QIB_PROCESS_RECV_OK)) { + ibp->n_pkt_drops++; + goto unlock; + } + + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + spin_lock_irqsave(&qp->s_lock, flags); + ruc_res = + qib_ruc_check_hdr( + ibp, hdr, + lnh == QIB_LRH_GRH, + qp, + be32_to_cpu(ohdr->bth[0])); + if (ruc_res) { + spin_unlock_irqrestore(&qp->s_lock, + flags); + goto unlock; + } + spin_unlock_irqrestore(&qp->s_lock, flags); + + /* Only deal with RDMA Writes for now */ + if (opcode < + IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) { + diff = qib_cmp24(psn, qp->r_psn); + if (!qp->r_nak_state && diff >= 0) { + ibp->n_rc_seqnak++; + qp->r_nak_state = + IB_NAK_PSN_ERROR; + /* Use the expected PSN. */ + qp->r_ack_psn = qp->r_psn; + /* + * Wait to send the sequence + * NAK until all packets + * in the receive queue have + * been processed. + * Otherwise, we end up + * propagating congestion. + */ + if (list_empty(&qp->rspwait)) { + qp->r_flags |= + QIB_R_RSP_NAK; + atomic_inc( + &qp->refcount); + list_add_tail( + &qp->rspwait, + &rcd->qp_wait_list); + } + } /* Out of sequence NAK */ + } /* QP Request NAKs */ + break; + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_UD: + case IB_QPT_UC: + default: + /* For now don't handle any other QP types */ + break; + } + +unlock: + spin_unlock(&qp->r_lock); + /* + * Notify qib_destroy_qp() if it is waiting + * for us to finish. + */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } /* Unicast QP */ + } /* Valid packet with TIDErr */ + +drop: return ret; } @@ -376,7 +509,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts) * packets; only qibhdrerr should be set. */ if (unlikely(eflags)) - crcs += qib_rcv_hdrerr(ppd, rcd->ctxt, eflags, l, + crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l, etail, rhf_addr, hdr); else if (etype == RCVHQ_RCV_TYPE_NON_KD) { qib_ib_rcv(rcd, hdr, ebuf, tlen);