From patchwork Thu Dec 3 19:05:37 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ralph Campbell X-Patchwork-Id: 64627 Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id nB3J61TT027769 for ; Thu, 3 Dec 2009 19:06:01 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756634AbZLCTFg (ORCPT ); Thu, 3 Dec 2009 14:05:36 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1756677AbZLCTFg (ORCPT ); Thu, 3 Dec 2009 14:05:36 -0500 Received: from vpn.pathscale.com ([198.186.3.75]:52984 "HELO mx.mv.qlogic.com" rhost-flags-OK-FAIL-OK-FAIL) by vger.kernel.org with SMTP id S1756634AbZLCTFa (ORCPT ); Thu, 3 Dec 2009 14:05:30 -0500 Received: from chromite.mv.qlogic.com (chromite.mv.qlogic.com [10.29.2.82]) by mx.mv.qlogic.com (Postfix) with ESMTP id 890C5957C009; Thu, 3 Dec 2009 11:05:37 -0800 (PST) Received: from chromite.mv.qlogic.com (localhost.localdomain [127.0.0.1]) by chromite.mv.qlogic.com (Postfix) with ESMTP id 818C4CC2A1; Thu, 3 Dec 2009 11:05:37 -0800 (PST) From: Ralph Campbell Subject: [PATCH v2 29/51] IB/qib: Add qib_qp.c To: Roland Dreier Cc: linux-rdma@vger.kernel.org Date: Thu, 03 Dec 2009 11:05:37 -0800 Message-ID: <20091203190537.29507.8823.stgit@chromite.mv.qlogic.com> In-Reply-To: <20091203190305.29507.58158.stgit@chromite.mv.qlogic.com> References: <20091203190305.29507.58158.stgit@chromite.mv.qlogic.com> User-Agent: StGIT/0.14.2 MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c new file mode 100644 index 0000000..d0ccbf5 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -0,0 +1,1241 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "qib.h" + +#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) +#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) + +static inline unsigned mk_qpn(struct qib_qpn_table *qpt, + struct qpn_map *map, unsigned off) +{ + return (map - qpt->map) * BITS_PER_PAGE + off; +} + +static inline unsigned find_next_offset(struct qib_qpn_table *qpt, + struct qpn_map *map, unsigned off, + unsigned r) +{ + if (qpt->mask) { + off++; + if ((off & qpt->mask) >> 1 != r) + off = ((off & qpt->mask) ? + (off | qpt->mask) + 1 : off) | (r << 1); + } else + off = find_next_zero_bit(map->page, BITS_PER_PAGE, off); + return off; +} + +/* + * Convert the AETH credit code into the number of credits. + */ +static u32 credit_table[31] = { + 0, /* 0 */ + 1, /* 1 */ + 2, /* 2 */ + 3, /* 3 */ + 4, /* 4 */ + 6, /* 5 */ + 8, /* 6 */ + 12, /* 7 */ + 16, /* 8 */ + 24, /* 9 */ + 32, /* A */ + 48, /* B */ + 64, /* C */ + 96, /* D */ + 128, /* E */ + 192, /* F */ + 256, /* 10 */ + 384, /* 11 */ + 512, /* 12 */ + 768, /* 13 */ + 1024, /* 14 */ + 1536, /* 15 */ + 2048, /* 16 */ + 3072, /* 17 */ + 4096, /* 18 */ + 6144, /* 19 */ + 8192, /* 1A */ + 12288, /* 1B */ + 16384, /* 1C */ + 24576, /* 1D */ + 32768 /* 1E */ +}; + +static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map) +{ + unsigned long page = get_zeroed_page(GFP_KERNEL); + + /* + * Free the page if someone raced with us installing it. + */ + + spin_lock(&qpt->lock); + if (map->page) + free_page(page); + else + map->page = (void *)page; + spin_unlock(&qpt->lock); +} + +/* + * Allocate the next available QPN or + * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. + */ +static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, + enum ib_qp_type type, u8 port) +{ + u32 i, offset, max_scan, qpn; + struct qpn_map *map; + u32 ret; + int r; + + if (type == IB_QPT_SMI || type == IB_QPT_GSI) { + unsigned n; + + ret = type == IB_QPT_GSI; + n = 1 << (ret + 2 * (port - 1)); + spin_lock(&qpt->lock); + if (qpt->flags & n) + ret = -EINVAL; + else + qpt->flags |= n; + spin_unlock(&qpt->lock); + goto bail; + } + + r = smp_processor_id(); + if (r >= dd->n_krcv_queues) + r %= dd->n_krcv_queues; + qpn = qpt->last + 1; + if (qpn >= QPN_MAX) + qpn = 2; + if (qpt->mask && ((qpn & qpt->mask) >> 1) != r) + qpn = ((qpn & qpt->mask) ? (qpn | qpt->mask) + 1 : qpn) | + (r << 1); + offset = qpn & BITS_PER_PAGE_MASK; + map = &qpt->map[qpn / BITS_PER_PAGE]; + max_scan = qpt->nmaps - !offset; + for (i = 0;;) { + if (unlikely(!map->page)) { + get_map_page(qpt, map); + if (unlikely(!map->page)) + break; + } + do { + if (!test_and_set_bit(offset, map->page)) { + qpt->last = qpn; + ret = qpn; + goto bail; + } + offset = find_next_offset(qpt, map, offset, r); + qpn = mk_qpn(qpt, map, offset); + /* + * This test differs from alloc_pidmap(). + * If find_next_offset() does find a zero + * bit, we don't need to check for QPN + * wrapping around past our starting QPN. + * We just need to be sure we don't loop + * forever. + */ + } while (offset < BITS_PER_PAGE && qpn < QPN_MAX); + /* + * In order to keep the number of pages allocated to a + * minimum, we scan the all existing pages before increasing + * the size of the bitmap table. + */ + if (++i > max_scan) { + if (qpt->nmaps == QPNMAP_ENTRIES) + break; + map = &qpt->map[qpt->nmaps++]; + offset = qpt->mask ? (r << 1) : 0; + } else if (map < &qpt->map[qpt->nmaps]) { + ++map; + offset = qpt->mask ? (r << 1) : 0; + } else { + map = &qpt->map[0]; + offset = qpt->mask ? (r << 1) : 2; + } + qpn = mk_qpn(qpt, map, offset); + } + + ret = -ENOMEM; + +bail: + return ret; +} + +static void free_qpn(struct qib_qpn_table *qpt, u32 qpn) +{ + struct qpn_map *map; + + map = qpt->map + qpn / BITS_PER_PAGE; + if (map->page) + clear_bit(qpn & BITS_PER_PAGE_MASK, map->page); +} + +/* + * Put the QP into the hash table. + * The hash table holds a reference to the QP. + */ +static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) +{ + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + unsigned n = qp->ibqp.qp_num % dev->qp_table_size; + unsigned long flags; + + spin_lock_irqsave(&dev->qpt_lock, flags); + + if (qp->ibqp.qp_num == 0) + ibp->qp0 = qp; + else if (qp->ibqp.qp_num == 1) + ibp->qp1 = qp; + else { + qp->next = dev->qp_table[n]; + dev->qp_table[n] = qp; + } + atomic_inc(&qp->refcount); + + spin_unlock_irqrestore(&dev->qpt_lock, flags); +} + +/* + * Remove the QP from the table so it can't be found asynchronously by + * the receive interrupt routine. + */ +static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) +{ + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct qib_qp *q, **qpp; + unsigned long flags; + + qpp = &dev->qp_table[qp->ibqp.qp_num % dev->qp_table_size]; + + spin_lock_irqsave(&dev->qpt_lock, flags); + + if (ibp->qp0 == qp) { + ibp->qp0 = NULL; + atomic_dec(&qp->refcount); + } else if (ibp->qp1 == qp) { + ibp->qp1 = NULL; + atomic_dec(&qp->refcount); + } else + for (; (q = *qpp) != NULL; qpp = &q->next) + if (q == qp) { + *qpp = qp->next; + qp->next = NULL; + atomic_dec(&qp->refcount); + break; + } + + spin_unlock_irqrestore(&dev->qpt_lock, flags); +} + +/** + * qib_free_all_qps - check for QPs still in use + * @qpt: the QP table to empty + * + * There should not be any QPs still in use. + * Free memory for table. + */ +unsigned qib_free_all_qps(struct qib_devdata *dd) +{ + struct qib_ibdev *dev = &dd->verbs_dev; + unsigned long flags; + struct qib_qp *qp; + unsigned n, qp_inuse = 0; + + for (n = 0; n < dd->num_pports; n++) { + struct qib_ibport *ibp = &dd->pport[n].ibport_data; + + if (!qib_mcast_tree_empty(ibp)) + qp_inuse++; + if (ibp->qp0) + qp_inuse++; + if (ibp->qp1) + qp_inuse++; + } + + spin_lock_irqsave(&dev->qpt_lock, flags); + for (n = 0; n < dev->qp_table_size; n++) { + qp = dev->qp_table[n]; + dev->qp_table[n] = NULL; + + for (; qp; qp = qp->next) + qp_inuse++; + } + spin_unlock_irqrestore(&dev->qpt_lock, flags); + + return qp_inuse; +} + +/** + * qib_lookup_qpn - return the QP with the given QPN + * @qpt: the QP table + * @qpn: the QP number to look up + * + * The caller is responsible for decrementing the QP reference count + * when done. + */ +struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) +{ + struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; + unsigned long flags; + struct qib_qp *qp; + + spin_lock_irqsave(&dev->qpt_lock, flags); + + if (qpn == 0) + qp = ibp->qp0; + else if (qpn == 1) + qp = ibp->qp1; + else + for (qp = dev->qp_table[qpn % dev->qp_table_size]; qp; + qp = qp->next) + if (qp->ibqp.qp_num == qpn) + break; + if (qp) + atomic_inc(&qp->refcount); + + spin_unlock_irqrestore(&dev->qpt_lock, flags); + return qp; +} + +/** + * qib_reset_qp - initialize the QP state to the reset state + * @qp: the QP to reset + * @type: the QP type + */ +static void qib_reset_qp(struct qib_qp *qp, enum ib_qp_type type) +{ + qp->remote_qpn = 0; + qp->qkey = 0; + qp->qp_access_flags = 0; + atomic_set(&qp->s_dma_busy, 0); + qp->s_flags &= QIB_S_SIGNAL_REQ_WR; + qp->s_hdrwords = 0; + qp->s_wqe = NULL; + qp->s_draining = 0; + qp->s_next_psn = 0; + qp->s_last_psn = 0; + qp->s_sending_psn = 0; + qp->s_sending_hpsn = 0; + qp->s_psn = 0; + qp->r_psn = 0; + qp->r_msn = 0; + if (type == IB_QPT_RC) { + qp->s_state = IB_OPCODE_RC_SEND_LAST; + qp->r_state = IB_OPCODE_RC_SEND_LAST; + } else { + qp->s_state = IB_OPCODE_UC_SEND_LAST; + qp->r_state = IB_OPCODE_UC_SEND_LAST; + } + qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; + qp->r_nak_state = 0; + qp->r_aflags = 0; + qp->r_flags = 0; + qp->s_head = 0; + qp->s_tail = 0; + qp->s_cur = 0; + qp->s_acked = 0; + qp->s_last = 0; + qp->s_ssn = 1; + qp->s_lsn = 0; + qp->s_mig_state = IB_MIG_MIGRATED; + memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); + qp->r_head_ack_queue = 0; + qp->s_tail_ack_queue = 0; + qp->s_num_rd_atomic = 0; + if (qp->r_rq.wq) { + qp->r_rq.wq->head = 0; + qp->r_rq.wq->tail = 0; + } + qp->r_sge.num_sge = 0; +} + +static void clear_mr_refs(struct qib_qp *qp, int clr_sends) +{ + unsigned n; + + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } + + if (clr_sends) { + while (qp->s_last != qp->s_head) { + struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_last); + unsigned i; + + for (i = 0; i < wqe->wr.num_sge; i++) { + struct qib_sge *sge = &wqe->sg_list[i]; + + atomic_dec(&sge->mr->refcount); + } + if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI) + atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + } + if (qp->s_rdma_mr) { + atomic_dec(&qp->s_rdma_mr->refcount); + qp->s_rdma_mr = NULL; + } + } + + if (qp->ibqp.qp_type != IB_QPT_RC) + return; + + for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { + struct qib_ack_entry *e = &qp->s_ack_queue[n]; + + if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && + e->rdma_sge.mr) { + atomic_dec(&e->rdma_sge.mr->refcount); + e->rdma_sge.mr = NULL; + } + } +} + +/** + * qib_error_qp - put a QP into the error state + * @qp: the QP to put into the error state + * @err: the receive completion error to signal if a RWQE is active + * + * Flushes both send and receive work queues. + * Returns true if last WQE event should be generated. + * The QP s_lock should be held and interrupts disabled. + * If we are already in error state, just return. + */ +int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) +{ + struct qib_ibdev *dev = to_idev(qp->ibqp.device); + struct ib_wc wc; + int ret = 0; + + if (qp->state == IB_QPS_ERR) + goto bail; + + qp->state = IB_QPS_ERR; + + if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) { + qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR); + del_timer(&qp->s_timer); + } + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) { + qp->s_flags &= ~QIB_S_ANY_WAIT_IO; + list_del_init(&qp->iowait); + } + spin_unlock(&dev->pending_lock); + + if (!(qp->s_flags & QIB_S_BUSY)) { + qp->s_hdrwords = 0; + if (qp->s_rdma_mr) { + atomic_dec(&qp->s_rdma_mr->refcount); + qp->s_rdma_mr = NULL; + } + if (qp->s_tx) { + qib_put_txreq(qp->s_tx); + qp->s_tx = NULL; + } + } + + /* Schedule the sending tasklet to drain the send work queue. */ + if (qp->s_last != qp->s_head) + qib_schedule_send(qp); + + clear_mr_refs(qp, 0); + + memset(&wc, 0, sizeof(wc)); + wc.qp = &qp->ibqp; + wc.opcode = IB_WC_RECV; + + if (test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) { + wc.wr_id = qp->r_wr_id; + wc.status = err; + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + } + wc.status = IB_WC_WR_FLUSH_ERR; + + if (qp->r_rq.wq) { + struct qib_rwq *wq; + u32 head; + u32 tail; + + spin_lock(&qp->r_rq.lock); + + /* sanity check pointers before trusting them */ + wq = qp->r_rq.wq; + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + while (tail != head) { + wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; + if (++tail >= qp->r_rq.size) + tail = 0; + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + } + wq->tail = tail; + + spin_unlock(&qp->r_rq.lock); + } else if (qp->ibqp.event_handler) + ret = 1; + +bail: + return ret; +} + +/** + * qib_modify_qp - modify the attributes of a queue pair + * @ibqp: the queue pair who's attributes we're modifying + * @attr: the new attributes + * @attr_mask: the mask of attributes to modify + * @udata: user data for libibverbs.so + * + * Returns 0 on success, otherwise returns an errno. + */ +int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct qib_ibdev *dev = to_idev(ibqp->device); + struct qib_qp *qp = to_iqp(ibqp); + enum ib_qp_state cur_state, new_state; + struct ib_event ev; + int lastwqe = 0; + int mig = 0; + int ret; + u32 pmtu = 0; /* for gcc warning only */ + + spin_lock_irq(&qp->s_lock); + + cur_state = attr_mask & IB_QP_CUR_STATE ? + attr->cur_qp_state : qp->state; + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, + attr_mask)) + goto inval; + + if (attr_mask & IB_QP_AV) { + if (attr->ah_attr.dlid >= QIB_MULTICAST_LID_BASE) + goto inval; + if (qib_check_ah(qp->ibqp.device, &attr->ah_attr)) + goto inval; + } + + if (attr_mask & IB_QP_ALT_PATH) { + if (attr->alt_ah_attr.dlid >= QIB_MULTICAST_LID_BASE) + goto inval; + if (qib_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) + goto inval; + if (attr->alt_pkey_index >= qib_get_npkeys(dd_from_dev(dev))) + goto inval; + } + + if (attr_mask & IB_QP_PKEY_INDEX) + if (attr->pkey_index >= qib_get_npkeys(dd_from_dev(dev))) + goto inval; + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + if (attr->min_rnr_timer > 31) + goto inval; + + if (attr_mask & IB_QP_PORT) + if (qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI || + attr->port_num == 0 || + attr->port_num > ibqp->device->phys_port_cnt) + goto inval; + + if (attr_mask & IB_QP_DEST_QPN) + if (attr->dest_qp_num > QIB_QPN_MASK) + goto inval; + + if (attr_mask & IB_QP_RETRY_CNT) + if (attr->retry_cnt > 7) + goto inval; + + if (attr_mask & IB_QP_RNR_RETRY) + if (attr->rnr_retry > 7) + goto inval; + + /* + * Don't allow invalid path_mtu values. OK to set greater + * than the active mtu (or even the max_cap, if we have tuned + * that to a small mtu. We'll set qp->path_mtu + * to the lesser of requested attribute mtu and active, + * for packetizing messages. + * Note that the QP port has to be set in INIT and MTU in RTR. + */ + if (attr_mask & IB_QP_PATH_MTU) { + struct qib_devdata *dd = dd_from_dev(dev); + int mtu, pidx = qp->port_num - 1; + + mtu = ib_mtu_enum_to_int(attr->path_mtu); + if (mtu == -1) + goto inval; + if (mtu > dd->pport[pidx].ibmtu) { + switch (dd->pport[pidx].ibmtu) { + case 4096: + pmtu = IB_MTU_4096; + break; + case 2048: + pmtu = IB_MTU_2048; + break; + case 1024: + pmtu = IB_MTU_1024; + break; + case 512: + pmtu = IB_MTU_512; + break; + case 256: + pmtu = IB_MTU_256; + break; + default: + pmtu = IB_MTU_2048; + } + } else + pmtu = attr->path_mtu; + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + if (attr->path_mig_state == IB_MIG_REARM) { + if (qp->s_mig_state == IB_MIG_ARMED) + goto inval; + if (new_state != IB_QPS_RTS) + goto inval; + } else if (attr->path_mig_state == IB_MIG_MIGRATED) { + if (qp->s_mig_state == IB_MIG_REARM) + goto inval; + if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD) + goto inval; + if (qp->s_mig_state == IB_MIG_ARMED) + mig = 1; + } else + goto inval; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + if (attr->max_dest_rd_atomic > QIB_MAX_RDMA_ATOMIC) + goto inval; + + switch (new_state) { + case IB_QPS_RESET: + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->iowait)) + list_del_init(&qp->iowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT); + spin_unlock_irq(&qp->s_lock); + /* Stop the sending work queue and retry timer */ + cancel_work_sync(&qp->s_work); + del_timer_sync(&qp->s_timer); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + if (qp->s_tx) { + qib_put_txreq(qp->s_tx); + qp->s_tx = NULL; + } + remove_qp(dev, qp); + wait_event(qp->wait, !atomic_read(&qp->refcount)); + spin_lock_irq(&qp->s_lock); + clear_mr_refs(qp, 1); + qib_reset_qp(qp, ibqp->qp_type); + } + break; + + case IB_QPS_RTR: + /* Allow event to retrigger if QP set to RTR more than once */ + qp->r_flags &= ~QIB_R_COMM_EST; + qp->state = new_state; + break; + + case IB_QPS_SQD: + qp->s_draining = qp->s_last != qp->s_cur; + qp->state = new_state; + break; + + case IB_QPS_SQE: + if (qp->ibqp.qp_type == IB_QPT_RC) + goto inval; + qp->state = new_state; + break; + + case IB_QPS_ERR: + lastwqe = qib_error_qp(qp, IB_WC_WR_FLUSH_ERR); + break; + + default: + qp->state = new_state; + break; + } + + if (attr_mask & IB_QP_PKEY_INDEX) + qp->s_pkey_index = attr->pkey_index; + + if (attr_mask & IB_QP_PORT) + qp->port_num = attr->port_num; + + if (attr_mask & IB_QP_DEST_QPN) + qp->remote_qpn = attr->dest_qp_num; + + if (attr_mask & IB_QP_SQ_PSN) { + qp->s_next_psn = attr->sq_psn & QIB_PSN_MASK; + qp->s_psn = qp->s_next_psn; + qp->s_sending_psn = qp->s_next_psn; + qp->s_last_psn = qp->s_next_psn - 1; + qp->s_sending_hpsn = qp->s_last_psn; + } + + if (attr_mask & IB_QP_RQ_PSN) + qp->r_psn = attr->rq_psn & QIB_PSN_MASK; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->qp_access_flags = attr->qp_access_flags; + + if (attr_mask & IB_QP_AV) { + qp->remote_ah_attr = attr->ah_attr; + qp->s_srate = attr->ah_attr.static_rate; + } + + if (attr_mask & IB_QP_ALT_PATH) { + qp->alt_ah_attr = attr->alt_ah_attr; + qp->s_alt_pkey_index = attr->alt_pkey_index; + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + qp->s_mig_state = attr->path_mig_state; + if (mig) { + qp->remote_ah_attr = qp->alt_ah_attr; + qp->port_num = qp->alt_ah_attr.port_num; + qp->s_pkey_index = qp->s_alt_pkey_index; + } + } + + if (attr_mask & IB_QP_PATH_MTU) + qp->path_mtu = pmtu; + + if (attr_mask & IB_QP_RETRY_CNT) { + qp->s_retry_cnt = attr->retry_cnt; + qp->s_retry = attr->retry_cnt; + } + + if (attr_mask & IB_QP_RNR_RETRY) { + qp->s_rnr_retry_cnt = attr->rnr_retry; + qp->s_rnr_retry = attr->rnr_retry; + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + qp->r_min_rnr_timer = attr->min_rnr_timer; + + if (attr_mask & IB_QP_TIMEOUT) + qp->timeout = attr->timeout; + + if (attr_mask & IB_QP_QKEY) + qp->qkey = attr->qkey; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->r_max_rd_atomic = attr->max_dest_rd_atomic; + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + qp->s_max_rd_atomic = attr->max_rd_atomic; + + spin_unlock_irq(&qp->s_lock); + + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + insert_qp(dev, qp); + + if (lastwqe) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + if (mig) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_PATH_MIG; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + ret = 0; + goto bail; + +inval: + spin_unlock_irq(&qp->s_lock); + ret = -EINVAL; + +bail: + return ret; +} + +int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr) +{ + struct qib_qp *qp = to_iqp(ibqp); + + attr->qp_state = qp->state; + attr->cur_qp_state = attr->qp_state; + attr->path_mtu = qp->path_mtu; + attr->path_mig_state = qp->s_mig_state; + attr->qkey = qp->qkey; + attr->rq_psn = qp->r_psn & QIB_PSN_MASK; + attr->sq_psn = qp->s_next_psn & QIB_PSN_MASK; + attr->dest_qp_num = qp->remote_qpn; + attr->qp_access_flags = qp->qp_access_flags; + attr->cap.max_send_wr = qp->s_size - 1; + attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1; + attr->cap.max_send_sge = qp->s_max_sge; + attr->cap.max_recv_sge = qp->r_rq.max_sge; + attr->cap.max_inline_data = 0; + attr->ah_attr = qp->remote_ah_attr; + attr->alt_ah_attr = qp->alt_ah_attr; + attr->pkey_index = qp->s_pkey_index; + attr->alt_pkey_index = qp->s_alt_pkey_index; + attr->en_sqd_async_notify = 0; + attr->sq_draining = qp->s_draining; + attr->max_rd_atomic = qp->s_max_rd_atomic; + attr->max_dest_rd_atomic = qp->r_max_rd_atomic; + attr->min_rnr_timer = qp->r_min_rnr_timer; + attr->port_num = qp->port_num; + attr->timeout = qp->timeout; + attr->retry_cnt = qp->s_retry_cnt; + attr->rnr_retry = qp->s_rnr_retry_cnt; + attr->alt_port_num = qp->alt_ah_attr.port_num; + attr->alt_timeout = qp->alt_timeout; + + init_attr->event_handler = qp->ibqp.event_handler; + init_attr->qp_context = qp->ibqp.qp_context; + init_attr->send_cq = qp->ibqp.send_cq; + init_attr->recv_cq = qp->ibqp.recv_cq; + init_attr->srq = qp->ibqp.srq; + init_attr->cap = attr->cap; + if (qp->s_flags & QIB_S_SIGNAL_REQ_WR) + init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; + else + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->qp_type = qp->ibqp.qp_type; + init_attr->port_num = qp->port_num; + return 0; +} + +/** + * qib_compute_aeth - compute the AETH (syndrome + MSN) + * @qp: the queue pair to compute the AETH for + * + * Returns the AETH. + */ +__be32 qib_compute_aeth(struct qib_qp *qp) +{ + u32 aeth = qp->r_msn & QIB_MSN_MASK; + + if (qp->ibqp.srq) { + /* + * Shared receive queues don't generate credits. + * Set the credit field to the invalid value. + */ + aeth |= QIB_AETH_CREDIT_INVAL << QIB_AETH_CREDIT_SHIFT; + } else { + u32 min, max, x; + u32 credits; + struct qib_rwq *wq = qp->r_rq.wq; + u32 head; + u32 tail; + + /* sanity check pointers before trusting them */ + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + /* + * Compute the number of credits available (RWQEs). + * XXX Not holding the r_rq.lock here so there is a small + * chance that the pair of reads are not atomic. + */ + credits = head - tail; + if ((int)credits < 0) + credits += qp->r_rq.size; + /* + * Binary search the credit table to find the code to + * use. + */ + min = 0; + max = 31; + for (;;) { + x = (min + max) / 2; + if (credit_table[x] == credits) + break; + if (credit_table[x] > credits) + max = x; + else if (min == x) + break; + else + min = x; + } + aeth |= x << QIB_AETH_CREDIT_SHIFT; + } + return cpu_to_be32(aeth); +} + +/** + * qib_create_qp - create a queue pair for a device + * @ibpd: the protection domain who's device we create the queue pair for + * @init_attr: the attributes of the queue pair + * @udata: user data for libibverbs.so + * + * Returns the queue pair on success, otherwise returns an errno. + * + * Called by the ib_create_qp() core verbs function. + */ +struct ib_qp *qib_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct qib_qp *qp; + int err; + struct qib_swqe *swq = NULL; + struct qib_ibdev *dev; + struct qib_devdata *dd; + size_t sz; + size_t sg_list_sz; + struct ib_qp *ret; + + if (init_attr->cap.max_send_sge > ib_qib_max_sges || + init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + + /* Check receive queue parameters if no SRQ is specified. */ + if (!init_attr->srq) { + if (init_attr->cap.max_recv_sge > ib_qib_max_sges || + init_attr->cap.max_recv_wr > ib_qib_max_qp_wrs) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + if (init_attr->cap.max_send_sge + + init_attr->cap.max_send_wr + + init_attr->cap.max_recv_sge + + init_attr->cap.max_recv_wr == 0) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + } + + switch (init_attr->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + if (init_attr->port_num == 0 || + init_attr->port_num > ibpd->device->phys_port_cnt) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + case IB_QPT_UC: + case IB_QPT_RC: + case IB_QPT_UD: + sz = sizeof(struct qib_sge) * + init_attr->cap.max_send_sge + + sizeof(struct qib_swqe); + swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); + if (swq == NULL) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + sz = sizeof(*qp); + sg_list_sz = 0; + if (init_attr->srq) { + struct qib_srq *srq = to_isrq(init_attr->srq); + + if (srq->rq.max_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (srq->rq.max_sge - 1); + } else if (init_attr->cap.max_recv_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (init_attr->cap.max_recv_sge - 1); + qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); + if (!qp) { + ret = ERR_PTR(-ENOMEM); + goto bail_swq; + } + if (init_attr->srq) + sz = 0; + else { + qp->r_rq.size = init_attr->cap.max_recv_wr + 1; + qp->r_rq.max_sge = init_attr->cap.max_recv_sge; + sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + + sizeof(struct qib_rwqe); + qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) + + qp->r_rq.size * sz); + if (!qp->r_rq.wq) { + ret = ERR_PTR(-ENOMEM); + goto bail_qp; + } + } + + /* + * ib_create_qp() will initialize qp->ibqp + * except for qp->ibqp.qp_num. + */ + spin_lock_init(&qp->r_lock); + spin_lock_init(&qp->s_lock); + spin_lock_init(&qp->r_rq.lock); + atomic_set(&qp->refcount, 0); + init_waitqueue_head(&qp->wait); + init_waitqueue_head(&qp->wait_dma); + init_timer(&qp->s_timer); + qp->s_timer.data = (unsigned long)qp; + INIT_WORK(&qp->s_work, qib_do_send); + INIT_LIST_HEAD(&qp->iowait); + INIT_LIST_HEAD(&qp->rspwait); + qp->state = IB_QPS_RESET; + qp->s_wq = swq; + qp->s_size = init_attr->cap.max_send_wr + 1; + qp->s_max_sge = init_attr->cap.max_send_sge; + if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) + qp->s_flags = QIB_S_SIGNAL_REQ_WR; + dev = to_idev(ibpd->device); + dd = dd_from_dev(dev); + err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type, + init_attr->port_num); + if (err < 0) { + ret = ERR_PTR(err); + vfree(qp->r_rq.wq); + goto bail_qp; + } + qp->ibqp.qp_num = err; + qp->port_num = init_attr->port_num; + qp->processor_id = smp_processor_id(); + qib_reset_qp(qp, init_attr->qp_type); + break; + + default: + /* Don't support raw QPs */ + ret = ERR_PTR(-ENOSYS); + goto bail; + } + + init_attr->cap.max_inline_data = 0; + + /* + * Return the address of the RWQ as the offset to mmap. + * See qib_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + if (!qp->r_rq.wq) { + __u64 offset = 0; + + err = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } else { + u32 s = sizeof(struct qib_rwq) + qp->r_rq.size * sz; + + qp->ip = qib_create_mmap_info(dev, s, + ibpd->uobject->context, + qp->r_rq.wq); + if (!qp->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + err = ib_copy_to_udata(udata, &(qp->ip->offset), + sizeof(qp->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } + } + + spin_lock(&dev->n_qps_lock); + if (dev->n_qps_allocated == ib_qib_max_qps) { + spin_unlock(&dev->n_qps_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_qps_allocated++; + spin_unlock(&dev->n_qps_lock); + + if (qp->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + ret = &qp->ibqp; + goto bail; + +bail_ip: + if (qp->ip) + kref_put(&qp->ip->ref, qib_release_mmap_info); + else + vfree(qp->r_rq.wq); + free_qpn(&dev->qpn_table, qp->ibqp.qp_num); +bail_qp: + kfree(qp); +bail_swq: + vfree(swq); +bail: + return ret; +} + +/** + * qib_destroy_qp - destroy a queue pair + * @ibqp: the queue pair to destroy + * + * Returns 0 on success. + * + * Note that this can be called while the QP is actively sending or + * receiving! + */ +int qib_destroy_qp(struct ib_qp *ibqp) +{ + struct qib_qp *qp = to_iqp(ibqp); + struct qib_ibdev *dev = to_idev(ibqp->device); + + /* Make sure HW and driver activity is stopped. */ + spin_lock_irq(&qp->s_lock); + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->iowait)) + list_del_init(&qp->iowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT); + spin_unlock_irq(&qp->s_lock); + cancel_work_sync(&qp->s_work); + del_timer_sync(&qp->s_timer); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + if (qp->s_tx) { + qib_put_txreq(qp->s_tx); + qp->s_tx = NULL; + } + remove_qp(dev, qp); + wait_event(qp->wait, !atomic_read(&qp->refcount)); + clear_mr_refs(qp, 1); + } else + spin_unlock_irq(&qp->s_lock); + + /* all user's cleaned up, mark it available */ + free_qpn(&dev->qpn_table, qp->ibqp.qp_num); + spin_lock(&dev->n_qps_lock); + dev->n_qps_allocated--; + spin_unlock(&dev->n_qps_lock); + + if (qp->ip) + kref_put(&qp->ip->ref, qib_release_mmap_info); + else + vfree(qp->r_rq.wq); + vfree(qp->s_wq); + kfree(qp); + return 0; +} + +/** + * qib_init_qpn_table - initialize the QP number table for a device + * @qpt: the QPN table + */ +void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt) +{ + spin_lock_init(&qpt->lock); + qpt->last = 1; /* start with QPN 2 */ + qpt->nmaps = 1; + qpt->mask = dd->qpn_mask; +} + +/** + * qib_free_qpn_table - free the QP number table for a device + * @qpt: the QPN table + */ +void qib_free_qpn_table(struct qib_qpn_table *qpt) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(qpt->map); i++) + if (qpt->map[i].page) + free_page((unsigned long) qpt->map[i].page); +} + +/** + * qib_get_credit - flush the send work queue of a QP + * @qp: the qp who's send work queue to flush + * @aeth: the Acknowledge Extended Transport Header + * + * The QP s_lock should be held. + */ +void qib_get_credit(struct qib_qp *qp, u32 aeth) +{ + u32 credit = (aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK; + + /* + * If the credit is invalid, we can send + * as many packets as we like. Otherwise, we have to + * honor the credit field. + */ + if (credit == QIB_AETH_CREDIT_INVAL) { + if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) { + qp->s_flags |= QIB_S_UNLIMITED_CREDIT; + if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) { + qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT; + qib_schedule_send(qp); + } + } + } else if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) { + /* Compute new LSN (i.e., MSN + credit) */ + credit = (aeth + credit_table[credit]) & QIB_MSN_MASK; + if (qib_cmp24(credit, qp->s_lsn) > 0) { + qp->s_lsn = credit; + if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) { + qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT; + qib_schedule_send(qp); + } + } + } +}