From patchwork Tue Dec 28 15:28:34 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mike Marciniszyn X-Patchwork-Id: 436451 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id oBSFc3Mo018707 for ; Tue, 28 Dec 2010 15:38:10 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752771Ab0L1PiI (ORCPT ); Tue, 28 Dec 2010 10:38:08 -0500 Received: from [198.186.4.11] ([198.186.4.11]:11439 "EHLO kop-dev-sles11-04.qlogic.org" rhost-flags-FAIL-FAIL-OK-FAIL) by vger.kernel.org with ESMTP id S1752892Ab0L1PiB (ORCPT ); Tue, 28 Dec 2010 10:38:01 -0500 Received: from kop-dev-sles11-04.qlogic.org (localhost [127.0.0.1]) by kop-dev-sles11-04.qlogic.org (Postfix) with ESMTP id A42C627E2DB; Tue, 28 Dec 2010 10:28:34 -0500 (EST) Subject: [PATCH 20/24] IB/qib: RDMA lkey/rkey validation is inefficient for large MRs To: Roland Dreier From: Mike Marciniszyn Cc: linux-rdma@vger.kernel.org Date: Tue, 28 Dec 2010 10:28:34 -0500 Message-ID: <20101228152834.19960.62290.stgit@kop-dev-sles11-04.qlogic.org> In-Reply-To: <20101228152648.19960.84006.stgit@kop-dev-sles11-04.qlogic.org> References: <20101228152648.19960.84006.stgit@kop-dev-sles11-04.qlogic.org> User-Agent: StGit/0.15 MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter1.kernel.org [140.211.167.41]); Tue, 28 Dec 2010 15:38:10 +0000 (UTC) diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index 4b80eb1..756d160 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -158,31 +158,47 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, isge->sge_length = sge->length; isge->m = 0; isge->n = 0; + spin_unlock_irqrestore(&rkt->lock, flags); goto ok; } mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]; if (unlikely(mr == NULL || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) goto bail; + atomic_inc(&mr->refcount); + spin_unlock_irqrestore(&rkt->lock, flags); off = sge->addr - mr->user_base; if (unlikely(sge->addr < mr->user_base || off + sge->length > mr->length || (mr->access_flags & acc) != acc)) - goto bail; + return ret; off += mr->offset; - m = 0; - n = 0; - while (off >= mr->map[m]->segs[n].length) { - off -= mr->map[m]->segs[n].length; - n++; - if (n >= QIB_SEGSZ) { - m++; - n = 0; + if (mr->page_shift) { + /* + page sizes are uniform power of 2 so no loop is necessary + entries_spanned_by_off is the number of times the loop below + would have executed. + */ + size_t entries_spanned_by_off; + + entries_spanned_by_off = off >> mr->page_shift; + off -= (entries_spanned_by_off << mr->page_shift); + m = entries_spanned_by_off/QIB_SEGSZ; + n = entries_spanned_by_off%QIB_SEGSZ; + } else { + m = 0; + n = 0; + while (off >= mr->map[m]->segs[n].length) { + off -= mr->map[m]->segs[n].length; + n++; + if (n >= QIB_SEGSZ) { + m++; + n = 0; + } } } - atomic_inc(&mr->refcount); isge->mr = mr; isge->vaddr = mr->map[m]->segs[n].vaddr + off; isge->length = mr->map[m]->segs[n].length - off; @@ -191,6 +207,7 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, isge->n = n; ok: ret = 1; + return ret; bail: spin_unlock_irqrestore(&rkt->lock, flags); return ret; @@ -237,30 +254,46 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, sge->sge_length = len; sge->m = 0; sge->n = 0; + spin_unlock_irqrestore(&rkt->lock, flags); goto ok; } mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) goto bail; + atomic_inc(&mr->refcount); + spin_unlock_irqrestore(&rkt->lock, flags); off = vaddr - mr->iova; if (unlikely(vaddr < mr->iova || off + len > mr->length || (mr->access_flags & acc) == 0)) - goto bail; + return ret; off += mr->offset; - m = 0; - n = 0; - while (off >= mr->map[m]->segs[n].length) { - off -= mr->map[m]->segs[n].length; - n++; - if (n >= QIB_SEGSZ) { - m++; - n = 0; + if (mr->page_shift) { + /* + page sizes are uniform power of 2 so no loop is necessary + entries_spanned_by_off is the number of times the loop below + would have executed. + */ + size_t entries_spanned_by_off; + + entries_spanned_by_off = off >> mr->page_shift; + off -= (entries_spanned_by_off << mr->page_shift); + m = entries_spanned_by_off/QIB_SEGSZ; + n = entries_spanned_by_off%QIB_SEGSZ; + } else { + m = 0; + n = 0; + while (off >= mr->map[m]->segs[n].length) { + off -= mr->map[m]->segs[n].length; + n++; + if (n >= QIB_SEGSZ) { + m++; + n = 0; + } } } - atomic_inc(&mr->refcount); sge->mr = mr; sge->vaddr = mr->map[m]->segs[n].vaddr + off; sge->length = mr->map[m]->segs[n].length - off; @@ -269,6 +302,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, sge->n = n; ok: ret = 1; + return ret; bail: spin_unlock_irqrestore(&rkt->lock, flags); return ret; diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c index 5f95f0f..08944e2 100644 --- a/drivers/infiniband/hw/qib/qib_mr.c +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -39,7 +39,6 @@ /* Fast memory region */ struct qib_fmr { struct ib_fmr ibfmr; - u8 page_shift; struct qib_mregion mr; /* must be last */ }; @@ -107,6 +106,7 @@ static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table) goto bail; } mr->mr.mapsz = m; + mr->mr.page_shift = 0; mr->mr.max_segs = count; /* @@ -231,6 +231,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->mr.access_flags = mr_access_flags; mr->umem = umem; + if (is_power_of_2(umem->page_size)) + mr->mr.page_shift = ilog2(umem->page_size); m = 0; n = 0; list_for_each_entry(chunk, &umem->chunk_list, list) { @@ -390,7 +392,7 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, fmr->mr.offset = 0; fmr->mr.access_flags = mr_access_flags; fmr->mr.max_segs = fmr_attr->max_pages; - fmr->page_shift = fmr_attr->page_shift; + fmr->mr.page_shift = fmr_attr->page_shift; atomic_set(&fmr->mr.refcount, 0); ret = &fmr->ibfmr; @@ -437,7 +439,7 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, spin_lock_irqsave(&rkt->lock, flags); fmr->mr.user_base = iova; fmr->mr.iova = iova; - ps = 1 << fmr->page_shift; + ps = 1 << fmr->mr.page_shift; fmr->mr.length = list_len * ps; m = 0; n = 0; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index a08ceab..63b22a9 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -301,6 +301,7 @@ struct qib_mregion { int access_flags; u32 max_segs; /* number of qib_segs in all the arrays */ u32 mapsz; /* size of the map array */ + u8 page_shift; /* 0 - non unform/non powerof2 sizes */ atomic_t refcount; struct qib_segarray *map[0]; /* the segments */ };