
[v4,07/13] SIW application buffer management

Message ID 20190130172136.23625-8-bmt@zurich.ibm.com (mailing list archive)
State Superseded
Series SIW: Request for Comments

Commit Message

Bernard Metzler Jan. 30, 2019, 5:21 p.m. UTC
From: Bernard Metzler <bmt@zurich.ibm.com>

Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
---
 drivers/infiniband/sw/siw/siw_mem.c | 243 ++++++++++++++++++++++++++++
 1 file changed, 243 insertions(+)
 create mode 100644 drivers/infiniband/sw/siw/siw_mem.c

Patch

diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
new file mode 100644
index 000000000000..1db8b72182f8
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -0,0 +1,243 @@ 
+/*
+ * Software iWARP device driver
+ *
+ * Authors: Animesh Trivedi <atr@zurich.ibm.com>
+ *          Bernard Metzler <bmt@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2018, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/version.h>
+#include <linux/scatterlist.h>
+#include <linux/gfp.h>
+#include <rdma/ib_verbs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/pid.h>
+#include <linux/sched/mm.h>
+
+#include "siw.h"
+#include "siw_debug.h"
+
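+/*
+ * Deferred pinned_vm accounting, run from a work item when
+ * siw_umem_release() could not take the mmap_sem without blocking.
+ */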
+static void siw_umem_update_stats(struct work_struct *work)
+{
+	struct siw_umem *umem = container_of(work, struct siw_umem, work);
+	struct mm_struct *mm_s = umem->mm_s;
+
+	down_write(&mm_s->mmap_sem);
+	mm_s->pinned_vm -= umem->num_pages;
+	up_write(&mm_s->mmap_sem);
+
+	mmput(mm_s);
+
+	kfree(umem->page_chunk);
+	kfree(umem);
+}
+
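+/* Drop the references on the first num_pages pages of one page chunk. */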
+static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages)
+{
+	struct page **p = chunk->p;
+
+	while (num_pages--) {
+		put_page(*p);
+		p++;
+	}
+}
+
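+/*
+ * Release a pinned user memory region: drop all page references,
+ * decrement the pinned_vm counter of the owning mm and free the umem.
+ * If the mmap_sem cannot be taken without blocking, the pinned_vm
+ * accounting is deferred to a work item.
+ */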
+void siw_umem_release(struct siw_umem *umem)
+{
+	struct task_struct *task = get_pid_task(umem->pid, PIDTYPE_PID);
+	int i, num_pages = umem->num_pages;
+
+	for (i = 0; num_pages; i++) {
+		int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);
+
+		siw_free_plist(&umem->page_chunk[i], to_free);
+		kfree(umem->page_chunk[i].p);
+		num_pages -= to_free;
+	}
+	put_pid(umem->pid);
+	if (task) {
+		struct mm_struct *mm_s = get_task_mm(task);
+
+		put_task_struct(task);
+		if (mm_s) {
+			if (down_write_trylock(&mm_s->mmap_sem)) {
+				mm_s->pinned_vm -= umem->num_pages;
+				up_write(&mm_s->mmap_sem);
+				mmput(mm_s);
+			} else {
+				/*
+				 * Schedule delayed accounting if
+				 * mm semaphore is not available
+				 */
+				INIT_WORK(&umem->work, siw_umem_update_stats);
+				umem->mm_s = mm_s;
+				schedule_work(&umem->work);
+
+				return;
+			}
+		}
+	}
+	kfree(umem->page_chunk);
+	kfree(umem);
+}
+
+void siw_pbl_free(struct siw_pbl *pbl)
+{
+	kfree(pbl);
+}
+
+/*
+ * Get the physical address backed by a PBL element. The address is
+ * referenced by its linear byte offset into the list of variably
+ * sized PBL elements. Optionally returns the length remaining within
+ * the current element and the current PBL index, so a later lookup
+ * can resume at the same element.
+ */
+u64 siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
+{
+	int i = idx ? *idx : 0;
+
+	while (i < pbl->num_buf) {
+		struct siw_pble *pble = &pbl->pbe[i];
+
+		if (pble->pbl_off + pble->size > off) {
+			u64 pble_off = off - pble->pbl_off;
+
+			if (len)
+				*len = pble->size - pble_off;
+			if (idx)
+				*idx = i;
+
+			return pble->addr + pble_off;
+		}
+		i++;
+	}
+	if (len)
+		*len = 0;
+	return 0;
+}
+
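+/* Allocate a physical buffer list with room for num_buf buffer entries. */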
+struct siw_pbl *siw_pbl_alloc(u32 num_buf)
+{
+	struct siw_pbl *pbl;
+	int buf_size = sizeof(*pbl);
+
+	if (num_buf == 0)
+		return ERR_PTR(-EINVAL);
+
+	buf_size += ((num_buf - 1) * sizeof(struct siw_pble));
+
+	pbl = kzalloc(buf_size, GFP_KERNEL);
+	if (!pbl)
+		return ERR_PTR(-ENOMEM);
+
+	pbl->max_buf = num_buf;
+
+	return pbl;
+}
+
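+/*
+ * Pin the pages backing the user buffer [start, start + len) and record
+ * them in chunks of PAGES_PER_CHUNK pages. The number of pinned pages is
+ * checked against RLIMIT_MEMLOCK and added to the pinned_vm count of the
+ * current mm.
+ */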
+struct siw_umem *siw_umem_get(u64 start, u64 len)
+{
+	struct siw_umem *umem;
+	u64 first_page_va;
+	unsigned long mlock_limit;
+	int num_pages, num_chunks, i, rv = 0;
+
+	if (!can_do_mlock())
+		return ERR_PTR(-EPERM);
+
+	if (!len)
+		return ERR_PTR(-EINVAL);
+
+	first_page_va = start & PAGE_MASK;
+	num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
+	num_chunks = (num_pages >> CHUNK_SHIFT) + 1;
+
+	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+	if (!umem)
+		return ERR_PTR(-ENOMEM);
+
+	umem->pid = get_task_pid(current, PIDTYPE_PID);
+
+	down_write(&current->mm->mmap_sem);
+
+	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+	if (num_pages + current->mm->pinned_vm > mlock_limit) {
+		rv = -ENOMEM;
+		goto out;
+	}
+	umem->fp_addr = first_page_va;
+
+	umem->page_chunk = kcalloc(num_chunks, sizeof(struct siw_page_chunk),
+				   GFP_KERNEL);
+	if (!umem->page_chunk) {
+		rv = -ENOMEM;
+		goto out;
+	}
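+	/*
+	 * Pin pages chunk by chunk. get_user_pages() may return fewer
+	 * pages than requested, so retry until the current chunk is
+	 * filled.
+	 */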
+	for (i = 0; num_pages; i++) {
+		int got, nents = min_t(int, num_pages, PAGES_PER_CHUNK);
+
+		umem->page_chunk[i].p = kcalloc(nents, sizeof(struct page *),
+						GFP_KERNEL);
+		if (!umem->page_chunk[i].p) {
+			rv = -ENOMEM;
+			goto out;
+		}
+		got = 0;
+		while (nents) {
+			struct page **plist = &umem->page_chunk[i].p[got];
+
+			rv = get_user_pages(first_page_va, nents, FOLL_WRITE,
+					    plist, NULL);
+			if (rv < 0)
+				goto out;
+
+			umem->num_pages += rv;
+			current->mm->pinned_vm += rv;
+			first_page_va += rv * PAGE_SIZE;
+			nents -= rv;
+			got += rv;
+		}
+		num_pages -= got;
+	}
+out:
+	up_write(&current->mm->mmap_sem);
+
+	if (rv > 0)
+		return umem;
+
+	siw_umem_release(umem);
+
+	return ERR_PTR(rv);
+}