diff mbox series

[v6,07/13] SIW application buffer management

Message ID 20190325171047.23824-9-bmt@zurich.ibm.com (mailing list archive)
State Superseded
Headers show
Series SIW: Request for Comments | expand

Commit Message

Bernard Metzler March 25, 2019, 5:10 p.m. UTC
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
---
 drivers/infiniband/sw/siw/siw_mem.c | 188 ++++++++++++++++++++++++++++
 1 file changed, 188 insertions(+)
 create mode 100644 drivers/infiniband/sw/siw/siw_mem.c
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
new file mode 100644
index 000000000000..e5627dbf5c63
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -0,0 +1,188 @@ 
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#include <linux/version.h>
+#include <linux/scatterlist.h>
+#include <linux/gfp.h>
+#include <rdma/ib_verbs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/pid.h>
+#include <linux/sched/mm.h>
+
+#include "siw.h"
+#include "siw_debug.h"
+
+static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
+			   bool dirty)
+{
+	struct page **p = chunk->p;
+
+	while (num_pages--) {
+		if (!PageDirty(*p) && dirty)
+			set_page_dirty_lock(*p);
+		put_page(*p);
+		p++;
+	}
+}
+
+void siw_umem_release(struct siw_umem *umem, bool dirty)
+{
+	struct mm_struct *mm_s = umem->owning_mm;
+	int i, num_pages = umem->num_pages;
+
+	for (i = 0; num_pages; i++) {
+		int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);
+
+		siw_free_plist(&umem->page_chunk[i], to_free,
+			       umem->writable && dirty);
+		kfree(umem->page_chunk[i].p);
+		num_pages -= to_free;
+	}
+	atomic64_sub(umem->num_pages, &mm_s->pinned_vm);
+
+	mmdrop(mm_s);
+	kfree(umem->page_chunk);
+	kfree(umem);
+}
+
+void siw_pbl_free(struct siw_pbl *pbl)
+{
+	kfree(pbl);
+}
+
+/*
+ * Gets physical address backed by PBL element. Address is referenced
+ * by linear byte offset into list of variably sized PB elements.
+ * Optionally, provides remaining len within current element, and
+ * current PBL index for later resume at same element.
+ */
+u64 siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
+{
+	int i = idx ? *idx : 0;
+
+	while (i < pbl->num_buf) {
+		struct siw_pble *pble = &pbl->pbe[i];
+
+		if (pble->pbl_off + pble->size > off) {
+			u64 pble_off = off - pble->pbl_off;
+
+			if (len)
+				*len = pble->size - pble_off;
+			if (idx)
+				*idx = i;
+
+			return pble->addr + pble_off;
+		}
+		i++;
+	}
+	if (len)
+		*len = 0;
+	return 0;
+}
+
+struct siw_pbl *siw_pbl_alloc(u32 num_buf)
+{
+	struct siw_pbl *pbl;
+	int buf_size = sizeof(*pbl);
+
+	if (num_buf == 0)
+		return ERR_PTR(-EINVAL);
+
+	buf_size += ((num_buf - 1) * sizeof(struct siw_pble));
+
+	pbl = kzalloc(buf_size, GFP_KERNEL);
+	if (!pbl)
+		return ERR_PTR(-ENOMEM);
+
+	pbl->max_buf = num_buf;
+
+	return pbl;
+}
+
+struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
+{
+	struct siw_umem *umem;
+	struct mm_struct *mm_s;
+	u64 first_page_va;
+	unsigned long mlock_limit;
+	unsigned int foll_flags = FOLL_WRITE;
+	int num_pages, num_chunks, i, rv = 0;
+
+	if (!can_do_mlock())
+		return ERR_PTR(-EPERM);
+
+	if (!len)
+		return ERR_PTR(-EINVAL);
+
+	first_page_va = start & PAGE_MASK;
+	num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
+	num_chunks = (num_pages >> CHUNK_SHIFT) + 1;
+
+	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+	if (!umem)
+		return ERR_PTR(-ENOMEM);
+
+	mm_s = current->mm;
+	umem->owning_mm = mm_s;
+	umem->writable = writable;
+
+	mmgrab(mm_s);
+
+	if (!writable)
+		foll_flags |= FOLL_FORCE;
+
+	down_read(&mm_s->mmap_sem);
+
+	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
+		rv = -ENOMEM;
+		goto out_sem_up;
+	}
+	umem->fp_addr = first_page_va;
+
+	umem->page_chunk =
+		kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
+	if (!umem->page_chunk) {
+		rv = -ENOMEM;
+		goto out_sem_up;
+	}
+	for (i = 0; num_pages; i++) {
+		int got, nents = min_t(int, num_pages, PAGES_PER_CHUNK);
+
+		umem->page_chunk[i].p =
+			kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
+		if (!umem->page_chunk[i].p) {
+			rv = -ENOMEM;
+			goto out_sem_up;
+		}
+		got = 0;
+		while (nents) {
+			struct page **plist = &umem->page_chunk[i].p[got];
+
+			rv = get_user_pages_longterm(first_page_va, nents,
+						     foll_flags, plist, NULL);
+			if (rv < 0)
+				goto out_sem_up;
+
+			umem->num_pages += rv;
+			atomic64_add(rv, &mm_s->pinned_vm);
+			first_page_va += rv * PAGE_SIZE;
+			nents -= rv;
+			got += rv;
+		}
+		num_pages -= got;
+	}
+out_sem_up:
+	up_read(&mm_s->mmap_sem);
+
+	if (rv > 0)
+		return umem;
+
+	siw_umem_release(umem, false);
+
+	return ERR_PTR(rv);
+}