diff mbox series

[RFC,4/5] mm/hmm: add support for peer to peer to HMM device memory

Message ID 20190129174728.6430-5-jglisse@redhat.com (mailing list archive)
State New, archived
Headers show
Series Device peer to peer (p2p) through vma | expand

Commit Message

Jerome Glisse Jan. 29, 2019, 5:47 p.m. UTC
From: Jérôme Glisse <jglisse@redhat.com>

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: linux-pci@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Cc: Christoph Hellwig <hch@lst.de>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: iommu@lists.linux-foundation.org
---
 include/linux/hmm.h | 47 +++++++++++++++++++++++++++++++++
 mm/hmm.c            | 63 +++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 105 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 4a1454e3efba..7a3ac182cc48 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -710,6 +710,53 @@  struct hmm_devmem_ops {
 		     const struct page *page,
 		     unsigned int flags,
 		     pmd_t *pmdp);
+
+	/*
+	 * p2p_map() - map pages for peer to peer access between devices
+	 * @devmem: device memory structure (see struct hmm_devmem)
+	 * @range: range of virtual addresses that is being mapped
+	 * @device: device the range is being mapped to
+	 * @addr: first virtual address in the range to consider
+	 * @pas: device addresses (where the actual mappings are stored)
+	 * Returns: number of pages successfully mapped, 0 otherwise
+	 *
+	 * Map pages belonging to devmem to another device for peer to peer
+	 * access. The device can decide not to map, in which case the memory
+	 * will be migrated to main memory.
+	 *
+	 * Also there is no guarantee that all the pages in the range
+	 * belong to the devmem, so it is up to the function to check that
+	 * every single page does belong to devmem.
+	 *
+	 * Note that for now we do not care about the exact error, so on
+	 * failure the function should just return 0.
+	 */
+	long (*p2p_map)(struct hmm_devmem *devmem,
+			struct hmm_range *range,
+			struct device *device,
+			unsigned long addr,
+			dma_addr_t *pas);
+
+	/*
+	 * p2p_unmap() - unmap pages from peer to peer access between devices
+	 * @devmem: device memory structure (see struct hmm_devmem)
+	 * @range: range of virtual addresses that is being unmapped
+	 * @device: device the range is mapped to
+	 * @addr: first virtual address in the range to consider
+	 * @pas: device addresses (where the actual mappings are stored)
+	 * Returns: number of pages successfully unmapped, 0 otherwise
+	 *
+	 * Unmap pages belonging to devmem previously mapped with p2p_map().
+	 *
+	 * Note there is no guarantee that all the pages in the range
+	 * belong to the devmem, so it is up to the function to check that
+	 * every single page does belong to devmem.
+	 */
+	unsigned long (*p2p_unmap)(struct hmm_devmem *devmem,
+				   struct hmm_range *range,
+				   struct device *device,
+				   unsigned long addr,
+				   dma_addr_t *pas);
 };
 
 /*
diff --git a/mm/hmm.c b/mm/hmm.c
index 1a444885404e..fd49b1e116d0 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1193,16 +1193,19 @@  long hmm_range_dma_map(struct hmm_range *range,
 		       dma_addr_t *daddrs,
 		       bool block)
 {
-	unsigned long i, npages, mapped, page_size;
+	unsigned long i, npages, mapped, page_size, addr;
 	long ret;
 
+again:
 	ret = hmm_range_fault(range, block);
 	if (ret <= 0)
 		return ret ? ret : -EBUSY;
 
+	mapped = 0;
+	addr = range->start;
 	page_size = hmm_range_page_size(range);
 	npages = (range->end - range->start) >> range->page_shift;
-	for (i = 0, mapped = 0; i < npages; ++i) {
+	for (i = 0; i < npages; ++i, addr += page_size) {
 		enum dma_data_direction dir = DMA_FROM_DEVICE;
 		struct page *page;
 
@@ -1226,6 +1229,29 @@  long hmm_range_dma_map(struct hmm_range *range,
 			goto unmap;
 		}
 
+		if (is_device_private_page(page)) {
+			struct hmm_devmem *devmem = page->pgmap->data;
+
+			if (!devmem->ops->p2p_map || !devmem->ops->p2p_unmap) {
+				/* Fall-back to main memory. */
+				range->default_flags |=
+					range->flags[HMM_PFN_DEVICE_PRIVATE];
+				goto again;
+			}
+
+			ret = devmem->ops->p2p_map(devmem, range, device,
+						   addr, daddrs);
+			if (ret <= 0) {
+				/* Fall-back to main memory. */
+				range->default_flags |=
+					range->flags[HMM_PFN_DEVICE_PRIVATE];
+				goto again;
+			}
+			mapped += ret;
+			i += ret;
+			continue;
+		}
+
 		/* If it is read and write than map bi-directional. */
 		if (range->pfns[i] & range->values[HMM_PFN_WRITE])
 			dir = DMA_BIDIRECTIONAL;
@@ -1242,7 +1268,9 @@  long hmm_range_dma_map(struct hmm_range *range,
 	return mapped;
 
 unmap:
-	for (npages = i, i = 0; (i < npages) && mapped; ++i) {
+	npages = i;
+	addr = range->start;
+	for (i = 0; (i < npages) && mapped; ++i, addr += page_size) {
 		enum dma_data_direction dir = DMA_FROM_DEVICE;
 		struct page *page;
 
@@ -1253,6 +1281,18 @@  long hmm_range_dma_map(struct hmm_range *range,
 		if (dma_mapping_error(device, daddrs[i]))
 			continue;
 
+		if (is_device_private_page(page)) {
+			struct hmm_devmem *devmem = page->pgmap->data;
+			unsigned long inc;
+
+			inc = devmem->ops->p2p_unmap(devmem, range, device,
+						     addr, &daddrs[i]);
+			BUG_ON(inc > npages);
+			mapped += inc;
+			i += inc;
+			continue;
+		}
+
 		/* If it is read and write than map bi-directional. */
 		if (range->pfns[i] & range->values[HMM_PFN_WRITE])
 			dir = DMA_BIDIRECTIONAL;
@@ -1285,7 +1325,7 @@  long hmm_range_dma_unmap(struct hmm_range *range,
 			 dma_addr_t *daddrs,
 			 bool dirty)
 {
-	unsigned long i, npages, page_size;
+	unsigned long i, npages, page_size, addr;
 	long cpages = 0;
 
 	/* Sanity check. */
@@ -1298,7 +1338,7 @@  long hmm_range_dma_unmap(struct hmm_range *range,
 
 	page_size = hmm_range_page_size(range);
 	npages = (range->end - range->start) >> range->page_shift;
-	for (i = 0; i < npages; ++i) {
+	for (i = 0, addr = range->start; i < npages; ++i, addr += page_size) {
 		enum dma_data_direction dir = DMA_FROM_DEVICE;
 		struct page *page;
 
@@ -1318,6 +1358,19 @@  long hmm_range_dma_unmap(struct hmm_range *range,
 				set_page_dirty(page);
 		}
 
+		if (is_device_private_page(page)) {
+			struct hmm_devmem *devmem = page->pgmap->data;
+			unsigned long ret;
+
+			BUG_ON(!devmem->ops->p2p_unmap);
+
+			ret = devmem->ops->p2p_unmap(devmem, range, device,
+						     addr, &daddrs[i]);
+			BUG_ON(ret > npages);
+			i += ret;
+			continue;
+		}
+
 		/* Unmap and clear pfns/dma address */
 		dma_unmap_page(device, daddrs[i], page_size, dir);
 		range->pfns[i] = range->values[HMM_PFN_NONE];