diff mbox series

[v6,03/13] PCI/P2PDMA: Add PCI p2pmem DMA mappings to adjust the bus offset

Message ID 20180913001156.4115-4-logang@deltatee.com (mailing list archive)
State New, archived
Headers show
Series Copy Offload in NVMe Fabrics with P2P PCI Memory | expand

Commit Message

Logan Gunthorpe Sept. 13, 2018, 12:11 a.m. UTC
The DMA address used when mapping PCI P2P memory must be the PCI bus
address. Thus, introduce pci_p2pmem_map_sg() to map the correct
addresses when using P2P memory. Memory mapped in this way does not
need to be unmapped.

For this, we assume that an SGL passed to these functions contain all
P2P memory or no P2P memory.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
---
 drivers/pci/p2pdma.c       | 43 ++++++++++++++++++++++++++++++++++++++
 include/linux/memremap.h   |  1 +
 include/linux/pci-p2pdma.h |  7 +++++++
 3 files changed, 51 insertions(+)

Comments

Bjorn Helgaas Sept. 21, 2018, 1:15 p.m. UTC | #1
On Wed, Sep 12, 2018 at 06:11:46PM -0600, Logan Gunthorpe wrote:
> The DMA address used when mapping PCI P2P memory must be the PCI bus
> address. Thus, introduce pci_p2pmem_map_sg() to map the correct
> addresses when using P2P memory. Memory mapped in this way does not
> need to be unmapped.

I think the use of "map" in this context is slightly confusing because the
general expectation is that map/unmap must be balanced.

I assume it's because the "mapping" consumes no resources, e.g., requires
no page table entries.  Possibly there's a better verb than "map", e.g.,
"convert", "convert_to_p2pdma", etc?

If you keep "map", maybe add a sentence or two about why there's no
corresponding unmap?

> For this, we assume that an SGL passed to these functions contain all
> P2P memory or no P2P memory.
> 
> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>

Acked-by: Bjorn Helgaas <bhelgaas@google.com>

> ---
>  drivers/pci/p2pdma.c       | 43 ++++++++++++++++++++++++++++++++++++++
>  include/linux/memremap.h   |  1 +
>  include/linux/pci-p2pdma.h |  7 +++++++
>  3 files changed, 51 insertions(+)
> 
> diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
> index 67c1daf1189e..29bd40a87768 100644
> --- a/drivers/pci/p2pdma.c
> +++ b/drivers/pci/p2pdma.c
> @@ -191,6 +191,8 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
>  	pgmap->res.flags = pci_resource_flags(pdev, bar);
>  	pgmap->ref = &pdev->p2pdma->devmap_ref;
>  	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
> +	pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
> +		pci_resource_start(pdev, bar);
>  
>  	addr = devm_memremap_pages(&pdev->dev, pgmap);
>  	if (IS_ERR(addr)) {
> @@ -813,3 +815,44 @@ void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
>  	pdev->p2pdma->p2pmem_published = publish;
>  }
>  EXPORT_SYMBOL_GPL(pci_p2pmem_publish);
> +
> +/**
> + * pci_p2pdma_map_sg - map a PCI peer-to-peer scatterlist for DMA
> + * @dev: device doing the DMA request
> + * @sg: scatter list to map
> + * @nents: elements in the scatterlist
> + * @dir: DMA direction
> + *
> + * Scatterlists mapped with this function should not be unmapped in any way.
> + *
> + * Returns the number of SG entries mapped or 0 on error.
> + */
> +int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
> +		      enum dma_data_direction dir)
> +{
> +	struct dev_pagemap *pgmap;
> +	struct scatterlist *s;
> +	phys_addr_t paddr;
> +	int i;
> +
> +	/*
> +	 * p2pdma mappings are not compatible with devices that use
> +	 * dma_virt_ops. If the upper layers do the right thing
> +	 * this should never happen because it will be prevented
> +	 * by the check in pci_p2pdma_add_client()
> +	 */
> +	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_DMA_VIRT_OPS) &&
> +			 dev->dma_ops == &dma_virt_ops))
> +		return 0;
> +
> +	for_each_sg(sg, s, nents, i) {
> +		pgmap = sg_page(s)->pgmap;
> +		paddr = sg_phys(s);
> +
> +		s->dma_address = paddr - pgmap->pci_p2pdma_bus_offset;
> +		sg_dma_len(s) = s->length;
> +	}
> +
> +	return nents;
> +}
> +EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg);
> diff --git a/include/linux/memremap.h b/include/linux/memremap.h
> index 9553370ebdad..0ac69ddf5fc4 100644
> --- a/include/linux/memremap.h
> +++ b/include/linux/memremap.h
> @@ -125,6 +125,7 @@ struct dev_pagemap {
>  	struct device *dev;
>  	void *data;
>  	enum memory_type type;
> +	u64 pci_p2pdma_bus_offset;
>  };
>  
>  #ifdef CONFIG_ZONE_DEVICE
> diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
> index 7b2b0f547528..2f03dbbf5af6 100644
> --- a/include/linux/pci-p2pdma.h
> +++ b/include/linux/pci-p2pdma.h
> @@ -36,6 +36,8 @@ struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
>  					 unsigned int *nents, u32 length);
>  void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl);
>  void pci_p2pmem_publish(struct pci_dev *pdev, bool publish);
> +int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
> +		      enum dma_data_direction dir);
>  #else /* CONFIG_PCI_P2PDMA */
>  static inline int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar,
>  		size_t size, u64 offset)
> @@ -98,5 +100,10 @@ static inline void pci_p2pmem_free_sgl(struct pci_dev *pdev,
>  static inline void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
>  {
>  }
> +static inline int pci_p2pdma_map_sg(struct device *dev,
> +		struct scatterlist *sg, int nents, enum dma_data_direction dir)
> +{
> +	return 0;
> +}
>  #endif /* CONFIG_PCI_P2PDMA */
>  #endif /* _LINUX_PCI_P2P_H */
> -- 
> 2.19.0
>
Bjorn Helgaas Sept. 21, 2018, 4:48 p.m. UTC | #2
On Fri, Sep 21, 2018 at 08:15:50AM -0500, Bjorn Helgaas wrote:
> On Wed, Sep 12, 2018 at 06:11:46PM -0600, Logan Gunthorpe wrote:
> > The DMA address used when mapping PCI P2P memory must be the PCI bus
> > address. Thus, introduce pci_p2pmem_map_sg() to map the correct
> > addresses when using P2P memory. Memory mapped in this way does not
> > need to be unmapped.
> 
> I think the use of "map" in this context is slightly confusing because the
> general expectation is that map/unmap must be balanced.
> 
> I assume it's because the "mapping" consumes no resources, e.g., requires
> no page table entries.  Possibly there's a better verb than "map", e.g.,
> "convert", "convert_to_p2pdma", etc?
> 
> If you keep "map", maybe add a sentence or two about why there's no
> corresponding unmap?

Another wrinkle is that "map" usually takes an A and gives you back a
B.  Now the caller has both A and B and both are still valid.

Here we pass in an SGL and the SGL is transformed, so the caller only
has B and A has been destroyed, i.e., the SGL can no longer be used as
it was before, and there's no way to get A back.

Maybe this is pointless bikeshedding, so if your color is the best,
don't change anything.

Bjorn
Logan Gunthorpe Sept. 21, 2018, 6:13 p.m. UTC | #3
On 2018-09-21 10:48 AM, Bjorn Helgaas wrote:
>> I think the use of "map" in this context is slightly confusing because the
>> general expectation is that map/unmap must be balanced.

Yeah, Jason said the same thing, but having an empty unmap function
seems wasteful and Christoph said to just remove it. My opinion is that
it's not that big an issue one way or another -- if we have to add an
unmap later it's not really that hard.

>> If you keep "map", maybe add a sentence or two about why there's no
>> corresponding unmap?

Will do.

> Another wrinkle is that "map" usually takes an A and gives you back a
> B.  Now the caller has both A and B and both are still valid.
> Here we pass in an SGL and the SGL is transformed, so the caller only
> has B and A has been destroyed, i.e., the SGL can no longer be used as
> it was before, and there's no way to get A back.

I wouldn't say that. Our map_sg function is doing the same thing
dma_map_sg is: it sets the DMA address and length in the scatter list.
So B is still A just with other fields set. If the caller wanted to map
this SG in a different way they can still do so and the new DMA
address/length would override the old values. (Normally, you'd want to
unmap before doing something like that, but seeing our unmap is an empty
operation, we wouldn't have to do that.)

Logan
Bjorn Helgaas Sept. 21, 2018, 8 p.m. UTC | #4
On Fri, Sep 21, 2018 at 12:13:21PM -0600, Logan Gunthorpe wrote:
> On 2018-09-21 10:48 AM, Bjorn Helgaas wrote:
> >> I think the use of "map" in this context is slightly confusing because the
> >> general expectation is that map/unmap must be balanced.
> 
> Yeah, Jason said the same thing, but having an empty unmap function
> seems wasteful and Christoph said to just remove it. My opinion is that
> it's not that big an issue one way or another -- if we have to add an
> unmap later it's not really that hard.
> 
> >> If you keep "map", maybe add a sentence or two about why there's no
> >> corresponding unmap?
> 
> Will do.
> 
> > Another wrinkle is that "map" usually takes an A and gives you back a
> > B.  Now the caller has both A and B and both are still valid.
> > Here we pass in an SGL and the SGL is transformed, so the caller only
> > has B and A has been destroyed, i.e., the SGL can no longer be used as
> > it was before, and there's no way to get A back.
> 
> I wouldn't say that. Our map_sg function is doing the same thing
> dma_map_sg is: it sets the DMA address and length in the scatter list.
> So B is still A just with other fields set. If the caller wanted to map
> this SG in a different way they can still do so and the new DMA
> address/length would override the old values. (Normally, you'd want to
> unmap before doing something like that, but seeing our unmap is an empty
> operation, we wouldn't have to do that.)

Ok.  I was assuming s->dma_address would have been already set before
the call and would be overwritten by pci_p2pmem_map_sg().  But I guess
that's not the case -- sounds like s->dma_address is undefined before
the call.

Bjorn
Logan Gunthorpe Sept. 21, 2018, 8:01 p.m. UTC | #5
On 2018-09-21 2:00 PM, Bjorn Helgaas wrote:
> Ok.  I was assuming s->dma_address would have been already set before
> the call and would be overwritten by pci_p2pmem_map_sg().  But I guess
> that's not the case -- sounds like s->dma_address is undefined before
> the call.

Correct. It's set by whatever dma_map function operates on it.

Logan
diff mbox series

Patch

diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 67c1daf1189e..29bd40a87768 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -191,6 +191,8 @@  int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 	pgmap->res.flags = pci_resource_flags(pdev, bar);
 	pgmap->ref = &pdev->p2pdma->devmap_ref;
 	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
+	pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
+		pci_resource_start(pdev, bar);
 
 	addr = devm_memremap_pages(&pdev->dev, pgmap);
 	if (IS_ERR(addr)) {
@@ -813,3 +815,44 @@  void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
 	pdev->p2pdma->p2pmem_published = publish;
 }
 EXPORT_SYMBOL_GPL(pci_p2pmem_publish);
+
+/**
+ * pci_p2pdma_map_sg - map a PCI peer-to-peer scatterlist for DMA
+ * @dev: device doing the DMA request
+ * @sg: scatter list to map
+ * @nents: elements in the scatterlist
+ * @dir: DMA direction
+ *
+ * Scatterlists mapped with this function should not be unmapped in any way.
+ *
+ * Returns the number of SG entries mapped or 0 on error.
+ */
+int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+		      enum dma_data_direction dir)
+{
+	struct dev_pagemap *pgmap;
+	struct scatterlist *s;
+	phys_addr_t paddr;
+	int i;
+
+	/*
+	 * p2pdma mappings are not compatible with devices that use
+	 * dma_virt_ops. If the upper layers do the right thing
+	 * this should never happen because it will be prevented
+	 * by the check in pci_p2pdma_add_client()
+	 */
+	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_DMA_VIRT_OPS) &&
+			 dev->dma_ops == &dma_virt_ops))
+		return 0;
+
+	for_each_sg(sg, s, nents, i) {
+		pgmap = sg_page(s)->pgmap;
+		paddr = sg_phys(s);
+
+		s->dma_address = paddr - pgmap->pci_p2pdma_bus_offset;
+		sg_dma_len(s) = s->length;
+	}
+
+	return nents;
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg);
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 9553370ebdad..0ac69ddf5fc4 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -125,6 +125,7 @@  struct dev_pagemap {
 	struct device *dev;
 	void *data;
 	enum memory_type type;
+	u64 pci_p2pdma_bus_offset;
 };
 
 #ifdef CONFIG_ZONE_DEVICE
diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
index 7b2b0f547528..2f03dbbf5af6 100644
--- a/include/linux/pci-p2pdma.h
+++ b/include/linux/pci-p2pdma.h
@@ -36,6 +36,8 @@  struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
 					 unsigned int *nents, u32 length);
 void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl);
 void pci_p2pmem_publish(struct pci_dev *pdev, bool publish);
+int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+		      enum dma_data_direction dir);
 #else /* CONFIG_PCI_P2PDMA */
 static inline int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar,
 		size_t size, u64 offset)
@@ -98,5 +100,10 @@  static inline void pci_p2pmem_free_sgl(struct pci_dev *pdev,
 static inline void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
 {
 }
+static inline int pci_p2pdma_map_sg(struct device *dev,
+		struct scatterlist *sg, int nents, enum dma_data_direction dir)
+{
+	return 0;
+}
 #endif /* CONFIG_PCI_P2PDMA */
 #endif /* _LINUX_PCI_P2P_H */