[v3,04/20] PCI/P2PDMA: introduce helpers for dma_map_sg implementations

Message ID: 20210916234100.122368-5-logang@deltatee.com
State: New, archived
Series: Userspace P2PDMA with O_DIRECT NVMe devices

Commit Message

Logan Gunthorpe Sept. 16, 2021, 11:40 p.m. UTC
Add pci_p2pdma_map_segment() as a helper for simple dma_map_sg()
implementations. It takes a scatterlist segment that must point to a
pci_p2pdma struct page and will map it if the mapping requires a bus
address.

The return value indicates whether the mapping required a bus address
or whether the caller still needs to map the segment normally. If the
segment should not be mapped, -EREMOTEIO is returned.

This helper uses a state structure to track changes to the pgmap
across calls and avoids needing to look up in the xarray for every
page.

Also add pci_p2pdma_map_bus_segment() which is useful for IOMMU
dma_map_sg() implementations where the sg segment containing the page
differs from the sg segment containing the DMA address.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
---
 drivers/pci/p2pdma.c       | 59 ++++++++++++++++++++++++++++++++++++++
 include/linux/pci-p2pdma.h | 21 ++++++++++++++
 2 files changed, 80 insertions(+)
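
For context, a minimal sketch of how a simple, non-IOMMU dma_map_sg()
implementation might call the new helper. The surrounding function and
example_map_segment_normally() are hypothetical and the error convention
is assumed (loosely modelled on the dma-direct changes elsewhere in this
series); only pci_p2pdma_map_segment(), is_pci_p2pdma_page() and the
pci_p2pdma_map_type values come from the P2PDMA code itself:

static int example_map_sg(struct device *dev, struct scatterlist *sgl,
                          int nents, enum dma_data_direction dir)
{
        struct pci_p2pdma_map_state p2pdma_state = {};
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nents, i) {
                if (is_pci_p2pdma_page(sg_page(sg))) {
                        switch (pci_p2pdma_map_segment(&p2pdma_state, dev,
                                                       sg)) {
                        case PCI_P2PDMA_MAP_BUS_ADDR:
                                /* helper already set sg->dma_address */
                                continue;
                        case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
                                /* safe to map like any other segment */
                                break;
                        default:
                                return -EREMOTEIO;
                        }
                }
                /* hypothetical stand-in for the normal mapping path */
                if (example_map_segment_normally(dev, sg, dir))
                        return -EIO;    /* unwind elided for brevity */
        }
        return nents;
}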

Comments

Bjorn Helgaas Sept. 27, 2021, 6:53 p.m. UTC | #1
On Thu, Sep 16, 2021 at 05:40:44PM -0600, Logan Gunthorpe wrote:
> Add pci_p2pdma_map_segment() as a helper for simple dma_map_sg()
> implementations. It takes a scatterlist segment that must point to a
> pci_p2pdma struct page and will map it if the mapping requires a bus
> address.
> 
> The return value indicates whether the mapping required a bus address
> or whether the caller still needs to map the segment normally. If the
> segment should not be mapped, -EREMOTEIO is returned.
> 
> This helper uses a state structure to track changes to the pgmap
> across calls and avoids needing to look up in the xarray for every
> page.
> 
> Also add pci_p2pdma_map_bus_segment() which is useful for IOMMU
> dma_map_sg() implementations where the sg segment containing the page
> differs from the sg segment containing the DMA address.
> 
> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>

Acked-by: Bjorn Helgaas <bhelgaas@google.com>

Ditto.

> ---
>  drivers/pci/p2pdma.c       | 59 ++++++++++++++++++++++++++++++++++++++
>  include/linux/pci-p2pdma.h | 21 ++++++++++++++
>  2 files changed, 80 insertions(+)
> 
> diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
> index b656d8c801a7..58c34f1f1473 100644
> --- a/drivers/pci/p2pdma.c
> +++ b/drivers/pci/p2pdma.c
> @@ -943,6 +943,65 @@ void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
>  }
>  EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs);
>  
> +/**
> + * pci_p2pdma_map_segment - map an sg segment determining the mapping type
> + * @state: State structure that should be declared outside of the for_each_sg()
> + *	loop and initialized to zero.
> + * @dev: DMA device that's doing the mapping operation
> + * @sg: scatterlist segment to map
> + *
> + * This is a helper to be used by non-iommu dma_map_sg() implementations where
> + * the sg segment is the same for the page_link and the dma_address.

s/non-iommu/non-IOMMU/

> + *
> + * Attempt to map a single segment in an SGL with the PCI bus address.
> + * The segment must point to a PCI P2PDMA page and thus must be
> + * wrapped in a is_pci_p2pdma_page(sg_page(sg)) check.
> + *
> + * Returns the type of mapping used and maps the page if the type is
> + * PCI_P2PDMA_MAP_BUS_ADDR.
> + */
> +enum pci_p2pdma_map_type
> +pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
> +		       struct scatterlist *sg)
> +{
> +	if (state->pgmap != sg_page(sg)->pgmap) {
> +		state->pgmap = sg_page(sg)->pgmap;
> +		state->map = pci_p2pdma_map_type(state->pgmap, dev);
> +		state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset;
> +	}
> +
> +	if (state->map == PCI_P2PDMA_MAP_BUS_ADDR) {
> +		sg->dma_address = sg_phys(sg) + state->bus_off;
> +		sg_dma_len(sg) = sg->length;
> +		sg_dma_mark_pci_p2pdma(sg);
> +	}
> +
> +	return state->map;
> +}
> +
> +/**
> + * pci_p2pdma_map_bus_segment - map an sg segment pre determined to
> + *	be mapped with PCI_P2PDMA_MAP_BUS_ADDR
> + * @pg_sg: scatterlist segment with the page to map
> + * @dma_sg: scatterlist segment to assign a dma address to

s/dma address/DMA address/, also below

> + *
> + * This is a helper for iommu dma_map_sg() implementations when the
> + * segment for the dma address differs from the segment containing the
> + * source page.
> + *
> + * pci_p2pdma_map_type() must have already been called on the pg_sg and
> + * returned PCI_P2PDMA_MAP_BUS_ADDR.
> + */
> +void pci_p2pdma_map_bus_segment(struct scatterlist *pg_sg,
> +				struct scatterlist *dma_sg)
> +{
> +	struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(sg_page(pg_sg)->pgmap);
> +
> +	dma_sg->dma_address = sg_phys(pg_sg) + pgmap->bus_offset;
> +	sg_dma_len(dma_sg) = pg_sg->length;
> +	sg_dma_mark_pci_p2pdma(dma_sg);
> +}
> +
>  /**
>   * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store
>   *		to enable p2pdma
> diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
> index caac2d023f8f..e5a8d5bc0f51 100644
> --- a/include/linux/pci-p2pdma.h
> +++ b/include/linux/pci-p2pdma.h
> @@ -13,6 +13,12 @@
>  
>  #include <linux/pci.h>
>  
> +struct pci_p2pdma_map_state {
> +	struct dev_pagemap *pgmap;
> +	int map;
> +	u64 bus_off;
> +};
> +
>  struct block_device;
>  struct scatterlist;
>  
> @@ -70,6 +76,11 @@ int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
>  		int nents, enum dma_data_direction dir, unsigned long attrs);
>  void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
>  		int nents, enum dma_data_direction dir, unsigned long attrs);
> +enum pci_p2pdma_map_type
> +pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
> +		       struct scatterlist *sg);
> +void pci_p2pdma_map_bus_segment(struct scatterlist *pg_sg,
> +				struct scatterlist *dma_sg);
>  int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
>  			    bool *use_p2pdma);
>  ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
> @@ -135,6 +146,16 @@ static inline void pci_p2pdma_unmap_sg_attrs(struct device *dev,
>  		unsigned long attrs)
>  {
>  }
> +static inline enum pci_p2pdma_map_type
> +pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
> +		       struct scatterlist *sg)
> +{
> +	return PCI_P2PDMA_MAP_NOT_SUPPORTED;
> +}
> +static inline void pci_p2pdma_map_bus_segment(struct scatterlist *pg_sg,
> +					      struct scatterlist *dma_sg)
> +{
> +}
>  static inline int pci_p2pdma_enable_store(const char *page,
>  		struct pci_dev **p2p_dev, bool *use_p2pdma)
>  {
> -- 
> 2.30.2
>
Logan Gunthorpe Sept. 27, 2021, 7:59 p.m. UTC | #2
On 2021-09-27 12:53 p.m., Bjorn Helgaas wrote:
> Acked-by: Bjorn Helgaas <bhelgaas@google.com>
> 
> Ditto.

Thanks Bjorn, I'll make these changes and add your Acks for subsequent
postings.

Logan
Jason Gunthorpe Sept. 28, 2021, 6:55 p.m. UTC | #3
On Thu, Sep 16, 2021 at 05:40:44PM -0600, Logan Gunthorpe wrote:
> Add pci_p2pdma_map_segment() as a helper for simple dma_map_sg()
> implementations. It takes a scatterlist segment that must point to a
> pci_p2pdma struct page and will map it if the mapping requires a bus
> address.
> 
> The return value indicates whether the mapping required a bus address
> or whether the caller still needs to map the segment normally. If the
> segment should not be mapped, -EREMOTEIO is returned.
> 
> This helper uses a state structure to track changes to the pgmap
> across calls and avoids needing to look up in the xarray for every
> page.
> 
> Also add pci_p2pdma_map_bus_segment() which is useful for IOMMU
> dma_map_sg() implementations where the sg segment containing the page
> differs from the sg segment containing the DMA address.
> 
> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
>  drivers/pci/p2pdma.c       | 59 ++++++++++++++++++++++++++++++++++++++
>  include/linux/pci-p2pdma.h | 21 ++++++++++++++
>  2 files changed, 80 insertions(+)
> 
> diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
> index b656d8c801a7..58c34f1f1473 100644
> +++ b/drivers/pci/p2pdma.c
> @@ -943,6 +943,65 @@ void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
>  }
>  EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs);
>  
> +/**
> + * pci_p2pdma_map_segment - map an sg segment determining the mapping type
> + * @state: State structure that should be declared outside of the for_each_sg()
> + *	loop and initialized to zero.
> + * @dev: DMA device that's doing the mapping operation
> + * @sg: scatterlist segment to map
> + *
> + * This is a helper to be used by non-iommu dma_map_sg() implementations where
> + * the sg segment is the same for the page_link and the dma_address.
> + *
> + * Attempt to map a single segment in an SGL with the PCI bus address.
> + * The segment must point to a PCI P2PDMA page and thus must be
> + * wrapped in a is_pci_p2pdma_page(sg_page(sg)) check.
> + *
> + * Returns the type of mapping used and maps the page if the type is
> + * PCI_P2PDMA_MAP_BUS_ADDR.
> + */
> +enum pci_p2pdma_map_type
> +pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
> +		       struct scatterlist *sg)
> +{
> +	if (state->pgmap != sg_page(sg)->pgmap) {
> +		state->pgmap = sg_page(sg)->pgmap;
> +		state->map = pci_p2pdma_map_type(state->pgmap, dev);
> +		state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset;
> +	}

Is this safe? I was just talking with Joao about this,

 https://lore.kernel.org/r/20210928180150.GI3544071@ziepe.ca

API wise I absolutely think this should be safe as written, but is it
really?

Does pgmap ensure that a positive refcount struct page always has a
valid pgmap pointer (and thus the mess in gup can be deleted) or does
this need to get the pgmap as well to keep it alive?

Jason
Jason Gunthorpe Sept. 28, 2021, 10:05 p.m. UTC | #4
On Thu, Sep 16, 2021 at 05:40:44PM -0600, Logan Gunthorpe wrote:

> +enum pci_p2pdma_map_type
> +pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
> +		       struct scatterlist *sg)
> +{
> +	if (state->pgmap != sg_page(sg)->pgmap) {
> +		state->pgmap = sg_page(sg)->pgmap;

This has built into it an assumption that every page in the sg element
has the same pgmap, but AFAIK nothing enforces this rule? There is no
requirement that the HW has pfn gaps between the pgmaps linux decides
to create over it.

At least sg_alloc_append_table_from_pages() and probably something in
the block world should be updated to not combine struct pages with
different pgmaps, and this should be documented in scatterlist.*
someplace.

Jason
Logan Gunthorpe Sept. 29, 2021, 9:26 p.m. UTC | #5
On 2021-09-28 12:55 p.m., Jason Gunthorpe wrote:
> On Thu, Sep 16, 2021 at 05:40:44PM -0600, Logan Gunthorpe wrote:
>> Add pci_p2pdma_map_segment() as a helper for simple dma_map_sg()
>> implementations. It takes a scatterlist segment that must point to a
>> pci_p2pdma struct page and will map it if the mapping requires a bus
>> address.
>>
>> The return value indicates whether the mapping required a bus address
>> or whether the caller still needs to map the segment normally. If the
>> segment should not be mapped, -EREMOTEIO is returned.
>>
>> This helper uses a state structure to track changes to the pgmap
>> across calls and avoids needing to look up in the xarray for every
>> page.
>>
>> Also add pci_p2pdma_map_bus_segment() which is useful for IOMMU
>> dma_map_sg() implementations where the sg segment containing the page
>> differs from the sg segment containing the DMA address.
>>
>> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
>>  drivers/pci/p2pdma.c       | 59 ++++++++++++++++++++++++++++++++++++++
>>  include/linux/pci-p2pdma.h | 21 ++++++++++++++
>>  2 files changed, 80 insertions(+)
>>
>> diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
>> index b656d8c801a7..58c34f1f1473 100644
>> +++ b/drivers/pci/p2pdma.c
>> @@ -943,6 +943,65 @@ void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
>>  }
>>  EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs);
>>  
>> +/**
>> + * pci_p2pdma_map_segment - map an sg segment determining the mapping type
>> + * @state: State structure that should be declared outside of the for_each_sg()
>> + *	loop and initialized to zero.
>> + * @dev: DMA device that's doing the mapping operation
>> + * @sg: scatterlist segment to map
>> + *
>> + * This is a helper to be used by non-iommu dma_map_sg() implementations where
>> + * the sg segment is the same for the page_link and the dma_address.
>> + *
>> + * Attempt to map a single segment in an SGL with the PCI bus address.
>> + * The segment must point to a PCI P2PDMA page and thus must be
>> + * wrapped in a is_pci_p2pdma_page(sg_page(sg)) check.
>> + *
>> + * Returns the type of mapping used and maps the page if the type is
>> + * PCI_P2PDMA_MAP_BUS_ADDR.
>> + */
>> +enum pci_p2pdma_map_type
>> +pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
>> +		       struct scatterlist *sg)
>> +{
>> +	if (state->pgmap != sg_page(sg)->pgmap) {
>> +		state->pgmap = sg_page(sg)->pgmap;
>> +		state->map = pci_p2pdma_map_type(state->pgmap, dev);
>> +		state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset;
>> +	}
> 
> Is this safe? I was just talking with Joao about this,
> 
>  https://lore.kernel.org/r/20210928180150.GI3544071@ziepe.ca
> 

I agree that taking the extra reference on the pagemap seems
unnecessary. However, for the purposes of this patchset it was
convenient to check the page type only when a new pgmap is encountered
rather than for every page. But if we need to add a check directly to
gup, that'd probably be fine too.

> API wise I absolutely think this should be safe as written, but is it
> really?
> 
> Does pgmap ensure that a positive refcount struct page always has a
> valid pgmap pointer (and thus the mess in gup can be deleted) or does
> this need to get the pgmap as well to keep it alive?

Yes, the P2PDMA code ensures that the pgmap will not be deleted if there
is a positive refcount on any struct page.

Logan
Logan Gunthorpe Sept. 29, 2021, 9:30 p.m. UTC | #6
On 2021-09-28 4:05 p.m., Jason Gunthorpe wrote:
> On Thu, Sep 16, 2021 at 05:40:44PM -0600, Logan Gunthorpe wrote:
> 
>> +enum pci_p2pdma_map_type
>> +pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
>> +		       struct scatterlist *sg)
>> +{
>> +	if (state->pgmap != sg_page(sg)->pgmap) {
>> +		state->pgmap = sg_page(sg)->pgmap;
> 
> This has built into it an assumption that every page in the sg element
> has the same pgmap, but AFAIK nothing enforces this rule? There is no
> requirement that the HW has pfn gaps between the pgmaps linux decides
> to create over it.

No, that's not a correct reading of the code. Every time there is a new
pagemap, this code calculates the mapping type and bus offset. If a page
comes along with a different pgmap, it recalculates. This just reduces
the overhead so that the calculation is done only when a page with a
different pgmap comes along, not for every single page.

> At least sg_alloc_append_table_from_pages() and probably something in
> the block world should be updated to not combine struct pages with
> different pgmaps, and this should be documented in scatterlist.*
> someplace.

There's no sane place to do this check. The code is designed to support
mappings with different pgmaps.

Logan
Jason Gunthorpe Sept. 29, 2021, 10:46 p.m. UTC | #7
On Wed, Sep 29, 2021 at 03:30:42PM -0600, Logan Gunthorpe wrote:
> 
> 
> 
> On 2021-09-28 4:05 p.m., Jason Gunthorpe wrote:
> > On Thu, Sep 16, 2021 at 05:40:44PM -0600, Logan Gunthorpe wrote:
> > 
> >> +enum pci_p2pdma_map_type
> >> +pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
> >> +		       struct scatterlist *sg)
> >> +{
> >> +	if (state->pgmap != sg_page(sg)->pgmap) {
> >> +		state->pgmap = sg_page(sg)->pgmap;
> > 
> > This has built into it an assumption that every page in the sg element
> > has the same pgmap, but AFAIK nothing enforces this rule? There is no
> > requirement that the HW has pfn gaps between the pgmaps linux decides
> > to create over it.
> 
> No, that's not a correct reading of the code. Every time there is a new
> pagemap, this code calculates the mapping type and bus offset. If a page
> comes along with a different pgmap, it recalculates. This just reduces
> the overhead so that the calculation is done only when a page with a
> different pgmap comes along, not for every single page.

Each 'struct scatterlist *sg' refers to a range of contiguous pfns
starting at page_to_pfn(sg_page()) and going for approx sg->length/PAGE_SIZE
pfns long.

sg_page() returns the first page, but nothing says that sg_page()+1
has the same pgmap.

The code in this patch only checks the first page of each sg in the
larger sgl.
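
A minimal sketch of the stricter per-entry check this implies (the
helper name is hypothetical, and it assumes the entry's pages are all
ZONE_DEVICE, as in the P2PDMA case, since ->pgmap is only meaningful
there):

static bool sg_entry_pgmap_is_uniform(struct scatterlist *sg)
{
        struct page *first = sg_page(sg);
        unsigned int npages = DIV_ROUND_UP(sg->offset + sg->length,
                                           PAGE_SIZE);
        unsigned int i;

        /* walk every pfn the entry covers, not just the first */
        for (i = 1; i < npages; i++)
                if (nth_page(first, i)->pgmap != first->pgmap)
                        return false;
        return true;
}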

> > At least sg_alloc_append_table_from_pages() and probably something in
> > the block world should be updated to not combine struct pages with
> > different pgmaps, and this should be documented in scatterlist.*
> > someplace.
> 
> There's no sane place to do this check. The code is designed to support
> mappings with different pgmaps.

All places that generate compound sg's by aggregating multiple pages
need to include this check alongside the check for physical
contiguity. There are not that many places, but
sg_alloc_append_table_from_pages() is one of them:

@@ -470,7 +470,8 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
 
                /* Merge contiguous pages into the last SG */
                prv_len = sgt_append->prv->length;
-               while (n_pages && page_to_pfn(pages[0]) == paddr) {
+               while (n_pages && page_to_pfn(pages[0]) == paddr &&
+                      sg_page(sgt_append->prv)->pgmap == pages[0]->pgmap) {
                        if (sgt_append->prv->length + PAGE_SIZE > max_segment)
                                break;
                        sgt_append->prv->length += PAGE_SIZE;
@@ -488,7 +489,8 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
        for (i = 1; i < n_pages; i++) {
                seg_len += PAGE_SIZE;
                if (seg_len >= max_segment ||
-                   page_to_pfn(pages[i]) != page_to_pfn(pages[i - 1]) + 1) {
+                   page_to_pfn(pages[i]) != page_to_pfn(pages[i - 1]) + 1 ||
+                   pages[i]->pgmap != pages[i - 1]->pgmap) {
                        chunks++;
                        seg_len = 0;
                }
@@ -505,9 +507,10 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
                        seg_len += PAGE_SIZE;
                        if (seg_len >= max_segment ||
                            page_to_pfn(pages[j]) !=
-                           page_to_pfn(pages[j - 1]) + 1)
+                                   page_to_pfn(pages[j - 1]) + 1 ||
> +                           pages[j]->pgmap != pages[j - 1]->pgmap) {
                                break;
-               }
+                       }
 
                /* Pass how many chunks might be left */
                s = get_next_sg(sgt_append, s, chunks - i + left_pages,
Logan Gunthorpe Sept. 29, 2021, 11 p.m. UTC | #8
On 2021-09-29 4:46 p.m., Jason Gunthorpe wrote:
> On Wed, Sep 29, 2021 at 03:30:42PM -0600, Logan Gunthorpe wrote:
>> On 2021-09-28 4:05 p.m., Jason Gunthorpe wrote:
>> No, that's not a correct reading of the code. Every time there is a new
>> pagemap, this code calculates the mapping type and bus offset. If a page
>> comes along with a different pgmap, it recalculates. This just reduces
>> the overhead so that the calculation is done only when a page with a
>> different pgmap comes along, not for every single page.
> 
> Each 'struct scatterlist *sg' refers to a range of contiguous pfns
> starting at page_to_pfn(sg_page()) and going for approx sg->length/PAGE_SIZE
> pfns long.
> 

Ugh, right. A bit contrived for consecutive pages to have different
pgmaps and still be next to each other in a DMA transaction. But I guess
it is technically possible and should be protected against.

> sg_page() returns the first page, but nothing says that sg_page()+1
> has the same pgmap.
> 
> The code in this patch only checks the first page of each sg in the
> larger sgl.
> 
>>> At least sg_alloc_append_table_from_pages() and probably something in
>>> the block world should be updated to not combine struct pages with
>>> different pgmaps, and this should be documented in scatterlist.*
>>> someplace.
>>
>> There's no sane place to do this check. The code is designed to support
>> mappings with different pgmaps.
> 
> All places that generate compound sg's by aggregating multiple pages
> need to include this check alongside the check for physical
> contiguity. There are not that many places, but
> sg_alloc_append_table_from_pages() is one of them:

Yes. The block layer also does this. I believe a check in
page_is_mergeable() will be sufficient there.

> @@ -470,7 +470,8 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
>  
>                 /* Merge contiguous pages into the last SG */
>                 prv_len = sgt_append->prv->length;
> -               while (n_pages && page_to_pfn(pages[0]) == paddr) {
> +               while (n_pages && page_to_pfn(pages[0]) == paddr &&
> +                      sg_page(sgt_append->prv)->pgmap == pages[0]->pgmap) {

I don't think it's correct to use pgmap without first checking if it is
a zone device page. But your point is taken. I'll try to address this.
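
Something along these lines (a sketch; the helper name is an
assumption, not from this series) would keep the comparison safe:

static inline bool pages_have_same_pgmap(const struct page *a,
                                         const struct page *b)
{
        /* only dereference ->pgmap once both pages are known to be
         * ZONE_DEVICE */
        if (is_zone_device_page(a) != is_zone_device_page(b))
                return false;
        if (!is_zone_device_page(a))
                return true;
        return a->pgmap == b->pgmap;
}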

Logan
Jason Gunthorpe Sept. 29, 2021, 11:40 p.m. UTC | #9
On Wed, Sep 29, 2021 at 05:00:43PM -0600, Logan Gunthorpe wrote:
> 
> 
> 
> On 2021-09-29 4:46 p.m., Jason Gunthorpe wrote:
> > On Wed, Sep 29, 2021 at 03:30:42PM -0600, Logan Gunthorpe wrote:
> >> On 2021-09-28 4:05 p.m., Jason Gunthorpe wrote:
> >> No, that's not a correct reading of the code. Every time there is a new
> >> pagemap, this code calculates the mapping type and bus offset. If a page
> >> comes along with a different page map,f it recalculates. This just
> >> reduces the overhead so that the calculation is done only every time a
> >> page with a different pgmap comes along and not doing it for every
> >> single page.
> > 
> > Each 'struct scatterlist *sg' refers to a range of contiguous pfns
> > starting at page_to_pfn(sg_page()) and going for approx sg->length/PAGE_SIZE
> > pfns long.
> > 
> 
> Ugh, right. A bit contrived for consecutive pages to have different
> pgmaps and still be next to each other in a DMA transaction. But I guess
> it is technically possible and should be protected against.

I worry it is something a hostile userspace could cook up using mmap
to cause some kind of kernel integrity problem.

> > @@ -470,7 +470,8 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
> >  
> >                 /* Merge contiguous pages into the last SG */
> >                 prv_len = sgt_append->prv->length;
> > -               while (n_pages && page_to_pfn(pages[0]) == paddr) {
> > +               while (n_pages && page_to_pfn(pages[0]) == paddr &&
> > +                      sg_page(sgt_append->prv)->pgmap == pages[0]->pgmap) {
> 
> I don't think it's correct to use pgmap without first checking if it is
> a zone device page. But your point is taken. I'll try to address this.

Yes

Jason

Patch

diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index b656d8c801a7..58c34f1f1473 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -943,6 +943,65 @@  void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
 }
 EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs);
 
+/**
+ * pci_p2pdma_map_segment - map an sg segment determining the mapping type
+ * @state: State structure that should be declared outside of the for_each_sg()
+ *	loop and initialized to zero.
+ * @dev: DMA device that's doing the mapping operation
+ * @sg: scatterlist segment to map
+ *
+ * This is a helper to be used by non-iommu dma_map_sg() implementations where
+ * the sg segment is the same for the page_link and the dma_address.
+ *
+ * Attempt to map a single segment in an SGL with the PCI bus address.
+ * The segment must point to a PCI P2PDMA page and thus must be
+ * wrapped in a is_pci_p2pdma_page(sg_page(sg)) check.
+ *
+ * Returns the type of mapping used and maps the page if the type is
+ * PCI_P2PDMA_MAP_BUS_ADDR.
+ */
+enum pci_p2pdma_map_type
+pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
+		       struct scatterlist *sg)
+{
+	if (state->pgmap != sg_page(sg)->pgmap) {
+		state->pgmap = sg_page(sg)->pgmap;
+		state->map = pci_p2pdma_map_type(state->pgmap, dev);
+		state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset;
+	}
+
+	if (state->map == PCI_P2PDMA_MAP_BUS_ADDR) {
+		sg->dma_address = sg_phys(sg) + state->bus_off;
+		sg_dma_len(sg) = sg->length;
+		sg_dma_mark_pci_p2pdma(sg);
+	}
+
+	return state->map;
+}
+
+/**
+ * pci_p2pdma_map_bus_segment - map an sg segment pre determined to
+ *	be mapped with PCI_P2PDMA_MAP_BUS_ADDR
+ * @pg_sg: scatterlist segment with the page to map
+ * @dma_sg: scatterlist segment to assign a dma address to
+ *
+ * This is a helper for iommu dma_map_sg() implementations when the
+ * segment for the dma address differs from the segment containing the
+ * source page.
+ *
+ * pci_p2pdma_map_type() must have already been called on the pg_sg and
+ * returned PCI_P2PDMA_MAP_BUS_ADDR.
+ */
+void pci_p2pdma_map_bus_segment(struct scatterlist *pg_sg,
+				struct scatterlist *dma_sg)
+{
+	struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(sg_page(pg_sg)->pgmap);
+
+	dma_sg->dma_address = sg_phys(pg_sg) + pgmap->bus_offset;
+	sg_dma_len(dma_sg) = pg_sg->length;
+	sg_dma_mark_pci_p2pdma(dma_sg);
+}
+
 /**
  * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store
  *		to enable p2pdma
diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
index caac2d023f8f..e5a8d5bc0f51 100644
--- a/include/linux/pci-p2pdma.h
+++ b/include/linux/pci-p2pdma.h
@@ -13,6 +13,12 @@ 
 
 #include <linux/pci.h>
 
+struct pci_p2pdma_map_state {
+	struct dev_pagemap *pgmap;
+	int map;
+	u64 bus_off;
+};
+
 struct block_device;
 struct scatterlist;
 
@@ -70,6 +76,11 @@  int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
 		int nents, enum dma_data_direction dir, unsigned long attrs);
 void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
 		int nents, enum dma_data_direction dir, unsigned long attrs);
+enum pci_p2pdma_map_type
+pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
+		       struct scatterlist *sg);
+void pci_p2pdma_map_bus_segment(struct scatterlist *pg_sg,
+				struct scatterlist *dma_sg);
 int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
 			    bool *use_p2pdma);
 ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
@@ -135,6 +146,16 @@  static inline void pci_p2pdma_unmap_sg_attrs(struct device *dev,
 		unsigned long attrs)
 {
 }
+static inline enum pci_p2pdma_map_type
+pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
+		       struct scatterlist *sg)
+{
+	return PCI_P2PDMA_MAP_NOT_SUPPORTED;
+}
+static inline void pci_p2pdma_map_bus_segment(struct scatterlist *pg_sg,
+					      struct scatterlist *dma_sg)
+{
+}
 static inline int pci_p2pdma_enable_store(const char *page,
 		struct pci_dev **p2p_dev, bool *use_p2pdma)
 {