[02/39] drm/i915/gem: Use set_pte_at() for assigning the vmapped PTE

Message ID 20200826132811.17577-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [01/39] drm/i915/gem: Avoid implicit vmap for highmem on x86-32

Commit Message

Chris Wilson Aug. 26, 2020, 1:27 p.m. UTC
Use set_pte_at() to assign the PTE pointer returned by alloc_vm_area(),
rather than a direct assignment.

Fixes: 6056e50033d9 ("drm/i915/gem: Support discontiguous lmem object maps")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pages.c | 33 +++++++++++++++++++----
 1 file changed, 28 insertions(+), 5 deletions(-)
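
At a glance, the change swaps a raw PTE store for the arch helper. A
minimal sketch of the pattern, against the alloc_vm_area() interface of
the day (n_pages, i, page and pgprot below are illustrative):

    /* alloc_vm_area() reserves a kernel virtual range and returns a
     * pointer into each pre-allocated, empty PTE slot backing it.
     */
    pte_t **ptes = kvmalloc_array(n_pages, sizeof(*ptes), GFP_KERNEL);
    struct vm_struct *area = alloc_vm_area(n_pages * PAGE_SIZE, ptes);

    /* Before: a raw store into the page table, bypassing the
     * architecture's PTE-update hooks.
     */
    *ptes[i] = mk_pte(page, pgprot);

    /* After: the same store routed through the sanctioned helper, so
     * the architecture sees (and can account for) the update.
     */
    set_pte_at(&init_mm, (unsigned long)area->addr + i * PAGE_SIZE,
               ptes[i], mk_pte(page, pgprot));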

Comments

Matthew Auld Aug. 26, 2020, 4:36 p.m. UTC | #1
On Wed, 26 Aug 2020 at 14:28, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Use set_pte_at() to assign the PTE pointer returned by alloc_vm_area(),
> rather than a direct assignment.

and crucially add the missing sync stuff for the mapping?

>
> Fixes: 6056e50033d9 ("drm/i915/gem: Support discontiguous lmem object maps")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.auld@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_pages.c | 33 +++++++++++++++++++----
>  1 file changed, 28 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> index 51b63e05dbe4..0c3d0d6429ae 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> @@ -241,6 +241,17 @@ static inline pte_t iomap_pte(resource_size_t base,
>         return pte_mkspecial(pfn_pte((base + offset) >> PAGE_SHIFT, prot));
>  }
>
> +static void sync_vm_area(struct vm_struct *area)
> +{
> +       unsigned long start = (unsigned long)area->addr;
> +       unsigned long end = start + area->size;
> +
> +       if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PTE_MODIFIED)
> +               arch_sync_kernel_mappings(start, end); /* expected DCE */

What is DCE again?

> +
> +       flush_cache_vmap(start, end);
> +}
> +
>  /* The 'mapping' part of i915_gem_object_pin_map() below */
>  static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
>                                  enum i915_map_type type)
> @@ -308,24 +319,36 @@ static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
>         }
>
>         if (i915_gem_object_has_struct_page(obj)) {
> +               unsigned long addr = (unsigned long)area->addr;
>                 struct sgt_iter iter;
>                 struct page *page;
>                 pte_t **ptes = mem;
>
> -               for_each_sgt_page(page, iter, sgt)
> -                       **ptes++ = mk_pte(page, pgprot);
> +               for_each_sgt_page(page, iter, sgt) {
> +                       set_pte_at(&init_mm, addr, *ptes, mk_pte(page, pgprot));

init_mm needs to be exported?

> +                       addr += PAGE_SIZE;
> +                       ptes++;
> +               }
> +               GEM_BUG_ON(addr != (unsigned long)area->addr + obj->base.size);
>         } else {
> +               unsigned long addr = (unsigned long)area->addr;
>                 resource_size_t iomap;
>                 struct sgt_iter iter;
>                 pte_t **ptes = mem;
> -               dma_addr_t addr;
> +               dma_addr_t offset;
>
>                 iomap = obj->mm.region->iomap.base;
>                 iomap -= obj->mm.region->region.start;
>
> -               for_each_sgt_daddr(addr, iter, sgt)
> -                       **ptes++ = iomap_pte(iomap, addr, pgprot);
> +               for_each_sgt_daddr(offset, iter, sgt) {
> +                       set_pte_at(&init_mm, addr, *ptes,
> +                                  iomap_pte(iomap, offset, pgprot));
> +                       addr += PAGE_SIZE;
> +                       ptes++;
> +               }
> +               GEM_BUG_ON(addr != (unsigned long)area->addr + obj->base.size);
>         }
> +       sync_vm_area(area);
>
>         if (mem != stack)
>                 kvfree(mem);
> --
> 2.20.1
>
Chris Wilson Aug. 26, 2020, 4:55 p.m. UTC | #2
Quoting Matthew Auld (2020-08-26 17:36:52)
> On Wed, 26 Aug 2020 at 14:28, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >
> > Use set_pte_at() to assign the PTE pointer returned by alloc_vm_area(),
> > rather than a direct assignment.
> 
> and crucially add the missing sync stuff for the mapping?

Fortunately not for x86.
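
A sketch of why (going from memory of the v5.8 x86 headers, so treat
the exact spellings as approximate):

    /* 32-bit: only PMD-level changes need syncing */
    #define ARCH_PAGE_TABLE_SYNC_MASK	PGTBL_PMD_MODIFIED

    /* 64-bit: only the top level needs syncing */
    #define ARCH_PAGE_TABLE_SYNC_MASK \
	(pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED)

Neither mask includes PGTBL_PTE_MODIFIED, so writing into PTE slots
that alloc_vm_area() has already instantiated never requires
arch_sync_kernel_mappings() on x86.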

> > Fixes: 6056e50033d9 ("drm/i915/gem: Support discontiguous lmem object maps")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Matthew Auld <matthew.auld@intel.com>
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_pages.c | 33 +++++++++++++++++++----
> >  1 file changed, 28 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > index 51b63e05dbe4..0c3d0d6429ae 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > @@ -241,6 +241,17 @@ static inline pte_t iomap_pte(resource_size_t base,
> >         return pte_mkspecial(pfn_pte((base + offset) >> PAGE_SHIFT, prot));
> >  }
> >
> > +static void sync_vm_area(struct vm_struct *area)
> > +{
> > +       unsigned long start = (unsigned long)area->addr;
> > +       unsigned long end = start + area->size;
> > +
> > +       if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PTE_MODIFIED)
> > +               arch_sync_kernel_mappings(start, end); /* expected DCE */
> 
> What is DCE again?

"Dead code eliminated." arch_sync_kernel_mappings() is not exported, so
if a platform were to require us to call it, we have a problem.

I figured it would be better get a complaint and know the problem
exists, having had the set_pmd() issue bite once.

I expect that if the problem does arise, the alloc_vm_area() interface
will be changed to move set_pte_at() out of drivers. Just my guess. It
could be as simple as removing that pte_t *[] argument and requiring us
to use apply_to_page_range() directly.
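
To spell out the "expected DCE" comment (the generic fallback lives in
include/linux/vmalloc.h as of v5.8, if memory serves):

    #ifndef ARCH_PAGE_TABLE_SYNC_MASK
    #define ARCH_PAGE_TABLE_SYNC_MASK 0
    #endif

    /* In sync_vm_area() above, this condition is compile-time false
     * wherever the arch mask lacks PGTBL_PTE_MODIFIED, so the compiler
     * discards the call and the missing export never reaches the
     * linker.
     */
    if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PTE_MODIFIED)
	    arch_sync_kernel_mappings(start, end);

On an architecture that does set PGTBL_PTE_MODIFIED, the call survives,
i915.ko fails to link, and we get the loud complaint intended here.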

> > +
> > +       flush_cache_vmap(start, end);
> > +}
> > +
> >  /* The 'mapping' part of i915_gem_object_pin_map() below */
> >  static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
> >                                  enum i915_map_type type)
> > @@ -308,24 +319,36 @@ static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
> >         }
> >
> >         if (i915_gem_object_has_struct_page(obj)) {
> > +               unsigned long addr = (unsigned long)area->addr;
> >                 struct sgt_iter iter;
> >                 struct page *page;
> >                 pte_t **ptes = mem;
> >
> > -               for_each_sgt_page(page, iter, sgt)
> > -                       **ptes++ = mk_pte(page, pgprot);
> > +               for_each_sgt_page(page, iter, sgt) {
> > +                       set_pte_at(&init_mm, addr, *ptes, mk_pte(page, pgprot));
> 
> init_mm needs to be exported?

Odd, because I recall that's why we didn't use it previously... But it
compiles with i915.ko as a module. Hmm, that probably means set_pte_at()
expands to a macro that doesn't use init_mm on x86. That will no doubt
cause a problem somewhere else, on whatever platform actually requires
the mm_struct for writing the PTE.
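
For reference, the x86 path bottoms out without ever touching the mm
argument, something like this (non-paravirt v5.8 headers, paraphrased):

    static inline void native_set_pte_at(struct mm_struct *mm,
					 unsigned long addr,
					 pte_t *ptep, pte_t pte)
    {
	    /* mm and addr are simply ignored on x86 */
	    native_set_pte(ptep, pte);
    }

After inlining, the &init_mm argument is dead and no reference to the
symbol is emitted, which is why the module links without the export.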
-Chris

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 51b63e05dbe4..0c3d0d6429ae 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -241,6 +241,17 @@  static inline pte_t iomap_pte(resource_size_t base,
 	return pte_mkspecial(pfn_pte((base + offset) >> PAGE_SHIFT, prot));
 }
 
+static void sync_vm_area(struct vm_struct *area)
+{
+	unsigned long start = (unsigned long)area->addr;
+	unsigned long end = start + area->size;
+
+	if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PTE_MODIFIED)
+		arch_sync_kernel_mappings(start, end); /* expected DCE */
+
+	flush_cache_vmap(start, end);
+}
+
 /* The 'mapping' part of i915_gem_object_pin_map() below */
 static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
 				 enum i915_map_type type)
@@ -308,24 +319,36 @@  static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
 	}
 
 	if (i915_gem_object_has_struct_page(obj)) {
+		unsigned long addr = (unsigned long)area->addr;
 		struct sgt_iter iter;
 		struct page *page;
 		pte_t **ptes = mem;
 
-		for_each_sgt_page(page, iter, sgt)
-			**ptes++ = mk_pte(page, pgprot);
+		for_each_sgt_page(page, iter, sgt) {
+			set_pte_at(&init_mm, addr, *ptes, mk_pte(page, pgprot));
+			addr += PAGE_SIZE;
+			ptes++;
+		}
+		GEM_BUG_ON(addr != (unsigned long)area->addr + obj->base.size);
 	} else {
+		unsigned long addr = (unsigned long)area->addr;
 		resource_size_t iomap;
 		struct sgt_iter iter;
 		pte_t **ptes = mem;
-		dma_addr_t addr;
+		dma_addr_t offset;
 
 		iomap = obj->mm.region->iomap.base;
 		iomap -= obj->mm.region->region.start;
 
-		for_each_sgt_daddr(addr, iter, sgt)
-			**ptes++ = iomap_pte(iomap, addr, pgprot);
+		for_each_sgt_daddr(offset, iter, sgt) {
+			set_pte_at(&init_mm, addr, *ptes,
+				   iomap_pte(iomap, offset, pgprot));
+			addr += PAGE_SIZE;
+			ptes++;
+		}
+		GEM_BUG_ON(addr != (unsigned long)area->addr + obj->base.size);
 	}
+	sync_vm_area(area);
 
 	if (mem != stack)
 		kvfree(mem);