diff mbox series

[v4,1/2] mm: Add a vmf_insert_mixed_prot() function

Message ID 20191212084741.9251-2-thomas_os@shipmail.org (mailing list archive)
State New, archived
Headers show
Series mm, drm/ttm: Fix pte insertion with customized protection | expand

Commit Message

Thomas Hellström (Intel) Dec. 12, 2019, 8:47 a.m. UTC
From: Thomas Hellstrom <thellstrom@vmware.com>

The TTM module today uses a hack to be able to set a different page
protection than struct vm_area_struct::vm_page_prot. To be able to do
this properly, add the needed vm functionality as vmf_insert_mixed_prot().

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: "Christian König" <christian.koenig@amd.com>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 include/linux/mm.h       |  2 ++
 include/linux/mm_types.h |  7 ++++++-
 mm/memory.c              | 43 ++++++++++++++++++++++++++++++++++++----
 3 files changed, 47 insertions(+), 5 deletions(-)

Comments

Thomas Hellstrom Dec. 12, 2019, 8:51 a.m. UTC | #1
On 12/12/19 9:48 AM, Thomas Hellström (VMware) wrote:
> From: Thomas Hellstrom <thellstrom@vmware.com>
>
> The TTM module today uses a hack to be able to set a different page
> protection than struct vm_area_struct::vm_page_prot. To be able to do
> this properly, add the needed vm functionality as vmf_insert_mixed_prot().
>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
> Cc: Ralph Campbell <rcampbell@nvidia.com>
> Cc: "Jérôme Glisse" <jglisse@redhat.com>
> Cc: "Christian König" <christian.koenig@amd.com>
> Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
> Acked-by: Christian König <christian.koenig@amd.com>
> ---
>  include/linux/mm.h       |  2 ++
>  include/linux/mm_types.h |  7 ++++++-
>  mm/memory.c              | 43 ++++++++++++++++++++++++++++++++++++----
>  3 files changed, 47 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index cc292273e6ba..29575d3c1e47 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2548,6 +2548,8 @@ vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
>  			unsigned long pfn, pgprot_t pgprot);
>  vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
>  			pfn_t pfn);
> +vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr,
> +			pfn_t pfn, pgprot_t pgprot);
>  vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
>  		unsigned long addr, pfn_t pfn);
>  int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 2222fa795284..ac96afdbb4bc 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -307,7 +307,12 @@ struct vm_area_struct {
>  	/* Second cache line starts here. */
>  
>  	struct mm_struct *vm_mm;	/* The address space we belong to. */
> -	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
> +
> +	/*
> +	 * Access permissions of this VMA.
> +	 * See vmf_insert_mixed() for discussion.

Typo: this will of course be vmf_insert_mixed_prot() in the final version.
Michal Hocko Dec. 20, 2019, 8:23 a.m. UTC | #2
On Thu 12-12-19 09:47:40, Thomas Hellström (VMware) wrote:
> From: Thomas Hellstrom <thellstrom@vmware.com>
> 
> The TTM module today uses a hack to be able to set a different page
> protection than struct vm_area_struct::vm_page_prot. To be able to do
> this properly, add the needed vm functionality as vmf_insert_mixed_prot().
> 
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
> Cc: Ralph Campbell <rcampbell@nvidia.com>
> Cc: "Jérôme Glisse" <jglisse@redhat.com>
> Cc: "Christian König" <christian.koenig@amd.com>
> Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
> Acked-by: Christian König <christian.koenig@amd.com>

I cannot say I am happy about this, because it adds a discrepancy and
that is always tricky, but I do agree that a formalized discrepancy is
better than ad-hoc hacks, so:

Acked-by: Michal Hocko <mhocko@suse.com>

Thanks for extending the documentation.

> ---
>  include/linux/mm.h       |  2 ++
>  include/linux/mm_types.h |  7 ++++++-
>  mm/memory.c              | 43 ++++++++++++++++++++++++++++++++++++----
>  3 files changed, 47 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index cc292273e6ba..29575d3c1e47 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2548,6 +2548,8 @@ vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
>  			unsigned long pfn, pgprot_t pgprot);
>  vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
>  			pfn_t pfn);
> +vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr,
> +			pfn_t pfn, pgprot_t pgprot);
>  vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
>  		unsigned long addr, pfn_t pfn);
>  int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 2222fa795284..ac96afdbb4bc 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -307,7 +307,12 @@ struct vm_area_struct {
>  	/* Second cache line starts here. */
>  
>  	struct mm_struct *vm_mm;	/* The address space we belong to. */
> -	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
> +
> +	/*
> +	 * Access permissions of this VMA.
> +	 * See vmf_insert_mixed() for discussion.
> +	 */
> +	pgprot_t vm_page_prot;
>  	unsigned long vm_flags;		/* Flags, see mm.h. */
>  
>  	/*
> diff --git a/mm/memory.c b/mm/memory.c
> index b1ca51a079f2..269a8a871e83 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -1646,6 +1646,9 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
>   * vmf_insert_pfn_prot should only be used if using multiple VMAs is
>   * impractical.
>   *
> + * See vmf_insert_mixed_prot() for a discussion of the implication of using
> + * a value of @pgprot different from that of @vma->vm_page_prot.
> + *
>   * Context: Process context.  May allocate using %GFP_KERNEL.
>   * Return: vm_fault_t value.
>   */
> @@ -1719,9 +1722,9 @@ static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn)
>  }
>  
>  static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma,
> -		unsigned long addr, pfn_t pfn, bool mkwrite)
> +		unsigned long addr, pfn_t pfn, pgprot_t pgprot,
> +		bool mkwrite)
>  {
> -	pgprot_t pgprot = vma->vm_page_prot;
>  	int err;
>  
>  	BUG_ON(!vm_mixed_ok(vma, pfn));
> @@ -1764,10 +1767,42 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma,
>  	return VM_FAULT_NOPAGE;
>  }
>  
> +/**
> + * vmf_insert_mixed_prot - insert single pfn into user vma with specified pgprot
> + * @vma: user vma to map to
> + * @addr: target user address of this page
> + * @pfn: source kernel pfn
> + * @pgprot: pgprot flags for the inserted page
> + *
> + * This is exactly like vmf_insert_mixed(), except that it allows drivers to
> + * to override pgprot on a per-page basis.
> + *
> + * Typically this function should be used by drivers to set caching- and
> + * encryption bits different than those of @vma->vm_page_prot, because
> + * the caching- or encryption mode may not be known at mmap() time.
> + * This is ok as long as @vma->vm_page_prot is not used by the core vm
> + * to set caching and encryption bits for those vmas (except for COW pages).
> + * This is ensured by core vm only modifying these page table entries using
> + * functions that don't touch caching- or encryption bits, using pte_modify()
> + * if needed. (See for example mprotect()).
> + * Also when new page-table entries are created, this is only done using the
> + * fault() callback, and never using the value of vma->vm_page_prot,
> + * except for page-table entries that point to anonymous pages as the result
> + * of COW.
> + *
> + * Context: Process context.  May allocate using %GFP_KERNEL.
> + * Return: vm_fault_t value.
> + */
> +vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr,
> +				 pfn_t pfn, pgprot_t pgprot)
> +{
> +	return __vm_insert_mixed(vma, addr, pfn, pgprot, false);
> +}
> +
>  vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
>  		pfn_t pfn)
>  {
> -	return __vm_insert_mixed(vma, addr, pfn, false);
> +	return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, false);
>  }
>  EXPORT_SYMBOL(vmf_insert_mixed);
>  
> @@ -1779,7 +1814,7 @@ EXPORT_SYMBOL(vmf_insert_mixed);
>  vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
>  		unsigned long addr, pfn_t pfn)
>  {
> -	return __vm_insert_mixed(vma, addr, pfn, true);
> +	return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, true);
>  }
>  EXPORT_SYMBOL(vmf_insert_mixed_mkwrite);
>  
> -- 
> 2.21.0
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index cc292273e6ba..29575d3c1e47 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2548,6 +2548,8 @@  vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn, pgprot_t pgprot);
 vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			pfn_t pfn);
+vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr,
+			pfn_t pfn, pgprot_t pgprot);
 vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
 		unsigned long addr, pfn_t pfn);
 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 2222fa795284..ac96afdbb4bc 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -307,7 +307,12 @@  struct vm_area_struct {
 	/* Second cache line starts here. */
 
 	struct mm_struct *vm_mm;	/* The address space we belong to. */
-	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
+
+	/*
+	 * Access permissions of this VMA.
+	 * See vmf_insert_mixed_prot() for discussion.
+	 */
+	pgprot_t vm_page_prot;
 	unsigned long vm_flags;		/* Flags, see mm.h. */
 
 	/*
diff --git a/mm/memory.c b/mm/memory.c
index b1ca51a079f2..269a8a871e83 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1646,6 +1646,9 @@  static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
  * vmf_insert_pfn_prot should only be used if using multiple VMAs is
  * impractical.
  *
+ * See vmf_insert_mixed_prot() for a discussion of the implication of using
+ * a value of @pgprot different from that of @vma->vm_page_prot.
+ *
  * Context: Process context.  May allocate using %GFP_KERNEL.
  * Return: vm_fault_t value.
  */
@@ -1719,9 +1722,9 @@  static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn)
 }
 
 static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma,
-		unsigned long addr, pfn_t pfn, bool mkwrite)
+		unsigned long addr, pfn_t pfn, pgprot_t pgprot,
+		bool mkwrite)
 {
-	pgprot_t pgprot = vma->vm_page_prot;
 	int err;
 
 	BUG_ON(!vm_mixed_ok(vma, pfn));
@@ -1764,10 +1767,42 @@  static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma,
 	return VM_FAULT_NOPAGE;
 }
 
+/**
+ * vmf_insert_mixed_prot - insert single pfn into user vma with specified pgprot
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @pfn: source kernel pfn
+ * @pgprot: pgprot flags for the inserted page
+ *
+ * This is exactly like vmf_insert_mixed(), except that it allows drivers to
+ * override pgprot on a per-page basis.
+ *
+ * Typically this function should be used by drivers to set caching- and
+ * encryption bits different than those of @vma->vm_page_prot, because
+ * the caching- or encryption mode may not be known at mmap() time.
+ * This is ok as long as @vma->vm_page_prot is not used by the core vm
+ * to set caching and encryption bits for those vmas (except for COW pages).
+ * This is ensured by core vm only modifying these page table entries using
+ * functions that don't touch caching- or encryption bits, using pte_modify()
+ * if needed. (See for example mprotect()).
+ * Also when new page-table entries are created, this is only done using the
+ * fault() callback, and never using the value of vma->vm_page_prot,
+ * except for page-table entries that point to anonymous pages as the result
+ * of COW.
+ *
+ * Context: Process context.  May allocate using %GFP_KERNEL.
+ * Return: vm_fault_t value.
+ */
+vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr,
+				 pfn_t pfn, pgprot_t pgprot)
+{
+	return __vm_insert_mixed(vma, addr, pfn, pgprot, false);
+}
+
 vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 		pfn_t pfn)
 {
-	return __vm_insert_mixed(vma, addr, pfn, false);
+	return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, false);
 }
 EXPORT_SYMBOL(vmf_insert_mixed);
 
@@ -1779,7 +1814,7 @@  EXPORT_SYMBOL(vmf_insert_mixed);
 vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
 		unsigned long addr, pfn_t pfn)
 {
-	return __vm_insert_mixed(vma, addr, pfn, true);
+	return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, true);
 }
 EXPORT_SYMBOL(vmf_insert_mixed_mkwrite);