diff mbox series

[v2,1/4] kernfs: remove page_mkwrite() from vm_operations_struct

Message ID 20240611182732.360317-2-martin.oliveira@eideticom.com (mailing list archive)
State Superseded
Headers show
Series Enable P2PDMA in Userspace RDMA | expand

Commit Message

Martin Oliveira June 11, 2024, 6:27 p.m. UTC
The .page_mkwrite operator of kernfs just calls file_update_time().
This is the same behaviour that the fault code does if .page_mkwrite is
not set.

Furthermore, having the page_mkwrite() operator causes
writable_file_mapping_allowed() to fail due to
vma_needs_dirty_tracking() on the gup flow, which is a pre-requisite for
enabling P2PDMA over RDMA.

There are no users of .page_mkwrite and no known valid use cases, so
just remove the .page_mkwrite from kernfs_vm_ops and return -EINVAL if an
mmap() implementation sets .page_mkwrite.

Co-developed-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Martin Oliveira <martin.oliveira@eideticom.com>
---
 fs/kernfs/file.c | 26 +++-----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

Comments

Greg KH June 12, 2024, 8:09 a.m. UTC | #1
On Tue, Jun 11, 2024 at 12:27:29PM -0600, Martin Oliveira wrote:
> The .page_mkwrite operator of kernfs just calls file_update_time().
> This is the same behaviour that the fault code does if .page_mkwrite is
> not set.
> 
> Furthermore, having the page_mkwrite() operator causes
> writable_file_mapping_allowed() to fail due to
> vma_needs_dirty_tracking() on the gup flow, which is a pre-requisite for
> enabling P2PDMA over RDMA.
> 
> There are no users of .page_mkwrite and no known valid use cases, so
> just remove the .page_mkwrite from kernfs_ops and return -EINVAL if an
> mmap() implementation sets .page_mkwrite.
> 
> Co-developed-by: Logan Gunthorpe <logang@deltatee.com>
> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
> Signed-off-by: Martin Oliveira <martin.oliveira@eideticom.com>
> ---
>  fs/kernfs/file.c | 26 +++-----------------------
>  1 file changed, 3 insertions(+), 23 deletions(-)

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tejun Heo June 12, 2024, 5:29 p.m. UTC | #2
On Tue, Jun 11, 2024 at 12:27:29PM -0600, Martin Oliveira wrote:
> The .page_mkwrite operator of kernfs just calls file_update_time().
> This is the same behaviour that the fault code does if .page_mkwrite is
> not set.
> 
> Furthermore, having the page_mkwrite() operator causes
> writable_file_mapping_allowed() to fail due to
> vma_needs_dirty_tracking() on the gup flow, which is a pre-requisite for
> enabling P2PDMA over RDMA.
> 
> There are no users of .page_mkwrite and no known valid use cases, so
> just remove the .page_mkwrite from kernfs_ops and return -EINVAL if an
> mmap() implementation sets .page_mkwrite.
> 
> Co-developed-by: Logan Gunthorpe <logang@deltatee.com>
> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
> Signed-off-by: Martin Oliveira <martin.oliveira@eideticom.com>

Acked-by: Tejun Heo <tj@kernel.org>

Thanks.
Dan Carpenter June 13, 2024, 5:15 a.m. UTC | #3
Hi Martin,

kernel test robot noticed the following build warnings:

url:    https://github.com/intel-lab-lkp/linux/commits/Martin-Oliveira/kernfs-remove-page_mkwrite-from-vm_operations_struct/20240612-023130
base:   83a7eefedc9b56fe7bfeff13b6c7356688ffa670
patch link:    https://lore.kernel.org/r/20240611182732.360317-2-martin.oliveira%40eideticom.com
patch subject: [PATCH v2 1/4] kernfs: remove page_mkwrite() from vm_operations_struct
config: i386-randconfig-141-20240612 (https://download.01.org/0day-ci/archive/20240613/202406130357.6NmgCbMP-lkp@intel.com/config)
compiler: gcc-12 (Ubuntu 12.3.0-9ubuntu2) 12.3.0

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
| Closes: https://lore.kernel.org/r/202406130357.6NmgCbMP-lkp@intel.com/

smatch warnings:
fs/kernfs/file.c:462 kernfs_fop_mmap() error: we previously assumed 'vma->vm_ops' could be null (see line 459)

vim +462 fs/kernfs/file.c

c637b8acbe079e Tejun Heo           2013-12-11  416  static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
414985ae23c031 Tejun Heo           2013-11-28  417  {
c525aaddc366df Tejun Heo           2013-12-11  418  	struct kernfs_open_file *of = kernfs_of(file);
414985ae23c031 Tejun Heo           2013-11-28  419  	const struct kernfs_ops *ops;
414985ae23c031 Tejun Heo           2013-11-28  420  	int rc;
414985ae23c031 Tejun Heo           2013-11-28  421  
9b2db6e1894577 Tejun Heo           2013-12-10  422  	/*
9b2db6e1894577 Tejun Heo           2013-12-10  423  	 * mmap path and of->mutex are prone to triggering spurious lockdep
9b2db6e1894577 Tejun Heo           2013-12-10  424  	 * warnings and we don't want to add spurious locking dependency
9b2db6e1894577 Tejun Heo           2013-12-10  425  	 * between the two.  Check whether mmap is actually implemented
9b2db6e1894577 Tejun Heo           2013-12-10  426  	 * without grabbing @of->mutex by testing HAS_MMAP flag.  See the
c810729fe6471a Ahelenia Ziemiańska 2023-12-21  427  	 * comment in kernfs_fop_open() for more details.
9b2db6e1894577 Tejun Heo           2013-12-10  428  	 */
df23fc39bce03b Tejun Heo           2013-12-11  429  	if (!(of->kn->flags & KERNFS_HAS_MMAP))
9b2db6e1894577 Tejun Heo           2013-12-10  430  		return -ENODEV;
9b2db6e1894577 Tejun Heo           2013-12-10  431  
414985ae23c031 Tejun Heo           2013-11-28  432  	mutex_lock(&of->mutex);
414985ae23c031 Tejun Heo           2013-11-28  433  
414985ae23c031 Tejun Heo           2013-11-28  434  	rc = -ENODEV;
c637b8acbe079e Tejun Heo           2013-12-11  435  	if (!kernfs_get_active(of->kn))
414985ae23c031 Tejun Heo           2013-11-28  436  		goto out_unlock;
414985ae23c031 Tejun Heo           2013-11-28  437  
324a56e16e44ba Tejun Heo           2013-12-11  438  	ops = kernfs_ops(of->kn);
414985ae23c031 Tejun Heo           2013-11-28  439  	rc = ops->mmap(of, vma);
b44b2140265ddf Tejun Heo           2014-04-20  440  	if (rc)
b44b2140265ddf Tejun Heo           2014-04-20  441  		goto out_put;
414985ae23c031 Tejun Heo           2013-11-28  442  
414985ae23c031 Tejun Heo           2013-11-28  443  	/*
414985ae23c031 Tejun Heo           2013-11-28  444  	 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
414985ae23c031 Tejun Heo           2013-11-28  445  	 * to satisfy versions of X which crash if the mmap fails: that
414985ae23c031 Tejun Heo           2013-11-28  446  	 * substitutes a new vm_file, and we don't then want bin_vm_ops.
414985ae23c031 Tejun Heo           2013-11-28  447  	 */
414985ae23c031 Tejun Heo           2013-11-28  448  	if (vma->vm_file != file)
414985ae23c031 Tejun Heo           2013-11-28  449  		goto out_put;
414985ae23c031 Tejun Heo           2013-11-28  450  
414985ae23c031 Tejun Heo           2013-11-28  451  	rc = -EINVAL;
414985ae23c031 Tejun Heo           2013-11-28  452  	if (of->mmapped && of->vm_ops != vma->vm_ops)
414985ae23c031 Tejun Heo           2013-11-28  453  		goto out_put;
414985ae23c031 Tejun Heo           2013-11-28  454  
414985ae23c031 Tejun Heo           2013-11-28  455  	/*
414985ae23c031 Tejun Heo           2013-11-28  456  	 * It is not possible to successfully wrap close.
414985ae23c031 Tejun Heo           2013-11-28  457  	 * So error if someone is trying to use close.
414985ae23c031 Tejun Heo           2013-11-28  458  	 */
414985ae23c031 Tejun Heo           2013-11-28 @459  	if (vma->vm_ops && vma->vm_ops->close)
                                                            ^^^^^^^^^^^
If ->vm_ops is NULL

414985ae23c031 Tejun Heo           2013-11-28  460  		goto out_put;
414985ae23c031 Tejun Heo           2013-11-28  461  
927bb8d619fea4 Martin Oliveira     2024-06-11 @462  	if (vma->vm_ops->page_mkwrite)
                                                            ^^^^^^^^^^^^^^^^^^^^^^^^^
then we're in trouble

927bb8d619fea4 Martin Oliveira     2024-06-11  463  		goto out_put;
927bb8d619fea4 Martin Oliveira     2024-06-11  464  
414985ae23c031 Tejun Heo           2013-11-28  465  	rc = 0;
05d8f255867e31 Neel Natu           2024-01-27  466  	if (!of->mmapped) {
a1d82aff5df760 Tejun Heo           2016-12-27  467  		of->mmapped = true;
bdb2fd7fc56e19 Tejun Heo           2022-08-27  468  		of_on(of)->nr_mmapped++;
414985ae23c031 Tejun Heo           2013-11-28  469  		of->vm_ops = vma->vm_ops;
05d8f255867e31 Neel Natu           2024-01-27  470  	}
414985ae23c031 Tejun Heo           2013-11-28  471  	vma->vm_ops = &kernfs_vm_ops;
414985ae23c031 Tejun Heo           2013-11-28  472  out_put:
c637b8acbe079e Tejun Heo           2013-12-11  473  	kernfs_put_active(of->kn);
414985ae23c031 Tejun Heo           2013-11-28  474  out_unlock:
414985ae23c031 Tejun Heo           2013-11-28  475  	mutex_unlock(&of->mutex);
414985ae23c031 Tejun Heo           2013-11-28  476  
414985ae23c031 Tejun Heo           2013-11-28  477  	return rc;
414985ae23c031 Tejun Heo           2013-11-28  478  }
Christoph Hellwig June 13, 2024, 5:44 a.m. UTC | #4
On Tue, Jun 11, 2024 at 12:27:29PM -0600, Martin Oliveira wrote:
> +	if (vma->vm_ops->page_mkwrite)
> +		goto out_put;
> +

I'd probably make this a WARN_ON so that driver authors trying to
add a page_mkwrite in the vm_ops passed to kernfs get a big fat
warning instead of spending a couple hours trying to track down
what is going wrong :)
John Hubbard June 15, 2024, 2:32 a.m. UTC | #5
On 6/11/24 11:27 AM, Martin Oliveira wrote:
> The .page_mkwrite operator of kernfs just calls file_update_time().
> This is the same behaviour that the fault code does if .page_mkwrite is
> not set.
> 
> Furthermore, having the page_mkwrite() operator causes
> writable_file_mapping_allowed() to fail due to
> vma_needs_dirty_tracking() on the gup flow, which is a pre-requisite for
> enabling P2PDMA over RDMA.
> 
> There are no users of .page_mkwrite and no known valid use cases, so
> just remove the .page_mkwrite from kernfs_ops and return -EINVAL if an
> mmap() implementation sets .page_mkwrite.

Hi Martin and Logan!

First of all, I admire this approach to solving one of the gup+filesystem
interaction problems, by coming in from the other direction. Neat. :)


> 
> Co-developed-by: Logan Gunthorpe <logang@deltatee.com>
> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
> Signed-off-by: Martin Oliveira <martin.oliveira@eideticom.com>
> ---
>   fs/kernfs/file.c | 26 +++-----------------------
>   1 file changed, 3 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
> index 8502ef68459b9..a198cb0718772 100644
> --- a/fs/kernfs/file.c
> +++ b/fs/kernfs/file.c
> @@ -386,28 +386,6 @@ static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
>   	return ret;
>   }
>   
> -static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
> -{
> -	struct file *file = vmf->vma->vm_file;
> -	struct kernfs_open_file *of = kernfs_of(file);
> -	vm_fault_t ret;
> -
> -	if (!of->vm_ops)
> -		return VM_FAULT_SIGBUS;
> -
> -	if (!kernfs_get_active(of->kn))
> -		return VM_FAULT_SIGBUS;
> -
> -	ret = 0;
> -	if (of->vm_ops->page_mkwrite)
> -		ret = of->vm_ops->page_mkwrite(vmf);
> -	else
> -		file_update_time(file);
> -
> -	kernfs_put_active(of->kn);
> -	return ret;
> -}
> -
>   static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
>   			     void *buf, int len, int write)
>   {
> @@ -432,7 +410,6 @@ static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
>   static const struct vm_operations_struct kernfs_vm_ops = {
>   	.open		= kernfs_vma_open,
>   	.fault		= kernfs_vma_fault,
> -	.page_mkwrite	= kernfs_vma_page_mkwrite,
>   	.access		= kernfs_vma_access,
>   };
>   
> @@ -482,6 +459,9 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
>   	if (vma->vm_ops && vma->vm_ops->close)
>   		goto out_put;
>   
> +	if (vma->vm_ops->page_mkwrite)

As the kernel test bot results imply, you probably want to do it like this:

    	if (vma->vm_ops && vma->vm_ops->page_mkwrite)


> +		goto out_put;
> +
>   	rc = 0;
>   	if (!of->mmapped) {
>   		of->mmapped = true;

thanks,
diff mbox series

Patch

diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 8502ef68459b9..a198cb0718772 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -386,28 +386,6 @@  static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
 	return ret;
 }
 
-static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
-{
-	struct file *file = vmf->vma->vm_file;
-	struct kernfs_open_file *of = kernfs_of(file);
-	vm_fault_t ret;
-
-	if (!of->vm_ops)
-		return VM_FAULT_SIGBUS;
-
-	if (!kernfs_get_active(of->kn))
-		return VM_FAULT_SIGBUS;
-
-	ret = 0;
-	if (of->vm_ops->page_mkwrite)
-		ret = of->vm_ops->page_mkwrite(vmf);
-	else
-		file_update_time(file);
-
-	kernfs_put_active(of->kn);
-	return ret;
-}
-
 static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
 			     void *buf, int len, int write)
 {
@@ -432,7 +410,6 @@  static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
 static const struct vm_operations_struct kernfs_vm_ops = {
 	.open		= kernfs_vma_open,
 	.fault		= kernfs_vma_fault,
-	.page_mkwrite	= kernfs_vma_page_mkwrite,
 	.access		= kernfs_vma_access,
 };
 
@@ -482,6 +459,9 @@  static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
 	if (vma->vm_ops && vma->vm_ops->close)
 		goto out_put;
 
+	if (vma->vm_ops->page_mkwrite)
+		goto out_put;
+
 	rc = 0;
 	if (!of->mmapped) {
 		of->mmapped = true;