diff mbox series

[2/2,RFC] amdkfd CRIU fixes

Message ID 20240604021456.GB3053943@ZenIV (mailing list archive)
State New
Headers show
Series [1/2,RFC] amdgpu: fix a race in kfd_mem_export_dmabuf() | expand

Commit Message

Al Viro June 4, 2024, 2:14 a.m. UTC
Instead of trying to use close_fd() on failure exits, just have
criu_get_prime_handle() store the file reference without inserting
it into descriptor table.

Then, once the callers are past the last failure exit, they can go
and either insert all those file references into the corresponding
slots of descriptor table, or drop all those file references and
free the unused descriptors.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---

Comments

Felix Kuehling June 4, 2024, 6:16 p.m. UTC | #1
On 2024-06-03 22:14, Al Viro wrote:
> Instead of trying to use close_fd() on failure exits, just have
> criu_get_prime_handle() store the file reference without inserting
> it into descriptor table.
>
> Then, once the callers are past the last failure exit, they can go
> and either insert all those file references into the corresponding
> slots of descriptor table, or drop all those file references and
> free the unused descriptors.
>
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Thank you for the patches and the explanation. One minor nit-pick 
inline. With that fixed, this patch is

Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>

I can apply this patch to amd-staging-drm-next, if you want. See one 
comment inline ...


> ---
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index fdf171ad4a3c..3f129e1c0daa 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -36,7 +36,6 @@
>   #include <linux/mman.h>
>   #include <linux/ptrace.h>
>   #include <linux/dma-buf.h>
> -#include <linux/fdtable.h>
>   #include <linux/processor.h>
>   #include "kfd_priv.h"
>   #include "kfd_device_queue_manager.h"
> @@ -1857,7 +1856,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
>   }
>   
>   static int criu_get_prime_handle(struct kgd_mem *mem,
> -				 int flags, u32 *shared_fd)
> +				 int flags, u32 *shared_fd,
> +				 struct file **file)
>   {
>   	struct dma_buf *dmabuf;
>   	int ret;
> @@ -1868,13 +1868,14 @@ static int criu_get_prime_handle(struct kgd_mem *mem,
>   		return ret;
>   	}
>   
> -	ret = dma_buf_fd(dmabuf, flags);
> +	ret = get_unused_fd_flags(flags);
>   	if (ret < 0) {
>   		pr_err("dmabuf create fd failed, ret:%d\n", ret);
>   		goto out_free_dmabuf;
>   	}
>   
>   	*shared_fd = ret;
> +	*file = dmabuf->file;
>   	return 0;
>   
>   out_free_dmabuf:
> @@ -1882,6 +1883,24 @@ static int criu_get_prime_handle(struct kgd_mem *mem,
>   	return ret;
>   }
>   
> +static void commit_files(struct file **files,
> +			 struct kfd_criu_bo_bucket *bo_buckets,
> +			 unsigned int count,
> +			 int err)
> +{
> +	while (count--) {
> +		struct file *file = files[count];
> +		if (!file)

checkpatch.pl would complain here without an empty line after the 
variable definition.

Regards,
   Felix


> +			continue;
> +		if (err) {
> +			fput(file);
> +			put_unused_fd(bo_buckets[count].dmabuf_fd);
> +		} else {
> +			fd_install(bo_buckets[count].dmabuf_fd, file);
> +		}
> +	}
> +}
> +
>   static int criu_checkpoint_bos(struct kfd_process *p,
>   			       uint32_t num_bos,
>   			       uint8_t __user *user_bos,
> @@ -1890,6 +1909,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
>   {
>   	struct kfd_criu_bo_bucket *bo_buckets;
>   	struct kfd_criu_bo_priv_data *bo_privs;
> +	struct file **files = NULL;
>   	int ret = 0, pdd_index, bo_index = 0, id;
>   	void *mem;
>   
> @@ -1903,6 +1923,12 @@ static int criu_checkpoint_bos(struct kfd_process *p,
>   		goto exit;
>   	}
>   
> +	files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL);
> +	if (!files) {
> +		ret = -ENOMEM;
> +		goto exit;
> +	}
> +
>   	for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
>   		struct kfd_process_device *pdd = p->pdds[pdd_index];
>   		struct amdgpu_bo *dumper_bo;
> @@ -1950,7 +1976,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
>   				ret = criu_get_prime_handle(kgd_mem,
>   						bo_bucket->alloc_flags &
>   						KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
> -						&bo_bucket->dmabuf_fd);
> +						&bo_bucket->dmabuf_fd, &files[bo_index]);
>   				if (ret)
>   					goto exit;
>   			} else {
> @@ -2001,12 +2027,8 @@ static int criu_checkpoint_bos(struct kfd_process *p,
>   	*priv_offset += num_bos * sizeof(*bo_privs);
>   
>   exit:
> -	while (ret && bo_index--) {
> -		if (bo_buckets[bo_index].alloc_flags
> -		    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
> -			close_fd(bo_buckets[bo_index].dmabuf_fd);
> -	}
> -
> +	commit_files(files, bo_buckets, bo_index, ret);
> +	kvfree(files);
>   	kvfree(bo_buckets);
>   	kvfree(bo_privs);
>   	return ret;
> @@ -2358,7 +2380,8 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
>   
>   static int criu_restore_bo(struct kfd_process *p,
>   			   struct kfd_criu_bo_bucket *bo_bucket,
> -			   struct kfd_criu_bo_priv_data *bo_priv)
> +			   struct kfd_criu_bo_priv_data *bo_priv,
> +			   struct file **file)
>   {
>   	struct kfd_process_device *pdd;
>   	struct kgd_mem *kgd_mem;
> @@ -2410,7 +2433,7 @@ static int criu_restore_bo(struct kfd_process *p,
>   	if (bo_bucket->alloc_flags
>   	    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
>   		ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
> -					    &bo_bucket->dmabuf_fd);
> +					    &bo_bucket->dmabuf_fd, file);
>   		if (ret)
>   			return ret;
>   	} else {
> @@ -2427,6 +2450,7 @@ static int criu_restore_bos(struct kfd_process *p,
>   {
>   	struct kfd_criu_bo_bucket *bo_buckets = NULL;
>   	struct kfd_criu_bo_priv_data *bo_privs = NULL;
> +	struct file **files = NULL;
>   	int ret = 0;
>   	uint32_t i = 0;
>   
> @@ -2440,6 +2464,12 @@ static int criu_restore_bos(struct kfd_process *p,
>   	if (!bo_buckets)
>   		return -ENOMEM;
>   
> +	files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL);
> +	if (!files) {
> +		ret = -ENOMEM;
> +		goto exit;
> +	}
> +
>   	ret = copy_from_user(bo_buckets, (void __user *)args->bos,
>   			     args->num_bos * sizeof(*bo_buckets));
>   	if (ret) {
> @@ -2465,7 +2495,7 @@ static int criu_restore_bos(struct kfd_process *p,
>   
>   	/* Create and map new BOs */
>   	for (; i < args->num_bos; i++) {
> -		ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
> +		ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]);
>   		if (ret) {
>   			pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
>   			goto exit;
> @@ -2480,11 +2510,8 @@ static int criu_restore_bos(struct kfd_process *p,
>   		ret = -EFAULT;
>   
>   exit:
> -	while (ret && i--) {
> -		if (bo_buckets[i].alloc_flags
> -		   & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
> -			close_fd(bo_buckets[i].dmabuf_fd);
> -	}
> +	commit_files(files, bo_buckets, i, ret);
> +	kvfree(files);
>   	kvfree(bo_buckets);
>   	kvfree(bo_privs);
>   	return ret;
Al Viro June 4, 2024, 7:20 p.m. UTC | #2
On Tue, Jun 04, 2024 at 02:16:00PM -0400, Felix Kuehling wrote:
> 
> On 2024-06-03 22:14, Al Viro wrote:
> > Instead of trying to use close_fd() on failure exits, just have
> > criu_get_prime_handle() store the file reference without inserting
> > it into descriptor table.
> > 
> > Then, once the callers are past the last failure exit, they can go
> > and either insert all those file references into the corresponding
> > slots of descriptor table, or drop all those file references and
> > free the unused descriptors.
> > 
> > Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> 
> Thank you for the patches and the explanation. One minor nit-pick inline.
> With that fixed, this patch is
> 
> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
> 
> I can apply this patch to amd-staging-drm-next, if you want. See one comment
> inline ...

Fine by me; I think this stuff would be better off in the relevant trees -
it's not as if we realistically could unexport close_fd() this cycle anyway,
more's the pity...  So nothing I've got in my queue has that as a prereq and
it would definitely have better odds of getting tested in your tree.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index fdf171ad4a3c..3f129e1c0daa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -36,7 +36,6 @@ 
 #include <linux/mman.h>
 #include <linux/ptrace.h>
 #include <linux/dma-buf.h>
-#include <linux/fdtable.h>
 #include <linux/processor.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
@@ -1857,7 +1856,8 @@  static uint32_t get_process_num_bos(struct kfd_process *p)
 }
 
 static int criu_get_prime_handle(struct kgd_mem *mem,
-				 int flags, u32 *shared_fd)
+				 int flags, u32 *shared_fd,
+				 struct file **file)
 {
 	struct dma_buf *dmabuf;
 	int ret;
@@ -1868,13 +1868,14 @@  static int criu_get_prime_handle(struct kgd_mem *mem,
 		return ret;
 	}
 
-	ret = dma_buf_fd(dmabuf, flags);
+	ret = get_unused_fd_flags(flags);
 	if (ret < 0) {
 		pr_err("dmabuf create fd failed, ret:%d\n", ret);
 		goto out_free_dmabuf;
 	}
 
 	*shared_fd = ret;
+	*file = dmabuf->file;
 	return 0;
 
 out_free_dmabuf:
@@ -1882,6 +1883,24 @@  static int criu_get_prime_handle(struct kgd_mem *mem,
 	return ret;
 }
 
+static void commit_files(struct file **files,
+			 struct kfd_criu_bo_bucket *bo_buckets,
+			 unsigned int count,
+			 int err)
+{
+	while (count--) {
+		struct file *file = files[count];
+		if (!file)
+			continue;
+		if (err) {
+			fput(file);
+			put_unused_fd(bo_buckets[count].dmabuf_fd);
+		} else {
+			fd_install(bo_buckets[count].dmabuf_fd, file);
+		}
+	}
+}
+
 static int criu_checkpoint_bos(struct kfd_process *p,
 			       uint32_t num_bos,
 			       uint8_t __user *user_bos,
@@ -1890,6 +1909,7 @@  static int criu_checkpoint_bos(struct kfd_process *p,
 {
 	struct kfd_criu_bo_bucket *bo_buckets;
 	struct kfd_criu_bo_priv_data *bo_privs;
+	struct file **files = NULL;
 	int ret = 0, pdd_index, bo_index = 0, id;
 	void *mem;
 
@@ -1903,6 +1923,12 @@  static int criu_checkpoint_bos(struct kfd_process *p,
 		goto exit;
 	}
 
+	files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL);
+	if (!files) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
 	for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
 		struct kfd_process_device *pdd = p->pdds[pdd_index];
 		struct amdgpu_bo *dumper_bo;
@@ -1950,7 +1976,7 @@  static int criu_checkpoint_bos(struct kfd_process *p,
 				ret = criu_get_prime_handle(kgd_mem,
 						bo_bucket->alloc_flags &
 						KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
-						&bo_bucket->dmabuf_fd);
+						&bo_bucket->dmabuf_fd, &files[bo_index]);
 				if (ret)
 					goto exit;
 			} else {
@@ -2001,12 +2027,8 @@  static int criu_checkpoint_bos(struct kfd_process *p,
 	*priv_offset += num_bos * sizeof(*bo_privs);
 
 exit:
-	while (ret && bo_index--) {
-		if (bo_buckets[bo_index].alloc_flags
-		    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
-			close_fd(bo_buckets[bo_index].dmabuf_fd);
-	}
-
+	commit_files(files, bo_buckets, bo_index, ret);
+	kvfree(files);
 	kvfree(bo_buckets);
 	kvfree(bo_privs);
 	return ret;
@@ -2358,7 +2380,8 @@  static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
 
 static int criu_restore_bo(struct kfd_process *p,
 			   struct kfd_criu_bo_bucket *bo_bucket,
-			   struct kfd_criu_bo_priv_data *bo_priv)
+			   struct kfd_criu_bo_priv_data *bo_priv,
+			   struct file **file)
 {
 	struct kfd_process_device *pdd;
 	struct kgd_mem *kgd_mem;
@@ -2410,7 +2433,7 @@  static int criu_restore_bo(struct kfd_process *p,
 	if (bo_bucket->alloc_flags
 	    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
 		ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
-					    &bo_bucket->dmabuf_fd);
+					    &bo_bucket->dmabuf_fd, file);
 		if (ret)
 			return ret;
 	} else {
@@ -2427,6 +2450,7 @@  static int criu_restore_bos(struct kfd_process *p,
 {
 	struct kfd_criu_bo_bucket *bo_buckets = NULL;
 	struct kfd_criu_bo_priv_data *bo_privs = NULL;
+	struct file **files = NULL;
 	int ret = 0;
 	uint32_t i = 0;
 
@@ -2440,6 +2464,12 @@  static int criu_restore_bos(struct kfd_process *p,
 	if (!bo_buckets)
 		return -ENOMEM;
 
+	files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL);
+	if (!files) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
 	ret = copy_from_user(bo_buckets, (void __user *)args->bos,
 			     args->num_bos * sizeof(*bo_buckets));
 	if (ret) {
@@ -2465,7 +2495,7 @@  static int criu_restore_bos(struct kfd_process *p,
 
 	/* Create and map new BOs */
 	for (; i < args->num_bos; i++) {
-		ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
+		ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]);
 		if (ret) {
 			pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
 			goto exit;
@@ -2480,11 +2510,8 @@  static int criu_restore_bos(struct kfd_process *p,
 		ret = -EFAULT;
 
 exit:
-	while (ret && i--) {
-		if (bo_buckets[i].alloc_flags
-		   & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
-			close_fd(bo_buckets[i].dmabuf_fd);
-	}
+	commit_files(files, bo_buckets, i, ret);
+	kvfree(files);
 	kvfree(bo_buckets);
 	kvfree(bo_privs);
 	return ret;