diff mbox series

[v2,16/24] bpf/task_iter: In task_file_seq_get_next use task_lookup_next_fd_rcu

Message ID 20201120231441.29911-16-ebiederm@xmission.com (mailing list archive)
State Not Applicable
Delegated to: BPF
Headers show
Series [v2,01/24] exec: Move unshare_files to fix posix file locking during exec | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Eric W. Biederman Nov. 20, 2020, 11:14 p.m. UTC
When discussing[1] exec and posix file locks it was realized that none
of the callers of get_files_struct fundamentally needed to call
get_files_struct, and that by switching them to helper functions
instead it will both simplify their code and remove unnecessary
increments of files_struct.count.  Those unnecessary increments can
result in exec unnecessarily unsharing files_struct which breaking
posix locks, and it can result in fget_light having to fallback to
fget reducing system performance.

Using task_lookup_next_fd_rcu simplifies task_file_seq_get_next, by
moving the checking for the maximum file descritor into the generic
code, and by remvoing the need for capturing and releasing a reference
on files_struct.  As the reference count of files_struct no longer
needs to be maintained bpf_iter_seq_task_file_info can have it's files
member removed and task_file_seq_get_next no longer needs it's fstruct
argument.

The curr_fd local variable does need to become unsigned to be used
with fnext_task.  As curr_fd is assigned from and assigned a u32
making curr_fd an unsigned int won't cause problems and might prevent
them.

[1] https://lkml.kernel.org/r/20180915160423.GA31461@redhat.com
Suggested-by: Oleg Nesterov <oleg@redhat.com>
v1: https://lkml.kernel.org/r/20200817220425.9389-11-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/bpf/task_iter.c | 44 ++++++++++--------------------------------
 1 file changed, 10 insertions(+), 34 deletions(-)

Comments

Yonghong Song Nov. 23, 2020, 5:06 p.m. UTC | #1
On 11/20/20 3:14 PM, Eric W. Biederman wrote:
> When discussing[1] exec and posix file locks it was realized that none
> of the callers of get_files_struct fundamentally needed to call
> get_files_struct, and that by switching them to helper functions
> instead it will both simplify their code and remove unnecessary
> increments of files_struct.count.  Those unnecessary increments can
> result in exec unnecessarily unsharing files_struct which breaking
> posix locks, and it can result in fget_light having to fallback to
> fget reducing system performance.
> 
> Using task_lookup_next_fd_rcu simplifies task_file_seq_get_next, by
> moving the checking for the maximum file descritor into the generic
> code, and by remvoing the need for capturing and releasing a reference
> on files_struct.  As the reference count of files_struct no longer
> needs to be maintained bpf_iter_seq_task_file_info can have it's files
> member removed and task_file_seq_get_next no longer needs it's fstruct
> argument.
> 
> The curr_fd local variable does need to become unsigned to be used
> with fnext_task.  As curr_fd is assigned from and assigned a u32
> making curr_fd an unsigned int won't cause problems and might prevent
> them.
> 
> [1] https://lkml.kernel.org/r/20180915160423.GA31461@redhat.com
> Suggested-by: Oleg Nesterov <oleg@redhat.com>
> v1: https://lkml.kernel.org/r/20200817220425.9389-11-ebiederm@xmission.com
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> ---
>   kernel/bpf/task_iter.c | 44 ++++++++++--------------------------------
>   1 file changed, 10 insertions(+), 34 deletions(-)


Just a heads-up. The following patch
   bpf-next: 91b2db27d3ff ("bpf: Simplify task_file_seq_get_next()")
 
https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=91b2db27d3ff9ad29e8b3108dfbf1e2f49fe9bd3
has been merged in bpf-next tree.

It will have merge conflicts with this patch. The above patch
is a refactoring for simplification with no functionality change.

> 
> diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
> index 5ab2ccfb96cb..4ec63170c741 100644
> --- a/kernel/bpf/task_iter.c
> +++ b/kernel/bpf/task_iter.c
> @@ -130,45 +130,33 @@ struct bpf_iter_seq_task_file_info {
>   	 */
>   	struct bpf_iter_seq_task_common common;
>   	struct task_struct *task;
> -	struct files_struct *files;
>   	u32 tid;
>   	u32 fd;
>   };
>   
>   static struct file *
>   task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
> -		       struct task_struct **task, struct files_struct **fstruct)
> +		       struct task_struct **task)
>   {
>   	struct pid_namespace *ns = info->common.ns;
> -	u32 curr_tid = info->tid, max_fds;
> -	struct files_struct *curr_files;
> +	u32 curr_tid = info->tid;
>   	struct task_struct *curr_task;
> -	int curr_fd = info->fd;
> +	unsigned int curr_fd = info->fd;
>   
>   	/* If this function returns a non-NULL file object,
> -	 * it held a reference to the task/files_struct/file.
> +	 * it held a reference to the task/file.
>   	 * Otherwise, it does not hold any reference.
>   	 */
>   again:
>   	if (*task) {
>   		curr_task = *task;
> -		curr_files = *fstruct;
>   		curr_fd = info->fd;
>   	} else {
>   		curr_task = task_seq_get_next(ns, &curr_tid, true);
>   		if (!curr_task)
>   			return NULL;
>   
> -		curr_files = get_files_struct(curr_task);
> -		if (!curr_files) {
> -			put_task_struct(curr_task);
> -			curr_tid = ++(info->tid);
> -			info->fd = 0;
> -			goto again;
> -		}
> -
> -		/* set *fstruct, *task and info->tid */
> -		*fstruct = curr_files;
> +		/* set *task and info->tid */
>   		*task = curr_task;
>   		if (curr_tid == info->tid) {
>   			curr_fd = info->fd;
> @@ -179,13 +167,11 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
>   	}
>   
>   	rcu_read_lock();
> -	max_fds = files_fdtable(curr_files)->max_fds;
> -	for (; curr_fd < max_fds; curr_fd++) {
> +	for (;; curr_fd++) {
>   		struct file *f;
> -
> -		f = files_lookup_fd_rcu(curr_files, curr_fd);
> +		f = task_lookup_next_fd_rcu(curr_task, &curr_fd);
>   		if (!f)
> -			continue;
> +			break;
>   		if (!get_file_rcu(f))
>   			continue;
>   
> @@ -197,10 +183,8 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
>   
>   	/* the current task is done, go to the next task */
>   	rcu_read_unlock();
> -	put_files_struct(curr_files);
>   	put_task_struct(curr_task);
>   	*task = NULL;
> -	*fstruct = NULL;
>   	info->fd = 0;
>   	curr_tid = ++(info->tid);
>   	goto again;
> @@ -209,13 +193,11 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
>   static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
>   {
>   	struct bpf_iter_seq_task_file_info *info = seq->private;
> -	struct files_struct *files = NULL;
>   	struct task_struct *task = NULL;
>   	struct file *file;
>   
> -	file = task_file_seq_get_next(info, &task, &files);
> +	file = task_file_seq_get_next(info, &task);
>   	if (!file) {
> -		info->files = NULL;
>   		info->task = NULL;
>   		return NULL;
>   	}
> @@ -223,7 +205,6 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
>   	if (*pos == 0)
>   		++*pos;
>   	info->task = task;
> -	info->files = files;
>   
>   	return file;
>   }
> @@ -231,22 +212,19 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
>   static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
>   {
>   	struct bpf_iter_seq_task_file_info *info = seq->private;
> -	struct files_struct *files = info->files;
>   	struct task_struct *task = info->task;
>   	struct file *file;
>   
>   	++*pos;
>   	++info->fd;
>   	fput((struct file *)v);
> -	file = task_file_seq_get_next(info, &task, &files);
> +	file = task_file_seq_get_next(info, &task);
>   	if (!file) {
> -		info->files = NULL;
>   		info->task = NULL;
>   		return NULL;
>   	}
>   
>   	info->task = task;
> -	info->files = files;
>   
>   	return file;
>   }
> @@ -295,9 +273,7 @@ static void task_file_seq_stop(struct seq_file *seq, void *v)
>   		(void)__task_file_seq_show(seq, v, true);
>   	} else {
>   		fput((struct file *)v);
> -		put_files_struct(info->files);
>   		put_task_struct(info->task);
> -		info->files = NULL;
>   		info->task = NULL;
>   	}
>   }
>
diff mbox series

Patch

diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 5ab2ccfb96cb..4ec63170c741 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -130,45 +130,33 @@  struct bpf_iter_seq_task_file_info {
 	 */
 	struct bpf_iter_seq_task_common common;
 	struct task_struct *task;
-	struct files_struct *files;
 	u32 tid;
 	u32 fd;
 };
 
 static struct file *
 task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
-		       struct task_struct **task, struct files_struct **fstruct)
+		       struct task_struct **task)
 {
 	struct pid_namespace *ns = info->common.ns;
-	u32 curr_tid = info->tid, max_fds;
-	struct files_struct *curr_files;
+	u32 curr_tid = info->tid;
 	struct task_struct *curr_task;
-	int curr_fd = info->fd;
+	unsigned int curr_fd = info->fd;
 
 	/* If this function returns a non-NULL file object,
-	 * it held a reference to the task/files_struct/file.
+	 * it held a reference to the task/file.
 	 * Otherwise, it does not hold any reference.
 	 */
 again:
 	if (*task) {
 		curr_task = *task;
-		curr_files = *fstruct;
 		curr_fd = info->fd;
 	} else {
 		curr_task = task_seq_get_next(ns, &curr_tid, true);
 		if (!curr_task)
 			return NULL;
 
-		curr_files = get_files_struct(curr_task);
-		if (!curr_files) {
-			put_task_struct(curr_task);
-			curr_tid = ++(info->tid);
-			info->fd = 0;
-			goto again;
-		}
-
-		/* set *fstruct, *task and info->tid */
-		*fstruct = curr_files;
+		/* set *task and info->tid */
 		*task = curr_task;
 		if (curr_tid == info->tid) {
 			curr_fd = info->fd;
@@ -179,13 +167,11 @@  task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
 	}
 
 	rcu_read_lock();
-	max_fds = files_fdtable(curr_files)->max_fds;
-	for (; curr_fd < max_fds; curr_fd++) {
+	for (;; curr_fd++) {
 		struct file *f;
-
-		f = files_lookup_fd_rcu(curr_files, curr_fd);
+		f = task_lookup_next_fd_rcu(curr_task, &curr_fd);
 		if (!f)
-			continue;
+			break;
 		if (!get_file_rcu(f))
 			continue;
 
@@ -197,10 +183,8 @@  task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
 
 	/* the current task is done, go to the next task */
 	rcu_read_unlock();
-	put_files_struct(curr_files);
 	put_task_struct(curr_task);
 	*task = NULL;
-	*fstruct = NULL;
 	info->fd = 0;
 	curr_tid = ++(info->tid);
 	goto again;
@@ -209,13 +193,11 @@  task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
 static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct bpf_iter_seq_task_file_info *info = seq->private;
-	struct files_struct *files = NULL;
 	struct task_struct *task = NULL;
 	struct file *file;
 
-	file = task_file_seq_get_next(info, &task, &files);
+	file = task_file_seq_get_next(info, &task);
 	if (!file) {
-		info->files = NULL;
 		info->task = NULL;
 		return NULL;
 	}
@@ -223,7 +205,6 @@  static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
 	if (*pos == 0)
 		++*pos;
 	info->task = task;
-	info->files = files;
 
 	return file;
 }
@@ -231,22 +212,19 @@  static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
 static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct bpf_iter_seq_task_file_info *info = seq->private;
-	struct files_struct *files = info->files;
 	struct task_struct *task = info->task;
 	struct file *file;
 
 	++*pos;
 	++info->fd;
 	fput((struct file *)v);
-	file = task_file_seq_get_next(info, &task, &files);
+	file = task_file_seq_get_next(info, &task);
 	if (!file) {
-		info->files = NULL;
 		info->task = NULL;
 		return NULL;
 	}
 
 	info->task = task;
-	info->files = files;
 
 	return file;
 }
@@ -295,9 +273,7 @@  static void task_file_seq_stop(struct seq_file *seq, void *v)
 		(void)__task_file_seq_show(seq, v, true);
 	} else {
 		fput((struct file *)v);
-		put_files_struct(info->files);
 		put_task_struct(info->task);
-		info->files = NULL;
 		info->task = NULL;
 	}
 }