diff mbox

[RFC,2/3] pipe: protect pipe_max_size access with a mutex

Message ID 8b2369ef-a537-974e-725b-f55d49a646b6@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Joe Lawrence Sept. 19, 2017, 9:47 p.m. UTC
On 09/14/2017 07:09 PM, Mikulas Patocka wrote:
> On Tue, 5 Sep 2017, Joe Lawrence wrote:
> 
>> pipe_max_size is assigned directly via procfs sysctl:
>>
>>   static struct ctl_table fs_table[] = {
>>           ...
>>           {
>>                   .procname       = "pipe-max-size",
>>                   .data           = &pipe_max_size,
>>                   .maxlen         = sizeof(int),
>>                   .mode           = 0644,
>>                   .proc_handler   = &pipe_proc_fn,
>>                   .extra1         = &pipe_min_size,
>>           },
>>           ...
>>
>>   int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
>>                    size_t *lenp, loff_t *ppos)
>>   {
>>           ...
>>           ret = proc_dointvec_minmax(table, write, buf, lenp, ppos)
>>           ...
>>
>> and then later rounded in-place a few statements later:
>>
>>           ...
>>           pipe_max_size = round_pipe_size(pipe_max_size);
>>           ...
>>
>> This leaves a window of time between initial assignment and rounding
>> that may be visible to other threads.  (For example, one thread sets a
>> non-rounded value to pipe_max_size while another reads its value.)
>>
>> Similar reads of pipe_max_size are potentially racey:
>>
>>   pipe.c :: alloc_pipe_info()
>>   pipe.c :: pipe_set_size()
>>
>> Protect them and the procfs sysctl assignment with a mutex.
>>
>> Reported-by: Mikulas Patocka <mpatocka@redhat.com>
>> Signed-off-by: Joe Lawrence <joe.lawrence@redhat.com>
>> ---
>>  fs/pipe.c | 28 ++++++++++++++++++++++++----
>>  1 file changed, 24 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/pipe.c b/fs/pipe.c
>> index fa28910b3c59..33bb11b0d78e 100644
>> --- a/fs/pipe.c
>> +++ b/fs/pipe.c
>> @@ -35,6 +35,11 @@
>>  unsigned int pipe_max_size = 1048576;
>>  
>>  /*
>> + * Provide mutual exclusion around access to pipe_max_size
>> + */
>> +static DEFINE_MUTEX(pipe_max_mutex);
>> +
>> +/*
>>   * Minimum pipe size, as required by POSIX
>>   */
>>  unsigned int pipe_min_size = PAGE_SIZE;
>> @@ -623,13 +628,18 @@ struct pipe_inode_info *alloc_pipe_info(void)
>>  	unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
>>  	struct user_struct *user = get_current_user();
>>  	unsigned long user_bufs;
>> +	unsigned int max_size;
>>  
>>  	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
>>  	if (pipe == NULL)
>>  		goto out_free_uid;
>>  
>> -	if (pipe_bufs * PAGE_SIZE > pipe_max_size && !capable(CAP_SYS_RESOURCE))
>> -		pipe_bufs = pipe_max_size >> PAGE_SHIFT;
>> +	mutex_lock(&pipe_max_mutex);
>> +	max_size = pipe_max_size;
>> +	mutex_unlock(&pipe_max_mutex);
>> +
>> +	if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
>> +		pipe_bufs = max_size >> PAGE_SHIFT;
>>  
>>  	user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
>>  
>> @@ -1039,6 +1049,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
>>  	struct pipe_buffer *bufs;
>>  	unsigned int size, nr_pages;
>>  	unsigned long user_bufs;
>> +	unsigned int max_size;
>>  	long ret = 0;
>>  
>>  	size = round_pipe_size(arg);
>> @@ -1056,8 +1067,11 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
>>  	 * Decreasing the pipe capacity is always permitted, even
>>  	 * if the user is currently over a limit.
>>  	 */
>> +	mutex_lock(&pipe_max_mutex);
>> +	max_size = pipe_max_size;
>> +	mutex_unlock(&pipe_max_mutex);
>>  	if (nr_pages > pipe->buffers &&
>> -			size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
>> +			size > max_size && !capable(CAP_SYS_RESOURCE))
>>  		return -EPERM;
>>  
>>  	user_bufs = account_pipe_buffers(pipe->user, pipe->buffers, nr_pages);
>> @@ -1131,18 +1145,24 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
>>  	unsigned int rounded_pipe_max_size;
>>  	int ret;
>>  
>> +	mutex_lock(&pipe_max_mutex);
>>  	orig_pipe_max_size = pipe_max_size;
>>  	ret = proc_dointvec_minmax(table, write, buf, lenp, ppos);
>> -	if (ret < 0 || !write)
>> +	if (ret < 0 || !write) {
>> +		mutex_unlock(&pipe_max_mutex);
>>  		return ret;
>> +	}
>>  
>>  	rounded_pipe_max_size = round_pipe_size(pipe_max_size);
>>  	if (rounded_pipe_max_size == 0) {
>>  		pipe_max_size = orig_pipe_max_size;
>> +		mutex_unlock(&pipe_max_mutex);
>>  		return -EINVAL;
>>  	}
>>  
>>  	pipe_max_size = rounded_pipe_max_size;
>> +	mutex_unlock(&pipe_max_mutex);
>> +
>>  	return ret;
>>  }
>>  
>> -- 
>> 1.8.3.1
>>
> I think this mutex is too heavy - if multiple processes simultaneously
> create a pipe, the mutex would cause performance degradation.
>
> You can call do_proc_dointvec with a custom callback "conv" function
> that does the rounding of the pipe size value.

Okay, that would consolidate the fiddling with pipe_max_size into a
single place / assignment.

Implementation wise, are you suggesting something like the following
(work in progress).  round_pipe_size() would still be used by
fs/pipe.c::pipe_set_size() so it would need to be visible to both source
files.  The proc_do... function would be pipe-max-size specific as it's
basically:

 - proc_douintvec_minmax, but
    - without the "max" check
    - with an added PAGE_SIZE floor
    - PAGE_SIZE increment

and exported for pipe.c to call.

Plumbing in the opposite direction (export do_proc_douintvec() and stick
the new conv implementation in fs/pipe.c) might be a little easier, but
seems like the same ick-factor in the end.

Regards,

-- Joe

-->8-- -->8-- -->8-- -->8--

 #ifdef CONFIG_COREDUMP
@@ -3179,6 +3221,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct
ctl_table *table, int write,
 EXPORT_SYMBOL(proc_dointvec_jiffies);
 EXPORT_SYMBOL(proc_dointvec_minmax);
 EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
+EXPORT_SYMBOL_GPL(proc_dopipe_max_size);
 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
 EXPORT_SYMBOL(proc_dostring);

Comments

Mikulas Patocka Sept. 25, 2017, 10:44 a.m. UTC | #1
On Tue, 19 Sep 2017, Joe Lawrence wrote:

> On 09/14/2017 07:09 PM, Mikulas Patocka wrote:
> >
> > I think this mutex is too heavy - if multiple processes simultaneously
> > create a pipe, the mutex would cause performance degradation.
> >
> > You can call do_proc_dointvec with a custom callback "conv" function
> > that does the rounding of the pipe size value.
> 
> Okay, that would consolidate the fiddling with pipe_max_size into a
> single place / assignment.
> 
> Implementation wise, are you suggesting something like the following
> (work in progress).  round_pipe_size() would still be used by
> fs/pipe.c::pipe_set_size() so it would need to be visible to both source
> files.  The proc_do... function would be pipe-max-size specific as it's
> basically:
> 
>  - proc_douintvec_minmax, but
>     - without the "max" check
>     - with an added PAGE_SIZE floor
>     - PAGE_SIZE increment
> 
> and exported for pipe.c to call.
> 
> Plumbing in the opposite direction (export do_proc_douintvec() and stick
> the new conv implementation in fs/pipe.c) might be a little easier, but
> seems like the same ick-factor in the end.
> 
> Regards,
> 
> -- Joe

Yes. I think this is correct.

Reviewed-by: Mikulas Patocka <mpatocka@redhat.com>

> -->8-- -->8-- -->8-- -->8--
> 
> diff --git a/fs/pipe.c b/fs/pipe.c
> index 8cbc97d97753..4db3cd2d139c 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -1019,7 +1019,7 @@ static int fifo_open(struct inode *inode, struct
> file *filp)
>   * Currently we rely on the pipe array holding a power-of-2 number
>   * of pages. Returns 0 on error.
>   */
> -static inline unsigned int round_pipe_size(unsigned int size)
> +unsigned int round_pipe_size(unsigned int size)
>  {
>  	unsigned long nr_pages;
> 
> @@ -1130,19 +1130,7 @@ static long pipe_set_size(struct pipe_inode_info
> *pipe, unsigned long arg)
>  int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
>  		 size_t *lenp, loff_t *ppos)
>  {
> -	unsigned int rounded_pipe_max_size;
> -	int ret;
> -
> -	ret = proc_douintvec_minmax(table, write, buf, lenp, ppos);
> -	if (ret < 0 || !write)
> -		return ret;
> -
> -	rounded_pipe_max_size = round_pipe_size(pipe_max_size);
> -	if (rounded_pipe_max_size == 0)
> -		return -EINVAL;
> -
> -	pipe_max_size = rounded_pipe_max_size;
> -	return ret;
> +	return proc_dopipe_max_size(table, write, buf, lenp, ppos);
>  }
> 
>  /*
> diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
> index e7497c9dde7f..485cf7a7aa8f 100644
> --- a/include/linux/pipe_fs_i.h
> +++ b/include/linux/pipe_fs_i.h
> @@ -190,5 +190,6 @@ static inline int pipe_buf_steal(struct
> pipe_inode_info *pipe,
>  struct pipe_inode_info *get_pipe_info(struct file *file);
> 
>  int create_pipe_files(struct file **, int);
> +unsigned int round_pipe_size(unsigned int size);
> 
>  #endif
> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
> index 1d4dba490fb6..ba24ca72800c 100644
> --- a/include/linux/sysctl.h
> +++ b/include/linux/sysctl.h
> @@ -50,6 +50,9 @@ extern int proc_dointvec_minmax(struct ctl_table *, int,
>  extern int proc_douintvec_minmax(struct ctl_table *table, int write,
>  				 void __user *buffer, size_t *lenp,
>  				 loff_t *ppos);
> +extern int proc_dopipe_max_size(struct ctl_table *table, int write,
> +				void __user *buffer, size_t *lenp,
> +				loff_t *ppos);
>  extern int proc_dointvec_jiffies(struct ctl_table *, int,
>  				 void __user *, size_t *, loff_t *);
>  extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index c976719bf37a..7a2913c5546e 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -67,6 +67,7 @@
>  #include <linux/kexec.h>
>  #include <linux/bpf.h>
>  #include <linux/mount.h>
> +#include <linux/pipe_fs_i.h>
> 
>  #include <linux/uaccess.h>
>  #include <asm/processor.h>
> @@ -2631,6 +2632,47 @@ int proc_douintvec_minmax(struct ctl_table
> *table, int write,
>  				 do_proc_douintvec_minmax_conv, &param);
>  }
> 
> +struct do_proc_dopipe_max_size_conv_param {
> +	unsigned int *min;
> +};
> +
> +static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
> +					unsigned int *valp,
> +					int write, void *data)
> +{
> +	struct do_proc_dopipe_max_size_conv_param *param = data;
> +
> +	if (write) {
> +		unsigned int val = round_pipe_size(*lvalp);
> +
> +		if (val == 0)
> +			return -EINVAL;
> +
> +		if (param->min && *param->min > val)
> +			return -ERANGE;
> +
> +		if (*lvalp > UINT_MAX)
> +			return -EINVAL;
> +
> +		*valp = val;
> +	} else {
> +		unsigned int val = *valp;
> +		*lvalp = (unsigned long) val;
> +	}
> +
> +	return 0;
> +}
> +
> +int proc_dopipe_max_size(struct ctl_table *table, int write,
> +			 void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> +	struct do_proc_dopipe_max_size_conv_param param = {
> +		.min = (unsigned int *) table->extra1,
> +	};
> +	return do_proc_douintvec(table, write, buffer, lenp, ppos,
> +				 do_proc_dopipe_max_size_conv, &param);
> +}
> +
>  static void validate_coredump_safety(void)
>  {
>  #ifdef CONFIG_COREDUMP
> @@ -3179,6 +3221,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct
> ctl_table *table, int write,
>  EXPORT_SYMBOL(proc_dointvec_jiffies);
>  EXPORT_SYMBOL(proc_dointvec_minmax);
>  EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
> +EXPORT_SYMBOL_GPL(proc_dopipe_max_size);
>  EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
>  EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
>  EXPORT_SYMBOL(proc_dostring);
> -- 
> 1.8.3.1
>
diff mbox

Patch

diff --git a/fs/pipe.c b/fs/pipe.c
index 8cbc97d97753..4db3cd2d139c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1019,7 +1019,7 @@  static int fifo_open(struct inode *inode, struct
file *filp)
  * Currently we rely on the pipe array holding a power-of-2 number
  * of pages. Returns 0 on error.
  */
-static inline unsigned int round_pipe_size(unsigned int size)
+unsigned int round_pipe_size(unsigned int size)
 {
 	unsigned long nr_pages;

@@ -1130,19 +1130,7 @@  static long pipe_set_size(struct pipe_inode_info
*pipe, unsigned long arg)
 int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
 		 size_t *lenp, loff_t *ppos)
 {
-	unsigned int rounded_pipe_max_size;
-	int ret;
-
-	ret = proc_douintvec_minmax(table, write, buf, lenp, ppos);
-	if (ret < 0 || !write)
-		return ret;
-
-	rounded_pipe_max_size = round_pipe_size(pipe_max_size);
-	if (rounded_pipe_max_size == 0)
-		return -EINVAL;
-
-	pipe_max_size = rounded_pipe_max_size;
-	return ret;
+	return proc_dopipe_max_size(table, write, buf, lenp, ppos);
 }

 /*
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index e7497c9dde7f..485cf7a7aa8f 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -190,5 +190,6 @@  static inline int pipe_buf_steal(struct
pipe_inode_info *pipe,
 struct pipe_inode_info *get_pipe_info(struct file *file);

 int create_pipe_files(struct file **, int);
+unsigned int round_pipe_size(unsigned int size);

 #endif
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 1d4dba490fb6..ba24ca72800c 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -50,6 +50,9 @@  extern int proc_dointvec_minmax(struct ctl_table *, int,
 extern int proc_douintvec_minmax(struct ctl_table *table, int write,
 				 void __user *buffer, size_t *lenp,
 				 loff_t *ppos);
+extern int proc_dopipe_max_size(struct ctl_table *table, int write,
+				void __user *buffer, size_t *lenp,
+				loff_t *ppos);
 extern int proc_dointvec_jiffies(struct ctl_table *, int,
 				 void __user *, size_t *, loff_t *);
 extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c976719bf37a..7a2913c5546e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -67,6 +67,7 @@ 
 #include <linux/kexec.h>
 #include <linux/bpf.h>
 #include <linux/mount.h>
+#include <linux/pipe_fs_i.h>

 #include <linux/uaccess.h>
 #include <asm/processor.h>
@@ -2631,6 +2632,47 @@  int proc_douintvec_minmax(struct ctl_table
*table, int write,
 				 do_proc_douintvec_minmax_conv, &param);
 }

+struct do_proc_dopipe_max_size_conv_param {
+	unsigned int *min;
+};
+
+static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
+					unsigned int *valp,
+					int write, void *data)
+{
+	struct do_proc_dopipe_max_size_conv_param *param = data;
+
+	if (write) {
+		unsigned int val = round_pipe_size(*lvalp);
+
+		if (val == 0)
+			return -EINVAL;
+
+		if (param->min && *param->min > val)
+			return -ERANGE;
+
+		if (*lvalp > UINT_MAX)
+			return -EINVAL;
+
+		*valp = val;
+	} else {
+		unsigned int val = *valp;
+		*lvalp = (unsigned long) val;
+	}
+
+	return 0;
+}
+
+int proc_dopipe_max_size(struct ctl_table *table, int write,
+			 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct do_proc_dopipe_max_size_conv_param param = {
+		.min = (unsigned int *) table->extra1,
+	};
+	return do_proc_douintvec(table, write, buffer, lenp, ppos,
+				 do_proc_dopipe_max_size_conv, &param);
+}
+
 static void validate_coredump_safety(void)
 {