diff mbox series

[1/2] vfs: replace i_readcount with a biased i_count

Message ID 20190608135717.8472-2-amir73il@gmail.com
State New, archived
Headers show
Series Fix write leases on overlayfs | expand

Commit Message

Amir Goldstein June 8, 2019, 1:57 p.m. UTC
Count struct files open RO together with inode reference count instead
of using a dedicated i_readcount field.  This will allow us to use the
RO count also when CONFIG_IMA is not defined and will reduce the size of
struct inode for 32bit archs when CONFIG_IMA is defined.

We need this RO count for posix leases code, which currently naively
checks i_count and d_count in an inaccurate manner.

Should regular i_count overflow into RO count bias by struct files
opened for write, it's not a big deal, as we mostly need the RO count
to be reliable when the first writer comes along.

Cc: <stable@vger.kernel.org> # v4.19
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 include/linux/fs.h                | 33 +++++++++++++++++++------------
 security/integrity/ima/ima_main.c |  2 +-
 2 files changed, 21 insertions(+), 14 deletions(-)

Comments

Miklos Szeredi June 9, 2019, 7:36 p.m. UTC | #1
On Sat, Jun 8, 2019 at 3:57 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> Count struct files open RO together with inode reference count instead
> of using a dedicated i_readcount field.  This will allow us to use the
> RO count also when CONFIG_IMA is not defined and will reduce the size of
> struct inode for 32bit archs when CONFIG_IMA is defined.
>
> We need this RO count for posix leases code, which currently naively
> checks i_count and d_count in an inaccurate manner.
>
> Should regular i_count overflow into RO count bias by struct files
> opened for write, it's not a big deal, as we mostly need the RO count
> to be reliable when the first writer comes along.
>
> Cc: <stable@vger.kernel.org> # v4.19
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  include/linux/fs.h                | 33 +++++++++++++++++++------------
>  security/integrity/ima/ima_main.c |  2 +-
>  2 files changed, 21 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index f7fdfe93e25d..504bf17967dd 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -694,9 +694,6 @@ struct inode {
>         atomic_t                i_count;
>         atomic_t                i_dio_count;
>         atomic_t                i_writecount;
> -#ifdef CONFIG_IMA
> -       atomic_t                i_readcount; /* struct files open RO */
> -#endif
>         union {
>                 const struct file_operations    *i_fop; /* former ->i_op->default_file_ops */
>                 void (*free_inode)(struct inode *);
> @@ -2890,26 +2887,36 @@ static inline bool inode_is_open_for_write(const struct inode *inode)
>         return atomic_read(&inode->i_writecount) > 0;
>  }
>
> -#ifdef CONFIG_IMA
> +/*
> + * Count struct files open RO together with inode rerefernce count.
> + * We need this count for IMA and for posix leases. The RO count should not
> + * include files opened RDWR nor files opened O_PATH and internal kernel
> + * inode references, like the ones taken by overlayfs and inotify.
> + * Should regular i_count overflow into I_RO_COUNT_BIAS by struct files
> + * opened for write, it's not a big deal, as we mostly need
> + * inode_is_open_rdonly() to be reliable when the first writer comes along.

The bigger issue is allowing i_count to wrap around: all you need is a
file with 1025 hard links and 4194304 opens of said file.  Doable
without privileges?  Not sure, but it does seem pretty fragile.

BTW the current 32 bit i_readcount that IMA uses also has wraparound
issues, though that's not nearly as bad.

Going to a 64 bit i_count would fix these, I guess.

Thanks,
Miklos
Mimi Zohar June 12, 2019, 12:51 p.m. UTC | #2
On Sat, 2019-06-08 at 16:57 +0300, Amir Goldstein wrote:
> Count struct files open RO together with inode reference count instead
> of using a dedicated i_readcount field.  This will allow us to use the
> RO count also when CONFIG_IMA is not defined and will reduce the size of
> struct inode for 32bit archs when CONFIG_IMA is defined.
> 
> We need this RO count for posix leases code, which currently naively
> checks i_count and d_count in an inaccurate manner.
> 
> Should regular i_count overflow into RO count bias by struct files
> opened for write, it's not a big deal, as we mostly need the RO count
> to be reliable when the first writer comes along.

"i_count" has been defined forever.  Has its meaning changed?  This
patch implies that "i_readcount" was never really needed.

Mimi

> 
> Cc: <stable@vger.kernel.org> # v4.19
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  include/linux/fs.h                | 33 +++++++++++++++++++------------
>  security/integrity/ima/ima_main.c |  2 +-
>  2 files changed, 21 insertions(+), 14 deletions(-)
> 
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index f7fdfe93e25d..504bf17967dd 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -694,9 +694,6 @@ struct inode {
>  	atomic_t		i_count;
>  	atomic_t		i_dio_count;
>  	atomic_t		i_writecount;
> -#ifdef CONFIG_IMA
> -	atomic_t		i_readcount; /* struct files open RO */
> -#endif
>  	union {
>  		const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
>  		void (*free_inode)(struct inode *);
> @@ -2890,26 +2887,36 @@ static inline bool inode_is_open_for_write(const struct inode *inode)
>  	return atomic_read(&inode->i_writecount) > 0;
>  }
>  
> -#ifdef CONFIG_IMA
> +/*
> + * Count struct files open RO together with inode rerefernce count.
> + * We need this count for IMA and for posix leases. The RO count should not
> + * include files opened RDWR nor files opened O_PATH and internal kernel
> + * inode references, like the ones taken by overlayfs and inotify.
> + * Should regular i_count overflow into I_RO_COUNT_BIAS by struct files
> + * opened for write, it's not a big deal, as we mostly need
> + * inode_is_open_rdonly() to be reliable when the first writer comes along.
> + */
> +#define I_RO_COUNT_SHIFT 10
> +#define I_RO_COUNT_BIAS	(1UL << I_RO_COUNT_SHIFT)
> +
>  static inline void i_readcount_dec(struct inode *inode)
>  {
> -	BUG_ON(!atomic_read(&inode->i_readcount));
> -	atomic_dec(&inode->i_readcount);
> +	WARN_ON(atomic_read(&inode->i_count) < I_RO_COUNT_BIAS);
> +	atomic_sub(I_RO_COUNT_BIAS, &inode->i_count);
>  }
>  static inline void i_readcount_inc(struct inode *inode)
>  {
> -	atomic_inc(&inode->i_readcount);
> +	atomic_add(I_RO_COUNT_BIAS, &inode->i_count);
>  }
> -#else
> -static inline void i_readcount_dec(struct inode *inode)
> +static inline int i_readcount_read(const struct inode *inode)
>  {
> -	return;
> +	return atomic_read(&inode->i_count) >> I_RO_COUNT_SHIFT;
>  }
> -static inline void i_readcount_inc(struct inode *inode)
> +static inline bool inode_is_open_rdonly(const struct inode *inode)
>  {
> -	return;
> +	return atomic_read(&inode->i_count) > I_RO_COUNT_BIAS;
>  }
> -#endif
> +
>  extern int do_pipe_flags(int *, int);
>  
>  #define __kernel_read_file_id(id) \
> diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
> index 357edd140c09..766bac778d11 100644
> --- a/security/integrity/ima/ima_main.c
> +++ b/security/integrity/ima/ima_main.c
> @@ -94,7 +94,7 @@ static void ima_rdwr_violation_check(struct file *file,
>  	bool send_tomtou = false, send_writers = false;
>  
>  	if (mode & FMODE_WRITE) {
> -		if (atomic_read(&inode->i_readcount) && IS_IMA(inode)) {
> +		if (inode_is_open_rdonly(inode) && IS_IMA(inode)) {
>  			if (!iint)
>  				iint = integrity_iint_find(inode);
>  			/* IMA_MEASURE is set from reader side */
Amir Goldstein June 12, 2019, 3:09 p.m. UTC | #3
On Wed, Jun 12, 2019 at 3:52 PM Mimi Zohar <zohar@linux.ibm.com> wrote:
>
> On Sat, 2019-06-08 at 16:57 +0300, Amir Goldstein wrote:
> > Count struct files open RO together with inode reference count instead
> > of using a dedicated i_readcount field.  This will allow us to use the
> > RO count also when CONFIG_IMA is not defined and will reduce the size of
> > struct inode for 32bit archs when CONFIG_IMA is defined.
> >
> > We need this RO count for posix leases code, which currently naively
> > checks i_count and d_count in an inaccurate manner.
> >
> > Should regular i_count overflow into RO count bias by struct files
> > opened for write, it's not a big deal, as we mostly need the RO count
> > to be reliable when the first writer comes along.
>
> "i_count" has been defined forever.  Has its meaning changed?  This
> patch implies that "i_readcount" was never really needed.
>

Not really.
i_count is only used to know if object is referenced.
It does not matter if user takes 1 or more references on i_count
as long as user puts back all the references it took.

If user took i_readcount, i_count cannot be zero, so short of overflow,
we can describe i_readcount as a biased i_count.

But if I am following Miklos' suggestion to make i_count 64bit, inode
struct size is going to grow for 32bit arch when  CONFIG_IMA is not
defined, so to reduce impact, I will keep i_readcount as a separate
member and let it be defined also when BITS_PER_LONG == 64
and implement inode_is_open_rdonly() using d_count and i_count
when i_readcount is not defined.

Let's see what people will have to say about that...

Thanks,
Amir.
bfields@fieldses.org June 12, 2019, 3:29 p.m. UTC | #4
On Wed, Jun 12, 2019 at 06:09:59PM +0300, Amir Goldstein wrote:
> But if I am following Miklos' suggestion to make i_count 64bit, inode
> struct size is going to grow for 32bit arch when  CONFIG_IMA is not
> defined, so to reduce impact, I will keep i_readcount as a separate
> member and let it be defined also when BITS_PER_LONG == 64
> and implement inode_is_open_rdonly() using d_count and i_count
> when i_readcount is not defined.

How bad would it be just to let the inode be a little bigger?  How big
is it already on 32 bit architectures?  How much does this change e.g.
how many inodes you can cache per megabyte?

--b.
Amir Goldstein June 12, 2019, 3:35 p.m. UTC | #5
On Wed, Jun 12, 2019 at 6:29 PM J . Bruce Fields <bfields@fieldses.org> wrote:
>
> On Wed, Jun 12, 2019 at 06:09:59PM +0300, Amir Goldstein wrote:
> > But if I am following Miklos' suggestion to make i_count 64bit, inode
> > struct size is going to grow for 32bit arch when  CONFIG_IMA is not
> > defined, so to reduce impact, I will keep i_readcount as a separate
> > member and let it be defined also when BITS_PER_LONG == 64
> > and implement inode_is_open_rdonly() using d_count and i_count
> > when i_readcount is not defined.
>
> How bad would it be just to let the inode be a little bigger?  How big
> is it already on 32 bit architectures?  How much does this change e.g.
> how many inodes you can cache per megabyte?
>

It's hard to answer how tiny changes like this impact users with different
configs, especially to IoT ones, so I do not like increasing size of inode
unconditionally, but I will go with:
-#ifdef CONFIG_IMA
+#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
        atomic_t                i_readcount; /* struct files open RO */
 #endif

So IoT guys can have an option to keep inode size the same
and not let the locks code worry about it.

OK?

Thanks,
Amir.
Mimi Zohar June 12, 2019, 3:59 p.m. UTC | #6
On Wed, 2019-06-12 at 18:09 +0300, Amir Goldstein wrote:
> On Wed, Jun 12, 2019 at 3:52 PM Mimi Zohar <zohar@linux.ibm.com> wrote:
> >
> > On Sat, 2019-06-08 at 16:57 +0300, Amir Goldstein wrote:
> > > Count struct files open RO together with inode reference count instead
> > > of using a dedicated i_readcount field.  This will allow us to use the
> > > RO count also when CONFIG_IMA is not defined and will reduce the size of
> > > struct inode for 32bit archs when CONFIG_IMA is defined.
> > >
> > > We need this RO count for posix leases code, which currently naively
> > > checks i_count and d_count in an inaccurate manner.
> > >
> > > Should regular i_count overflow into RO count bias by struct files
> > > opened for write, it's not a big deal, as we mostly need the RO count
> > > to be reliable when the first writer comes along.
> >
> > "i_count" has been defined forever.  Has its meaning changed?  This
> > patch implies that "i_readcount" was never really needed.
> >
> 
> Not really.
> i_count is only used to know if object is referenced.
> It does not matter if user takes 1 or more references on i_count
> as long as user puts back all the references it took.
> 
> If user took i_readcount, i_count cannot be zero, so short of overflow,
> we can describe i_readcount as a biased i_count.

Having a count was originally to make sure we weren't missing
anything.  As long as we can detect if a file is opened for read, the
less IMA specific code there is, the better.

> 
> But if I am following Miklos' suggestion to make i_count 64bit, inode
> struct size is going to grow for 32bit arch when  CONFIG_IMA is not
> defined, so to reduce impact, I will keep i_readcount as a separate
> member and let it be defined also when BITS_PER_LONG == 64
> and implement inode_is_open_rdonly() using d_count and i_count
> when i_readcount is not defined.
> 
> Let's see what people will have to say about that...

Ok

Mimi
diff mbox series

Patch

diff --git a/include/linux/fs.h b/include/linux/fs.h
index f7fdfe93e25d..504bf17967dd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -694,9 +694,6 @@  struct inode {
 	atomic_t		i_count;
 	atomic_t		i_dio_count;
 	atomic_t		i_writecount;
-#ifdef CONFIG_IMA
-	atomic_t		i_readcount; /* struct files open RO */
-#endif
 	union {
 		const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
 		void (*free_inode)(struct inode *);
@@ -2890,26 +2887,36 @@  static inline bool inode_is_open_for_write(const struct inode *inode)
 	return atomic_read(&inode->i_writecount) > 0;
 }
 
-#ifdef CONFIG_IMA
+/*
+ * Count struct files open RO together with inode rerefernce count.
+ * We need this count for IMA and for posix leases. The RO count should not
+ * include files opened RDWR nor files opened O_PATH and internal kernel
+ * inode references, like the ones taken by overlayfs and inotify.
+ * Should regular i_count overflow into I_RO_COUNT_BIAS by struct files
+ * opened for write, it's not a big deal, as we mostly need
+ * inode_is_open_rdonly() to be reliable when the first writer comes along.
+ */
+#define I_RO_COUNT_SHIFT 10
+#define I_RO_COUNT_BIAS	(1UL << I_RO_COUNT_SHIFT)
+
 static inline void i_readcount_dec(struct inode *inode)
 {
-	BUG_ON(!atomic_read(&inode->i_readcount));
-	atomic_dec(&inode->i_readcount);
+	WARN_ON(atomic_read(&inode->i_count) < I_RO_COUNT_BIAS);
+	atomic_sub(I_RO_COUNT_BIAS, &inode->i_count);
 }
 static inline void i_readcount_inc(struct inode *inode)
 {
-	atomic_inc(&inode->i_readcount);
+	atomic_add(I_RO_COUNT_BIAS, &inode->i_count);
 }
-#else
-static inline void i_readcount_dec(struct inode *inode)
+static inline int i_readcount_read(const struct inode *inode)
 {
-	return;
+	return atomic_read(&inode->i_count) >> I_RO_COUNT_SHIFT;
 }
-static inline void i_readcount_inc(struct inode *inode)
+static inline bool inode_is_open_rdonly(const struct inode *inode)
 {
-	return;
+	return atomic_read(&inode->i_count) > I_RO_COUNT_BIAS;
 }
-#endif
+
 extern int do_pipe_flags(int *, int);
 
 #define __kernel_read_file_id(id) \
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 357edd140c09..766bac778d11 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -94,7 +94,7 @@  static void ima_rdwr_violation_check(struct file *file,
 	bool send_tomtou = false, send_writers = false;
 
 	if (mode & FMODE_WRITE) {
-		if (atomic_read(&inode->i_readcount) && IS_IMA(inode)) {
+		if (inode_is_open_rdonly(inode) && IS_IMA(inode)) {
 			if (!iint)
 				iint = integrity_iint_find(inode);
 			/* IMA_MEASURE is set from reader side */