Message ID | 20240127020833.487907-2-kent.overstreet@linux.dev (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | lockdep cmp fn conversions | expand |
On Fri 26-01-24 21:08:28, Kent Overstreet wrote: > *_lock_nested() is fundamentally broken; lockdep needs to check lock > ordering, but we cannot device a total ordering on an unbounded number > of elements with only a few subclasses. > > the replacement is to define lock ordering with a proper comparison > function. > > fs/pipe.c was already doing everything correctly otherwise, nothing > much changes here. > > Cc: Alexander Viro <viro@zeniv.linux.org.uk> > Cc: Christian Brauner <brauner@kernel.org> > Cc: Jan Kara <jack@suse.cz> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> I had to digest for a while what this new lockdep lock ordering feature is about. I have one pending question - what is the motivation of this conversion of pipe code? AFAIU we don't have any problems with lockdep annotations on pipe->mutex because there are always only two subclasses? Honza > --- > fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------ > 1 file changed, 36 insertions(+), 45 deletions(-) > > diff --git a/fs/pipe.c b/fs/pipe.c > index f1adbfe743d4..50c8a8596b52 100644 > --- a/fs/pipe.c > +++ b/fs/pipe.c > @@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; > * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 > */ > > -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) > +#define cmp_int(l, r) ((l > r) - (l < r)) > + > +#ifdef CONFIG_PROVE_LOCKING > +static int pipe_lock_cmp_fn(const struct lockdep_map *a, > + const struct lockdep_map *b) > { > - if (pipe->files) > - mutex_lock_nested(&pipe->mutex, subclass); > + return cmp_int((unsigned long) a, (unsigned long) b); > } > +#endif > > void pipe_lock(struct pipe_inode_info *pipe) > { > - /* > - * pipe_lock() nests non-pipe inode locks (for writing to a file) > - */ > - pipe_lock_nested(pipe, I_MUTEX_PARENT); > + if (pipe->files) > + mutex_lock(&pipe->mutex); > } > EXPORT_SYMBOL(pipe_lock); > > @@ -98,28 +100,16 @@ void 
pipe_unlock(struct pipe_inode_info *pipe) > } > EXPORT_SYMBOL(pipe_unlock); > > -static inline void __pipe_lock(struct pipe_inode_info *pipe) > -{ > - mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); > -} > - > -static inline void __pipe_unlock(struct pipe_inode_info *pipe) > -{ > - mutex_unlock(&pipe->mutex); > -} > - > void pipe_double_lock(struct pipe_inode_info *pipe1, > struct pipe_inode_info *pipe2) > { > BUG_ON(pipe1 == pipe2); > > - if (pipe1 < pipe2) { > - pipe_lock_nested(pipe1, I_MUTEX_PARENT); > - pipe_lock_nested(pipe2, I_MUTEX_CHILD); > - } else { > - pipe_lock_nested(pipe2, I_MUTEX_PARENT); > - pipe_lock_nested(pipe1, I_MUTEX_CHILD); > - } > + if (pipe1 > pipe2) > + swap(pipe1, pipe2); > + > + pipe_lock(pipe1); > + pipe_lock(pipe2); > } > > static void anon_pipe_buf_release(struct pipe_inode_info *pipe, > @@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > return 0; > > ret = 0; > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > > /* > * We only wake up writers if the pipe was full when we started > @@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > ret = -EAGAIN; > break; > } > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > > /* > * We only get here if we didn't actually read anything. 
> @@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0) > return -ERESTARTSYS; > > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); > wake_next_reader = true; > } > if (pipe_empty(pipe->head, pipe->tail)) > wake_next_reader = false; > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > > if (was_full) > wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); > @@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) > if (unlikely(total_len == 0)) > return 0; > > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > > if (!pipe->readers) { > send_sig(SIGPIPE, current, 0); > @@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) > * after waiting we need to re-check whether the pipe > * become empty while we dropped the lock. > */ > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > if (was_empty) > wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); > kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); > wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > was_empty = pipe_empty(pipe->head, pipe->tail); > wake_next_writer = true; > } > out: > if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) > wake_next_writer = false; > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > > /* > * If we do do a wakeup event, we do a 'sync' wakeup, because we > @@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > > switch (cmd) { > case FIONREAD: > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > count = 0; > head = pipe->head; > tail = pipe->tail; > @@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > count += pipe->bufs[tail & mask].len; > tail++; > 
} > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > > return put_user(count, (int __user *)arg); > > #ifdef CONFIG_WATCH_QUEUE > case IOC_WATCH_QUEUE_SET_SIZE: { > int ret; > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > ret = watch_queue_set_size(pipe, arg); > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > return ret; > } > > @@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file) > { > struct pipe_inode_info *pipe = file->private_data; > > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > if (file->f_mode & FMODE_READ) > pipe->readers--; > if (file->f_mode & FMODE_WRITE) > @@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file) > kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); > kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); > } > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > > put_pipe_info(inode, pipe); > return 0; > @@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on) > struct pipe_inode_info *pipe = filp->private_data; > int retval = 0; > > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > if (filp->f_mode & FMODE_READ) > retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); > if ((filp->f_mode & FMODE_WRITE) && retval >= 0) { > @@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on) > /* this can happen only if on == T */ > fasync_helper(-1, filp, 0, &pipe->fasync_readers); > } > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > return retval; > } > > @@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void) > pipe->nr_accounted = pipe_bufs; > pipe->user = user; > mutex_init(&pipe->mutex); > + lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL); > return pipe; > } > > @@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp) > filp->private_data = pipe; > /* OK, we have a pipe and it's pinned down */ > > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > > /* We can only do regular read/write on 
fifos */ > stream_open(inode, filp); > @@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp) > } > > /* Ok! */ > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > return 0; > > err_rd: > @@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp) > goto err; > > err: > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > > put_pipe_info(inode, pipe); > return ret; > @@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg) > if (!pipe) > return -EBADF; > > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > > switch (cmd) { > case F_SETPIPE_SZ: > @@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg) > break; > } > > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > return ret; > } > > -- > 2.43.0 >
On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <jack@suse.cz> wrote: > > On Fri 26-01-24 21:08:28, Kent Overstreet wrote: > > *_lock_nested() is fundamentally broken; lockdep needs to check lock > > ordering, but we cannot device a total ordering on an unbounded number > > of elements with only a few subclasses. > > > > the replacement is to define lock ordering with a proper comparison > > function. > > > > fs/pipe.c was already doing everything correctly otherwise, nothing > > much changes here. > > > > Cc: Alexander Viro <viro@zeniv.linux.org.uk> > > Cc: Christian Brauner <brauner@kernel.org> > > Cc: Jan Kara <jack@suse.cz> > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> > > I had to digest for a while what this new lockdep lock ordering feature is > about. I have one pending question - what is the motivation of this > conversion of pipe code? AFAIU we don't have any problems with lockdep > annotations on pipe->mutex because there are always only two subclasses? > > Honza Hi, "Numbers talk - Bullshit walks." (Linus Torvalds) When it comes to pipes, I normally benchmark like this (example, using pipebench [1]): root# cat /dev/sdc | pipebench > /dev/null Do you have numbers for your patch-series? Thanks. 
BG, -Sedat- [1] https://packages.debian.org/pipebench > > > --- > > fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------ > > 1 file changed, 36 insertions(+), 45 deletions(-) > > > > diff --git a/fs/pipe.c b/fs/pipe.c > > index f1adbfe743d4..50c8a8596b52 100644 > > --- a/fs/pipe.c > > +++ b/fs/pipe.c > > @@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; > > * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 > > */ > > > > -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) > > +#define cmp_int(l, r) ((l > r) - (l < r)) > > + > > +#ifdef CONFIG_PROVE_LOCKING > > +static int pipe_lock_cmp_fn(const struct lockdep_map *a, > > + const struct lockdep_map *b) > > { > > - if (pipe->files) > > - mutex_lock_nested(&pipe->mutex, subclass); > > + return cmp_int((unsigned long) a, (unsigned long) b); > > } > > +#endif > > > > void pipe_lock(struct pipe_inode_info *pipe) > > { > > - /* > > - * pipe_lock() nests non-pipe inode locks (for writing to a file) > > - */ > > - pipe_lock_nested(pipe, I_MUTEX_PARENT); > > + if (pipe->files) > > + mutex_lock(&pipe->mutex); > > } > > EXPORT_SYMBOL(pipe_lock); > > > > @@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe) > > } > > EXPORT_SYMBOL(pipe_unlock); > > > > -static inline void __pipe_lock(struct pipe_inode_info *pipe) > > -{ > > - mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); > > -} > > - > > -static inline void __pipe_unlock(struct pipe_inode_info *pipe) > > -{ > > - mutex_unlock(&pipe->mutex); > > -} > > - > > void pipe_double_lock(struct pipe_inode_info *pipe1, > > struct pipe_inode_info *pipe2) > > { > > BUG_ON(pipe1 == pipe2); > > > > - if (pipe1 < pipe2) { > > - pipe_lock_nested(pipe1, I_MUTEX_PARENT); > > - pipe_lock_nested(pipe2, I_MUTEX_CHILD); > > - } else { > > - pipe_lock_nested(pipe2, I_MUTEX_PARENT); > > - pipe_lock_nested(pipe1, I_MUTEX_CHILD); > > - } > > + if (pipe1 > pipe2) > > + swap(pipe1, pipe2); > > + 
> > + pipe_lock(pipe1); > > + pipe_lock(pipe2); > > } > > > > static void anon_pipe_buf_release(struct pipe_inode_info *pipe, > > @@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > > return 0; > > > > ret = 0; > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > > > /* > > * We only wake up writers if the pipe was full when we started > > @@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > > ret = -EAGAIN; > > break; > > } > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > > > /* > > * We only get here if we didn't actually read anything. > > @@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > > if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0) > > return -ERESTARTSYS; > > > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); > > wake_next_reader = true; > > } > > if (pipe_empty(pipe->head, pipe->tail)) > > wake_next_reader = false; > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > > > if (was_full) > > wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); > > @@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) > > if (unlikely(total_len == 0)) > > return 0; > > > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > > > if (!pipe->readers) { > > send_sig(SIGPIPE, current, 0); > > @@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) > > * after waiting we need to re-check whether the pipe > > * become empty while we dropped the lock. 
> > */ > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > if (was_empty) > > wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); > > kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); > > wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > was_empty = pipe_empty(pipe->head, pipe->tail); > > wake_next_writer = true; > > } > > out: > > if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) > > wake_next_writer = false; > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > > > /* > > * If we do do a wakeup event, we do a 'sync' wakeup, because we > > @@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > > > > switch (cmd) { > > case FIONREAD: > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > count = 0; > > head = pipe->head; > > tail = pipe->tail; > > @@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > > count += pipe->bufs[tail & mask].len; > > tail++; > > } > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > > > return put_user(count, (int __user *)arg); > > > > #ifdef CONFIG_WATCH_QUEUE > > case IOC_WATCH_QUEUE_SET_SIZE: { > > int ret; > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > ret = watch_queue_set_size(pipe, arg); > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > return ret; > > } > > > > @@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file) > > { > > struct pipe_inode_info *pipe = file->private_data; > > > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > if (file->f_mode & FMODE_READ) > > pipe->readers--; > > if (file->f_mode & FMODE_WRITE) > > @@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file) > > kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); > > kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); > > } > > - 
__pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > > > put_pipe_info(inode, pipe); > > return 0; > > @@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on) > > struct pipe_inode_info *pipe = filp->private_data; > > int retval = 0; > > > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > if (filp->f_mode & FMODE_READ) > > retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); > > if ((filp->f_mode & FMODE_WRITE) && retval >= 0) { > > @@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on) > > /* this can happen only if on == T */ > > fasync_helper(-1, filp, 0, &pipe->fasync_readers); > > } > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > return retval; > > } > > > > @@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void) > > pipe->nr_accounted = pipe_bufs; > > pipe->user = user; > > mutex_init(&pipe->mutex); > > + lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL); > > return pipe; > > } > > > > @@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp) > > filp->private_data = pipe; > > /* OK, we have a pipe and it's pinned down */ > > > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > > > /* We can only do regular read/write on fifos */ > > stream_open(inode, filp); > > @@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp) > > } > > > > /* Ok! 
*/ > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > return 0; > > > > err_rd: > > @@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp) > > goto err; > > > > err: > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > > > put_pipe_info(inode, pipe); > > return ret; > > @@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg) > > if (!pipe) > > return -EBADF; > > > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > > > switch (cmd) { > > case F_SETPIPE_SZ: > > @@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg) > > break; > > } > > > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > return ret; > > } > > > > -- > > 2.43.0 > > > -- > Jan Kara <jack@suse.com> > SUSE Labs, CR >
On Fri, Feb 02, 2024 at 01:03:57PM +0100, Jan Kara wrote: > On Fri 26-01-24 21:08:28, Kent Overstreet wrote: > > *_lock_nested() is fundamentally broken; lockdep needs to check lock > > ordering, but we cannot device a total ordering on an unbounded number > > of elements with only a few subclasses. > > > > the replacement is to define lock ordering with a proper comparison > > function. > > > > fs/pipe.c was already doing everything correctly otherwise, nothing > > much changes here. > > > > Cc: Alexander Viro <viro@zeniv.linux.org.uk> > > Cc: Christian Brauner <brauner@kernel.org> > > Cc: Jan Kara <jack@suse.cz> > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> > > I had to digest for a while what this new lockdep lock ordering feature is > about. I have one pending question - what is the motivation of this > conversion of pipe code? AFAIU we don't have any problems with lockdep > annotations on pipe->mutex because there are always only two subclasses? It's one of the easier conversions to do, and ideally /all/ users of subclasses would go away. Start with the easier ones, figure out those patterns, then the harder...
On Fri 02-02-24 13:25:20, Sedat Dilek wrote: > On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <jack@suse.cz> wrote: > > > > On Fri 26-01-24 21:08:28, Kent Overstreet wrote: > > > *_lock_nested() is fundamentally broken; lockdep needs to check lock > > > ordering, but we cannot device a total ordering on an unbounded number > > > of elements with only a few subclasses. > > > > > > the replacement is to define lock ordering with a proper comparison > > > function. > > > > > > fs/pipe.c was already doing everything correctly otherwise, nothing > > > much changes here. > > > > > > Cc: Alexander Viro <viro@zeniv.linux.org.uk> > > > Cc: Christian Brauner <brauner@kernel.org> > > > Cc: Jan Kara <jack@suse.cz> > > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> > > > > I had to digest for a while what this new lockdep lock ordering feature is > > about. I have one pending question - what is the motivation of this > > conversion of pipe code? AFAIU we don't have any problems with lockdep > > annotations on pipe->mutex because there are always only two subclasses? > > > > Honza > > Hi, > > "Numbers talk - Bullshit walks." (Linus Torvalds) > > In things of pipes - I normally benchmark like this (example): > > root# cat /dev/sdc | pipebench > /dev/null > > Do you have numbers for your patch-series? Sedat AFAIU this patch is not about performance at all but rather about lockdep instrumentation... But maybe I'm missing your point? Honza
On Mon, Feb 5, 2024 at 10:54 AM Jan Kara <jack@suse.cz> wrote: > > On Fri 02-02-24 13:25:20, Sedat Dilek wrote: > > On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <jack@suse.cz> wrote: > > > > > > On Fri 26-01-24 21:08:28, Kent Overstreet wrote: > > > > *_lock_nested() is fundamentally broken; lockdep needs to check lock > > > > ordering, but we cannot device a total ordering on an unbounded number > > > > of elements with only a few subclasses. > > > > > > > > the replacement is to define lock ordering with a proper comparison > > > > function. > > > > > > > > fs/pipe.c was already doing everything correctly otherwise, nothing > > > > much changes here. > > > > > > > > Cc: Alexander Viro <viro@zeniv.linux.org.uk> > > > > Cc: Christian Brauner <brauner@kernel.org> > > > > Cc: Jan Kara <jack@suse.cz> > > > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> > > > > > > I had to digest for a while what this new lockdep lock ordering feature is > > > about. I have one pending question - what is the motivation of this > > > conversion of pipe code? AFAIU we don't have any problems with lockdep > > > annotations on pipe->mutex because there are always only two subclasses? > > > > > > Honza > > > > Hi, > > > > "Numbers talk - Bullshit walks." (Linus Torvalds) > > > > In things of pipes - I normally benchmark like this (example): > > > > root# cat /dev/sdc | pipebench > /dev/null > > > > Do you have numbers for your patch-series? > > Sedat AFAIU this patch is not about performance at all but rather about > lockdep instrumentation... But maybe I'm missing your point? > Sorry, I missed the point, Jan. -Sedat-
On Fri 02-02-24 07:47:50, Kent Overstreet wrote: > On Fri, Feb 02, 2024 at 01:03:57PM +0100, Jan Kara wrote: > > On Fri 26-01-24 21:08:28, Kent Overstreet wrote: > > > *_lock_nested() is fundamentally broken; lockdep needs to check lock > > > ordering, but we cannot device a total ordering on an unbounded number > > > of elements with only a few subclasses. > > > > > > the replacement is to define lock ordering with a proper comparison > > > function. > > > > > > fs/pipe.c was already doing everything correctly otherwise, nothing > > > much changes here. > > > > > > Cc: Alexander Viro <viro@zeniv.linux.org.uk> > > > Cc: Christian Brauner <brauner@kernel.org> > > > Cc: Jan Kara <jack@suse.cz> > > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> > > > > I had to digest for a while what this new lockdep lock ordering feature is > > about. I have one pending question - what is the motivation of this > > conversion of pipe code? AFAIU we don't have any problems with lockdep > > annotations on pipe->mutex because there are always only two subclasses? > > It's one of the easier conversions to do, and ideally /all/ users of > subclasses would go away. > > Start with the easier ones, figure out those patterns, then the > harder... I see, thanks for the explanation. So in the pipes case I actually like that the patch makes the locking less obfuscated with lockdep details (which I'm mostly used to, but still ;)). So feel free to add: Reviewed-by: Jan Kara <jack@suse.cz> for this patch. I'm not 100% convinced it will always be possible to replace subclasses with the new ordering mechanism, but I guess time will tell. Honza
diff --git a/fs/pipe.c b/fs/pipe.c index f1adbfe743d4..50c8a8596b52 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 */ -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) +#define cmp_int(l, r) ((l > r) - (l < r)) + +#ifdef CONFIG_PROVE_LOCKING +static int pipe_lock_cmp_fn(const struct lockdep_map *a, + const struct lockdep_map *b) { - if (pipe->files) - mutex_lock_nested(&pipe->mutex, subclass); + return cmp_int((unsigned long) a, (unsigned long) b); } +#endif void pipe_lock(struct pipe_inode_info *pipe) { - /* - * pipe_lock() nests non-pipe inode locks (for writing to a file) - */ - pipe_lock_nested(pipe, I_MUTEX_PARENT); + if (pipe->files) + mutex_lock(&pipe->mutex); } EXPORT_SYMBOL(pipe_lock); @@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe) } EXPORT_SYMBOL(pipe_unlock); -static inline void __pipe_lock(struct pipe_inode_info *pipe) -{ - mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); -} - -static inline void __pipe_unlock(struct pipe_inode_info *pipe) -{ - mutex_unlock(&pipe->mutex); -} - void pipe_double_lock(struct pipe_inode_info *pipe1, struct pipe_inode_info *pipe2) { BUG_ON(pipe1 == pipe2); - if (pipe1 < pipe2) { - pipe_lock_nested(pipe1, I_MUTEX_PARENT); - pipe_lock_nested(pipe2, I_MUTEX_CHILD); - } else { - pipe_lock_nested(pipe2, I_MUTEX_PARENT); - pipe_lock_nested(pipe1, I_MUTEX_CHILD); - } + if (pipe1 > pipe2) + swap(pipe1, pipe2); + + pipe_lock(pipe1); + pipe_lock(pipe2); } static void anon_pipe_buf_release(struct pipe_inode_info *pipe, @@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) return 0; ret = 0; - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); /* * We only wake up writers if the pipe was full when we started @@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) ret = -EAGAIN; break; } - __pipe_unlock(pipe); + 
mutex_unlock(&pipe->mutex); /* * We only get here if we didn't actually read anything. @@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0) return -ERESTARTSYS; - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); wake_next_reader = true; } if (pipe_empty(pipe->head, pipe->tail)) wake_next_reader = false; - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); if (was_full) wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); @@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) if (unlikely(total_len == 0)) return 0; - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); if (!pipe->readers) { send_sig(SIGPIPE, current, 0); @@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) * after waiting we need to re-check whether the pipe * become empty while we dropped the lock. */ - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); if (was_empty) wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); was_empty = pipe_empty(pipe->head, pipe->tail); wake_next_writer = true; } out: if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) wake_next_writer = false; - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); /* * If we do do a wakeup event, we do a 'sync' wakeup, because we @@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case FIONREAD: - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); count = 0; head = pipe->head; tail = pipe->tail; @@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) count += pipe->bufs[tail & mask].len; tail++; } - __pipe_unlock(pipe); + 
mutex_unlock(&pipe->mutex); return put_user(count, (int __user *)arg); #ifdef CONFIG_WATCH_QUEUE case IOC_WATCH_QUEUE_SET_SIZE: { int ret; - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); ret = watch_queue_set_size(pipe, arg); - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); return ret; } @@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file) { struct pipe_inode_info *pipe = file->private_data; - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); if (file->f_mode & FMODE_READ) pipe->readers--; if (file->f_mode & FMODE_WRITE) @@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file) kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); put_pipe_info(inode, pipe); return 0; @@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on) struct pipe_inode_info *pipe = filp->private_data; int retval = 0; - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); if (filp->f_mode & FMODE_READ) retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); if ((filp->f_mode & FMODE_WRITE) && retval >= 0) { @@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on) /* this can happen only if on == T */ fasync_helper(-1, filp, 0, &pipe->fasync_readers); } - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); return retval; } @@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void) pipe->nr_accounted = pipe_bufs; pipe->user = user; mutex_init(&pipe->mutex); + lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL); return pipe; } @@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp) filp->private_data = pipe; /* OK, we have a pipe and it's pinned down */ - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); /* We can only do regular read/write on fifos */ stream_open(inode, filp); @@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp) } /* Ok! 
*/ - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); return 0; err_rd: @@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp) goto err; err: - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); put_pipe_info(inode, pipe); return ret; @@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg) if (!pipe) return -EBADF; - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); switch (cmd) { case F_SETPIPE_SZ: @@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg) break; } - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); return ret; }
*_lock_nested() is fundamentally broken; lockdep needs to check lock ordering, but we cannot devise a total ordering on an unbounded number of elements with only a few subclasses. The replacement is to define lock ordering with a proper comparison function. fs/pipe.c was already doing everything correctly otherwise, so nothing much changes here. Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Christian Brauner <brauner@kernel.org> Cc: Jan Kara <jack@suse.cz> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> --- fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------ 1 file changed, 36 insertions(+), 45 deletions(-)