diff mbox series

[1/2] fuse: Fix race in fuse_writepage_in_flight()

Message ID 154322517208.18737.3297786654135648324.stgit@localhost.localdomain (mailing list archive)
State New, archived
Headers show
Series [1/2] fuse: Fix race in fuse_writepage_in_flight() | expand

Commit Message

Kirill Tkhai Nov. 26, 2018, 9:46 a.m. UTC
Checking for FR_PENDING in fuse_writepage_in_flight() is racy.
It does not guarantee the first request in misc.write.next list
is not in userspace, since there we take fc->lock, while
fuse_dev_do_read() takes fiq->waitq.lock:

fuse_dev_read()                           fuse_writepage_in_flight()
                                            test_bit(FR_PENDING)
  clear_bit(FR_PENDING)
handle old_req->pages[0] in userspace
                                            copy_highpage(old_req->pages[0], page)
                                            ^^^^^
                                            userspace never sees this pages

The only reliable way to determ, whether we are able to replace
old_req's page, is to completely skip the first request in the list.
This patch makes the function to do that.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 fs/fuse/file.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

Comments

Kirill Tkhai Jan. 10, 2019, 10:48 a.m. UTC | #1
Hi, Miklos,

any comments about this?

On 26.11.2018 12:46, Kirill Tkhai wrote:
> Checking for FR_PENDING in fuse_writepage_in_flight() is racy.
> It does not guarantee the first request in misc.write.next list
> is not in userspace, since there we take fc->lock, while
> fuse_dev_do_read() takes fiq->waitq.lock:
> 
> fuse_dev_read()                           fuse_writepage_in_flight()
>                                             test_bit(FR_PENDING)
>   clear_bit(FR_PENDING)
> handle old_req->pages[0] in userspace
>                                             copy_highpage(old_req->pages[0], page)
>                                             ^^^^^
>                                             userspace never sees this pages
> 
> The only reliable way to determ, whether we are able to replace
> old_req's page, is to completely skip the first request in the list.
> This patch makes the function to do that.
> 
> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> ---
>  fs/fuse/file.c |    4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index b52f9baaa3e7..c6650c68b31a 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -1740,6 +1740,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
>  {
>  	struct fuse_conn *fc = get_fuse_conn(new_req->inode);
>  	struct fuse_inode *fi = get_fuse_inode(new_req->inode);
> +	struct fuse_req *first_req;
>  	struct fuse_req *tmp;
>  	struct fuse_req *old_req;
>  	bool found = false;
> @@ -1764,6 +1765,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
>  	}
>  
>  	new_req->num_pages = 1;
> +	first_req = old_req;
>  	for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
>  		BUG_ON(tmp->inode != new_req->inode);
>  		curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT;
> @@ -1773,7 +1775,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
>  		}
>  	}
>  
> -	if (old_req->num_pages == 1 && test_bit(FR_PENDING, &old_req->flags)) {
> +	if (old_req->num_pages == 1 && old_req != first_req) {
>  		struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host);
>  
>  		copy_highpage(old_req->pages[0], page);
>
Miklos Szeredi Jan. 10, 2019, 11 a.m. UTC | #2
On Thu, Jan 10, 2019 at 11:48 AM Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
>
> Hi, Miklos,
>
> any comments about this?

Is there a reproducer?  ISTR that fsx-linux with mmaps enabled was
good for stressing the writeback_cache code.

Thanks,
Miklos

>
> On 26.11.2018 12:46, Kirill Tkhai wrote:
> > Checking for FR_PENDING in fuse_writepage_in_flight() is racy.
> > It does not guarantee the first request in misc.write.next list
> > is not in userspace, since there we take fc->lock, while
> > fuse_dev_do_read() takes fiq->waitq.lock:
> >
> > fuse_dev_read()                           fuse_writepage_in_flight()
> >                                             test_bit(FR_PENDING)
> >   clear_bit(FR_PENDING)
> > handle old_req->pages[0] in userspace
> >                                             copy_highpage(old_req->pages[0], page)
> >                                             ^^^^^
> >                                             userspace never sees this pages
> >
> > The only reliable way to determ, whether we are able to replace
> > old_req's page, is to completely skip the first request in the list.
> > This patch makes the function to do that.
> >
> > Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> > ---
> >  fs/fuse/file.c |    4 +++-
> >  1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > index b52f9baaa3e7..c6650c68b31a 100644
> > --- a/fs/fuse/file.c
> > +++ b/fs/fuse/file.c
> > @@ -1740,6 +1740,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
> >  {
> >       struct fuse_conn *fc = get_fuse_conn(new_req->inode);
> >       struct fuse_inode *fi = get_fuse_inode(new_req->inode);
> > +     struct fuse_req *first_req;
> >       struct fuse_req *tmp;
> >       struct fuse_req *old_req;
> >       bool found = false;
> > @@ -1764,6 +1765,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
> >       }
> >
> >       new_req->num_pages = 1;
> > +     first_req = old_req;
> >       for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
> >               BUG_ON(tmp->inode != new_req->inode);
> >               curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT;
> > @@ -1773,7 +1775,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
> >               }
> >       }
> >
> > -     if (old_req->num_pages == 1 && test_bit(FR_PENDING, &old_req->flags)) {
> > +     if (old_req->num_pages == 1 && old_req != first_req) {
> >               struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host);
> >
> >               copy_highpage(old_req->pages[0], page);
> >
Kirill Tkhai Jan. 10, 2019, 11:03 a.m. UTC | #3
On 10.01.2019 14:00, Miklos Szeredi wrote:
> On Thu, Jan 10, 2019 at 11:48 AM Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
>>
>> Hi, Miklos,
>>
>> any comments about this?
> 
> Is there a reproducer?  ISTR that fsx-linux with mmaps enabled was
> good for stressing the writeback_cache code.

There is no a reproducer, since I found that by eyes during preparation of another patchset.

>>
>> On 26.11.2018 12:46, Kirill Tkhai wrote:
>>> Checking for FR_PENDING in fuse_writepage_in_flight() is racy.
>>> It does not guarantee the first request in misc.write.next list
>>> is not in userspace, since there we take fc->lock, while
>>> fuse_dev_do_read() takes fiq->waitq.lock:
>>>
>>> fuse_dev_read()                           fuse_writepage_in_flight()
>>>                                             test_bit(FR_PENDING)
>>>   clear_bit(FR_PENDING)
>>> handle old_req->pages[0] in userspace
>>>                                             copy_highpage(old_req->pages[0], page)
>>>                                             ^^^^^
>>>                                             userspace never sees this pages
>>>
>>> The only reliable way to determ, whether we are able to replace
>>> old_req's page, is to completely skip the first request in the list.
>>> This patch makes the function to do that.
>>>
>>> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>>> ---
>>>  fs/fuse/file.c |    4 +++-
>>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
>>> index b52f9baaa3e7..c6650c68b31a 100644
>>> --- a/fs/fuse/file.c
>>> +++ b/fs/fuse/file.c
>>> @@ -1740,6 +1740,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
>>>  {
>>>       struct fuse_conn *fc = get_fuse_conn(new_req->inode);
>>>       struct fuse_inode *fi = get_fuse_inode(new_req->inode);
>>> +     struct fuse_req *first_req;
>>>       struct fuse_req *tmp;
>>>       struct fuse_req *old_req;
>>>       bool found = false;
>>> @@ -1764,6 +1765,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
>>>       }
>>>
>>>       new_req->num_pages = 1;
>>> +     first_req = old_req;
>>>       for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
>>>               BUG_ON(tmp->inode != new_req->inode);
>>>               curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT;
>>> @@ -1773,7 +1775,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
>>>               }
>>>       }
>>>
>>> -     if (old_req->num_pages == 1 && test_bit(FR_PENDING, &old_req->flags)) {
>>> +     if (old_req->num_pages == 1 && old_req != first_req) {
>>>               struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host);
>>>
>>>               copy_highpage(old_req->pages[0], page);
>>>
Miklos Szeredi Jan. 15, 2019, 3:37 p.m. UTC | #4
On Thu, Jan 10, 2019 at 12:03 PM Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
>
> On 10.01.2019 14:00, Miklos Szeredi wrote:
> > On Thu, Jan 10, 2019 at 11:48 AM Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
> >>
> >> Hi, Miklos,
> >>
> >> any comments about this?
> >
> > Is there a reproducer?  ISTR that fsx-linux with mmaps enabled was
> > good for stressing the writeback_cache code.
>
> There is no a reproducer, since I found that by eyes during preparation of another patchset.

That's good.  It would even better to have a reproducer, but it
doesn't look easy...

Completely redid this and reordered the patchset so this change is
made before the locking changes actually introduce the bug.  See
fuse.git#for-next.

Thanks,
Miklos
Kirill Tkhai Jan. 15, 2019, 3:55 p.m. UTC | #5
On 15.01.2019 18:37, Miklos Szeredi wrote:
> On Thu, Jan 10, 2019 at 12:03 PM Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
>>
>> On 10.01.2019 14:00, Miklos Szeredi wrote:
>>> On Thu, Jan 10, 2019 at 11:48 AM Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
>>>>
>>>> Hi, Miklos,
>>>>
>>>> any comments about this?
>>>
>>> Is there a reproducer?  ISTR that fsx-linux with mmaps enabled was
>>> good for stressing the writeback_cache code.
>>
>> There is no a reproducer, since I found that by eyes during preparation of another patchset.
> 
> That's good.  It would even better to have a reproducer, but it
> doesn't look easy...
> 
> Completely redid this and reordered the patchset so this change is
> made before the locking changes actually introduce the bug.

Hm, I meant that I found this during preparation of the patchset,
but not that fi->lock patchset introduces the bug. I don't think
the patchset is involved:

1)before we had race, because different locks fc->lock and fiq->waitq.lock
are taken in fuse_dev_read() and fuse_writepage_in_flight();
2)after we have the same race, and the locks are fi->lock and fiq->waitq.lock.

>See fuse.git#for-next.

The renewed patch looks correct for me.

Thanks,
Kirill
Miklos Szeredi Jan. 15, 2019, 4:03 p.m. UTC | #6
On Tue, Jan 15, 2019 at 4:55 PM Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
>
> On 15.01.2019 18:37, Miklos Szeredi wrote:
> > On Thu, Jan 10, 2019 at 12:03 PM Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
> >>
> >> On 10.01.2019 14:00, Miklos Szeredi wrote:
> >>> On Thu, Jan 10, 2019 at 11:48 AM Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
> >>>>
> >>>> Hi, Miklos,
> >>>>
> >>>> any comments about this?
> >>>
> >>> Is there a reproducer?  ISTR that fsx-linux with mmaps enabled was
> >>> good for stressing the writeback_cache code.
> >>
> >> There is no a reproducer, since I found that by eyes during preparation of another patchset.
> >
> > That's good.  It would even better to have a reproducer, but it
> > doesn't look easy...
> >
> > Completely redid this and reordered the patchset so this change is
> > made before the locking changes actually introduce the bug.
>
> Hm, I meant that I found this during preparation of the patchset,
> but not that fi->lock patchset introduces the bug. I don't think
> the patchset is involved:
>
> 1)before we had race, because different locks fc->lock and fiq->waitq.lock
> are taken in fuse_dev_read() and fuse_writepage_in_flight();
> 2)after we have the same race, and the locks are fi->lock and fiq->waitq.lock.

Ah, so the race was introduced earlier, when fiq->waitq.lock was split
out from fc->lock.

Thanks,
Miklos
Kirill Tkhai Jan. 15, 2019, 4:05 p.m. UTC | #7
On 15.01.2019 19:03, Miklos Szeredi wrote:
> On Tue, Jan 15, 2019 at 4:55 PM Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
>>
>> On 15.01.2019 18:37, Miklos Szeredi wrote:
>>> On Thu, Jan 10, 2019 at 12:03 PM Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
>>>>
>>>> On 10.01.2019 14:00, Miklos Szeredi wrote:
>>>>> On Thu, Jan 10, 2019 at 11:48 AM Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
>>>>>>
>>>>>> Hi, Miklos,
>>>>>>
>>>>>> any comments about this?
>>>>>
>>>>> Is there a reproducer?  ISTR that fsx-linux with mmaps enabled was
>>>>> good for stressing the writeback_cache code.
>>>>
>>>> There is no a reproducer, since I found that by eyes during preparation of another patchset.
>>>
>>> That's good.  It would even better to have a reproducer, but it
>>> doesn't look easy...
>>>
>>> Completely redid this and reordered the patchset so this change is
>>> made before the locking changes actually introduce the bug.
>>
>> Hm, I meant that I found this during preparation of the patchset,
>> but not that fi->lock patchset introduces the bug. I don't think
>> the patchset is involved:
>>
>> 1)before we had race, because different locks fc->lock and fiq->waitq.lock
>> are taken in fuse_dev_read() and fuse_writepage_in_flight();
>> 2)after we have the same race, and the locks are fi->lock and fiq->waitq.lock.
> 
> Ah, so the race was introduced earlier, when fiq->waitq.lock was split
> out from fc->lock.

Yeah, and there was another performer, not me :)
diff mbox series

Patch

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b52f9baaa3e7..c6650c68b31a 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1740,6 +1740,7 @@  static bool fuse_writepage_in_flight(struct fuse_req *new_req,
 {
 	struct fuse_conn *fc = get_fuse_conn(new_req->inode);
 	struct fuse_inode *fi = get_fuse_inode(new_req->inode);
+	struct fuse_req *first_req;
 	struct fuse_req *tmp;
 	struct fuse_req *old_req;
 	bool found = false;
@@ -1764,6 +1765,7 @@  static bool fuse_writepage_in_flight(struct fuse_req *new_req,
 	}
 
 	new_req->num_pages = 1;
+	first_req = old_req;
 	for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
 		BUG_ON(tmp->inode != new_req->inode);
 		curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT;
@@ -1773,7 +1775,7 @@  static bool fuse_writepage_in_flight(struct fuse_req *new_req,
 		}
 	}
 
-	if (old_req->num_pages == 1 && test_bit(FR_PENDING, &old_req->flags)) {
+	if (old_req->num_pages == 1 && old_req != first_req) {
 		struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host);
 
 		copy_highpage(old_req->pages[0], page);