diff mbox

ceph: only dirty ITER_IOVEC pages for direct read

Message ID 20180316033212.30543-1-zyan@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Yan, Zheng March 16, 2018, 3:32 a.m. UTC
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/file.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

Comments

Eric Biggers March 16, 2018, 3:40 a.m. UTC | #1
Hi Yan,

On Fri, Mar 16, 2018 at 11:32:12AM +0800, Yan, Zheng wrote:
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/file.c | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 

Fixes for use-after-free bugs should be marked for stable.  The commit message
should also explain what is being fixed, exactly.  Mentioning that this bug was
found by syzkaller would also be useful, since people are looking out for those.

Thanks,

Eric
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yan, Zheng March 16, 2018, 5:23 a.m. UTC | #2
On Fri, Mar 16, 2018 at 11:40 AM, Eric Biggers <ebiggers3@gmail.com> wrote:
> Hi Yan,
>
> On Fri, Mar 16, 2018 at 11:32:12AM +0800, Yan, Zheng wrote:
>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>> ---
>>  fs/ceph/file.c | 9 ++++++---
>>  1 file changed, 6 insertions(+), 3 deletions(-)
>>
>
> Fixes for use-after-free bugs should be marked for stable.  The commit message
> should also explain what is being fixed, exactly.  Mentioning that this bug was
> found by syzkaller would also be useful, since people are looking out for those.
>

I added Reported-by and CC stable to the patch in our testing branch

https://github.com/ceph/ceph-client/commit/cfcd7a9e2d7faf5601b4731ea5a9eff7751981aa

Regards
Yan, Zheng


> Thanks,
>
> Eric
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ilya Dryomov March 29, 2018, 4:49 p.m. UTC | #3
On Fri, Mar 16, 2018 at 4:32 AM, Yan, Zheng <zyan@redhat.com> wrote:
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/file.c | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 1e9dbe77a880..fef8968011ee 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -614,7 +614,8 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>  struct ceph_aio_request {
>         struct kiocb *iocb;
>         size_t total_len;
> -       int write;
> +       bool write;
> +       bool should_dirty;
>         int error;
>         struct list_head osd_reqs;
>         unsigned int num_reqs;
> @@ -724,7 +725,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
>                 }
>         }
>
> -       ceph_put_page_vector(osd_data->pages, num_pages, !aio_req->write);
> +       ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty);
>         ceph_osdc_put_request(req);
>
>         if (rc < 0)
> @@ -821,6 +822,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>         size_t count = iov_iter_count(iter);
>         loff_t pos = iocb->ki_pos;
>         bool write = iov_iter_rw(iter) == WRITE;
> +       bool should_dirty = !write && iter_is_iovec(iter);
>
>         if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
>                 return -EROFS;
> @@ -888,6 +890,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>                         if (aio_req) {
>                                 aio_req->iocb = iocb;
>                                 aio_req->write = write;
> +                               aio_req->should_dirty = should_dirty;
>                                 INIT_LIST_HEAD(&aio_req->osd_reqs);
>                                 if (write) {
>                                         aio_req->mtime = mtime;
> @@ -945,7 +948,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>                                 len = ret;
>                 }
>
> -               ceph_put_page_vector(pages, num_pages, !write);
> +               ceph_put_page_vector(pages, num_pages, should_dirty);
>
>                 ceph_osdc_put_request(req);
>                 if (ret < 0)

Hi Zheng,

Can you explain how this fixes the invalid memory access reported in
"KASAN: use-after-free Read in set_page_dirty_lock"?  Did you come up
with a different reproducer?

Thanks,

                Ilya
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yan, Zheng March 30, 2018, 3:21 a.m. UTC | #4
> On 30 Mar 2018, at 00:49, Ilya Dryomov <idryomov@gmail.com> wrote:
> 
> On Fri, Mar 16, 2018 at 4:32 AM, Yan, Zheng <zyan@redhat.com> wrote:
>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>> ---
>> fs/ceph/file.c | 9 ++++++---
>> 1 file changed, 6 insertions(+), 3 deletions(-)
>> 
>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>> index 1e9dbe77a880..fef8968011ee 100644
>> --- a/fs/ceph/file.c
>> +++ b/fs/ceph/file.c
>> @@ -614,7 +614,8 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>> struct ceph_aio_request {
>>        struct kiocb *iocb;
>>        size_t total_len;
>> -       int write;
>> +       bool write;
>> +       bool should_dirty;
>>        int error;
>>        struct list_head osd_reqs;
>>        unsigned int num_reqs;
>> @@ -724,7 +725,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
>>                }
>>        }
>> 
>> -       ceph_put_page_vector(osd_data->pages, num_pages, !aio_req->write);
>> +       ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty);
>>        ceph_osdc_put_request(req);
>> 
>>        if (rc < 0)
>> @@ -821,6 +822,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>>        size_t count = iov_iter_count(iter);
>>        loff_t pos = iocb->ki_pos;
>>        bool write = iov_iter_rw(iter) == WRITE;
>> +       bool should_dirty = !write && iter_is_iovec(iter);
>> 
>>        if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
>>                return -EROFS;
>> @@ -888,6 +890,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>>                        if (aio_req) {
>>                                aio_req->iocb = iocb;
>>                                aio_req->write = write;
>> +                               aio_req->should_dirty = should_dirty;
>>                                INIT_LIST_HEAD(&aio_req->osd_reqs);
>>                                if (write) {
>>                                        aio_req->mtime = mtime;
>> @@ -945,7 +948,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>>                                len = ret;
>>                }
>> 
>> -               ceph_put_page_vector(pages, num_pages, !write);
>> +               ceph_put_page_vector(pages, num_pages, should_dirty);
>> 
>>                ceph_osdc_put_request(req);
>>                if (ret < 0)
> 
> Hi Zheng,
> 
> Can you explain how this fixes the invalid memory access reported in
> "KASAN: use-after-free Read in set_page_dirty_lock"?  Did you come up
> with a different reproducer?
> 

I don’t know why KASAN reports use-after-free (like false alarm) in this case. I compared the code with other filesystem, found recent commits make other filesystems only dirty ITER_IOVEC pages. The change should also make KASAN silent.

> Thanks,
> 
>                Ilya

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ilya Dryomov March 30, 2018, 7:20 a.m. UTC | #5
On Fri, Mar 30, 2018 at 5:21 AM, Yan, Zheng <zyan@redhat.com> wrote:
>
>
>> On 30 Mar 2018, at 00:49, Ilya Dryomov <idryomov@gmail.com> wrote:
>>
>> On Fri, Mar 16, 2018 at 4:32 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>>> ---
>>> fs/ceph/file.c | 9 ++++++---
>>> 1 file changed, 6 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>>> index 1e9dbe77a880..fef8968011ee 100644
>>> --- a/fs/ceph/file.c
>>> +++ b/fs/ceph/file.c
>>> @@ -614,7 +614,8 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>>> struct ceph_aio_request {
>>>        struct kiocb *iocb;
>>>        size_t total_len;
>>> -       int write;
>>> +       bool write;
>>> +       bool should_dirty;
>>>        int error;
>>>        struct list_head osd_reqs;
>>>        unsigned int num_reqs;
>>> @@ -724,7 +725,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
>>>                }
>>>        }
>>>
>>> -       ceph_put_page_vector(osd_data->pages, num_pages, !aio_req->write);
>>> +       ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty);
>>>        ceph_osdc_put_request(req);
>>>
>>>        if (rc < 0)
>>> @@ -821,6 +822,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>>>        size_t count = iov_iter_count(iter);
>>>        loff_t pos = iocb->ki_pos;
>>>        bool write = iov_iter_rw(iter) == WRITE;
>>> +       bool should_dirty = !write && iter_is_iovec(iter);
>>>
>>>        if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
>>>                return -EROFS;
>>> @@ -888,6 +890,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>>>                        if (aio_req) {
>>>                                aio_req->iocb = iocb;
>>>                                aio_req->write = write;
>>> +                               aio_req->should_dirty = should_dirty;
>>>                                INIT_LIST_HEAD(&aio_req->osd_reqs);
>>>                                if (write) {
>>>                                        aio_req->mtime = mtime;
>>> @@ -945,7 +948,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>>>                                len = ret;
>>>                }
>>>
>>> -               ceph_put_page_vector(pages, num_pages, !write);
>>> +               ceph_put_page_vector(pages, num_pages, should_dirty);
>>>
>>>                ceph_osdc_put_request(req);
>>>                if (ret < 0)
>>
>> Hi Zheng,
>>
>> Can you explain how this fixes the invalid memory access reported in
>> "KASAN: use-after-free Read in set_page_dirty_lock"?  Did you come up
>> with a different reproducer?
>>
>
> I don’t know why KASAN reports use-after-free (like false alarm) in this case. I compared the code with other filesystem, found recent commits make other filesystems only dirty ITER_IOVEC pages. The change should also make KASAN silent.

Did you manage to reproduce that KASAN report?

If it is a false positive, the patch shouldn't have been marked for
stable.

Thanks,

                Ilya
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yan, Zheng March 30, 2018, 7:26 a.m. UTC | #6
> On 30 Mar 2018, at 15:20, Ilya Dryomov <idryomov@gmail.com> wrote:
> 
> On Fri, Mar 30, 2018 at 5:21 AM, Yan, Zheng <zyan@redhat.com> wrote:
>> 
>> 
>>> On 30 Mar 2018, at 00:49, Ilya Dryomov <idryomov@gmail.com> wrote:
>>> 
>>> On Fri, Mar 16, 2018 at 4:32 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>>>> ---
>>>> fs/ceph/file.c | 9 ++++++---
>>>> 1 file changed, 6 insertions(+), 3 deletions(-)
>>>> 
>>>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>>>> index 1e9dbe77a880..fef8968011ee 100644
>>>> --- a/fs/ceph/file.c
>>>> +++ b/fs/ceph/file.c
>>>> @@ -614,7 +614,8 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>>>> struct ceph_aio_request {
>>>>       struct kiocb *iocb;
>>>>       size_t total_len;
>>>> -       int write;
>>>> +       bool write;
>>>> +       bool should_dirty;
>>>>       int error;
>>>>       struct list_head osd_reqs;
>>>>       unsigned int num_reqs;
>>>> @@ -724,7 +725,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
>>>>               }
>>>>       }
>>>> 
>>>> -       ceph_put_page_vector(osd_data->pages, num_pages, !aio_req->write);
>>>> +       ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty);
>>>>       ceph_osdc_put_request(req);
>>>> 
>>>>       if (rc < 0)
>>>> @@ -821,6 +822,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>>>>       size_t count = iov_iter_count(iter);
>>>>       loff_t pos = iocb->ki_pos;
>>>>       bool write = iov_iter_rw(iter) == WRITE;
>>>> +       bool should_dirty = !write && iter_is_iovec(iter);
>>>> 
>>>>       if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
>>>>               return -EROFS;
>>>> @@ -888,6 +890,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>>>>                       if (aio_req) {
>>>>                               aio_req->iocb = iocb;
>>>>                               aio_req->write = write;
>>>> +                               aio_req->should_dirty = should_dirty;
>>>>                               INIT_LIST_HEAD(&aio_req->osd_reqs);
>>>>                               if (write) {
>>>>                                       aio_req->mtime = mtime;
>>>> @@ -945,7 +948,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>>>>                               len = ret;
>>>>               }
>>>> 
>>>> -               ceph_put_page_vector(pages, num_pages, !write);
>>>> +               ceph_put_page_vector(pages, num_pages, should_dirty);
>>>> 
>>>>               ceph_osdc_put_request(req);
>>>>               if (ret < 0)
>>> 
>>> Hi Zheng,
>>> 
>>> Can you explain how this fixes the invalid memory access reported in
>>> "KASAN: use-after-free Read in set_page_dirty_lock"?  Did you come up
>>> with a different reproducer?
>>> 
>> 
>> I don’t know why KASAN reports use-after-free (like false alarm) in this case. I compared the code with other filesystem, found recent commits make other filesystems only dirty ITER_IOVEC pages. The change should also make KASAN silent.
> 
> Did you manage to reproduce that KASAN report?
> 
no

> If it is a false positive, the patch shouldn't have been marked for
> stable.
> 

I CCed stable because fuse did the same

commit 8fba54aebbdf1f999738121922e74bf796ad60ee
Author: Miklos Szeredi <mszeredi@redhat.com>
Date:   Wed Aug 24 18:17:04 2016 +0200

    fuse: direct-io: don't dirty ITER_BVEC pages
    
    When reading from a loop device backed by a fuse file it deadlocks on
    lock_page().
    
    This is because the page is already locked by the read() operation done on
    the loop device.  In this case we don't want to either lock the page or
    dirty it.
    
    So do what fs/direct-io.c does: only dirty the page for ITER_IOVEC vectors.


Regards
Yan, Zheng

> Thanks,
> 
>                Ilya

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ilya Dryomov March 30, 2018, 9:25 a.m. UTC | #7
On Fri, Mar 30, 2018 at 9:26 AM, Yan, Zheng <zyan@redhat.com> wrote:
>
>
>> On 30 Mar 2018, at 15:20, Ilya Dryomov <idryomov@gmail.com> wrote:
>>
>> On Fri, Mar 30, 2018 at 5:21 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>
>>>
>>>> On 30 Mar 2018, at 00:49, Ilya Dryomov <idryomov@gmail.com> wrote:
>>>>
>>>> On Fri, Mar 16, 2018 at 4:32 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>>>>> ---
>>>>> fs/ceph/file.c | 9 ++++++---
>>>>> 1 file changed, 6 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>>>>> index 1e9dbe77a880..fef8968011ee 100644
>>>>> --- a/fs/ceph/file.c
>>>>> +++ b/fs/ceph/file.c
>>>>> @@ -614,7 +614,8 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>>>>> struct ceph_aio_request {
>>>>>       struct kiocb *iocb;
>>>>>       size_t total_len;
>>>>> -       int write;
>>>>> +       bool write;
>>>>> +       bool should_dirty;
>>>>>       int error;
>>>>>       struct list_head osd_reqs;
>>>>>       unsigned int num_reqs;
>>>>> @@ -724,7 +725,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
>>>>>               }
>>>>>       }
>>>>>
>>>>> -       ceph_put_page_vector(osd_data->pages, num_pages, !aio_req->write);
>>>>> +       ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty);
>>>>>       ceph_osdc_put_request(req);
>>>>>
>>>>>       if (rc < 0)
>>>>> @@ -821,6 +822,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>>>>>       size_t count = iov_iter_count(iter);
>>>>>       loff_t pos = iocb->ki_pos;
>>>>>       bool write = iov_iter_rw(iter) == WRITE;
>>>>> +       bool should_dirty = !write && iter_is_iovec(iter);
>>>>>
>>>>>       if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
>>>>>               return -EROFS;
>>>>> @@ -888,6 +890,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>>>>>                       if (aio_req) {
>>>>>                               aio_req->iocb = iocb;
>>>>>                               aio_req->write = write;
>>>>> +                               aio_req->should_dirty = should_dirty;
>>>>>                               INIT_LIST_HEAD(&aio_req->osd_reqs);
>>>>>                               if (write) {
>>>>>                                       aio_req->mtime = mtime;
>>>>> @@ -945,7 +948,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>>>>>                               len = ret;
>>>>>               }
>>>>>
>>>>> -               ceph_put_page_vector(pages, num_pages, !write);
>>>>> +               ceph_put_page_vector(pages, num_pages, should_dirty);
>>>>>
>>>>>               ceph_osdc_put_request(req);
>>>>>               if (ret < 0)
>>>>
>>>> Hi Zheng,
>>>>
>>>> Can you explain how this fixes the invalid memory access reported in
>>>> "KASAN: use-after-free Read in set_page_dirty_lock"?  Did you come up
>>>> with a different reproducer?
>>>>
>>>
>>> I don’t know why KASAN reports use-after-free (like false alarm) in this case. I compared the code with other filesystem, found recent commits make other filesystems only dirty ITER_IOVEC pages. The change should also make KASAN silent.
>>
>> Did you manage to reproduce that KASAN report?
>>
> no
>
>> If it is a false positive, the patch shouldn't have been marked for
>> stable.
>>
>
> I CCed stable because fuse did the same
>
> commit 8fba54aebbdf1f999738121922e74bf796ad60ee
> Author: Miklos Szeredi <mszeredi@redhat.com>
> Date:   Wed Aug 24 18:17:04 2016 +0200
>
>     fuse: direct-io: don't dirty ITER_BVEC pages
>
>     When reading from a loop device backed by a fuse file it deadlocks on
>     lock_page().
>
>     This is because the page is already locked by the read() operation done on
>     the loop device.  In this case we don't want to either lock the page or
>     dirty it.
>
>     So do what fs/direct-io.c does: only dirty the page for ITER_IOVEC vectors.

OK, so it is a potential deadlock which has nothing to do with that
KASAN report.  I have rewritten the changelog to reflect that, please
take a look:

  https://github.com/ceph/ceph-client/commit/85784f9395987a422fa04263e7c0fb13da11eb5c

Thanks,

                Ilya
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Luis Henriques March 30, 2018, 10:24 a.m. UTC | #8
(CC'ing Jeff)

Ilya Dryomov <idryomov@gmail.com> writes:

> On Fri, Mar 30, 2018 at 9:26 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>
>>
>>> On 30 Mar 2018, at 15:20, Ilya Dryomov <idryomov@gmail.com> wrote:
>>>
>>> On Fri, Mar 30, 2018 at 5:21 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>>
>>>>
>>>>> On 30 Mar 2018, at 00:49, Ilya Dryomov <idryomov@gmail.com> wrote:
>>>>>
>>>>> On Fri, Mar 16, 2018 at 4:32 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>>>
>>>>> Hi Zheng,
>>>>>
>>>>> Can you explain how this fixes the invalid memory access reported in
>>>>> "KASAN: use-after-free Read in set_page_dirty_lock"?  Did you come up
>>>>> with a different reproducer?
>>>>>
>>>>
>>>> I don’t know why KASAN reports use-after-free (like false alarm) in
>>>> this case. I compared the code with other filesystem, found recent commits
>>>> make other filesystems only dirty ITER_IOVEC pages. The change should also
>>>> make KASAN silent.
>>>
>>> Did you manage to reproduce that KASAN report?
>>>
>> no
>>
>>> If it is a false positive, the patch shouldn't have been marked for
>>> stable.
>>>
>>
>> I CCed stable because fuse did the same
>>
>> commit 8fba54aebbdf1f999738121922e74bf796ad60ee
>> Author: Miklos Szeredi <mszeredi@redhat.com>
>> Date:   Wed Aug 24 18:17:04 2016 +0200
>>
>>     fuse: direct-io: don't dirty ITER_BVEC pages
>>
>>     When reading from a loop device backed by a fuse file it deadlocks on
>>     lock_page().
>>
>>     This is because the page is already locked by the read() operation done on
>>     the loop device.  In this case we don't want to either lock the page or
>>     dirty it.
>>
>>     So do what fs/direct-io.c does: only dirty the page for ITER_IOVEC vectors.
>
> OK, so it is a potential deadlock which has nothing to do with that
> KASAN report.  I have rewritten the changelog to reflect that, please
> take a look:
>
>   https://github.com/ceph/ceph-client/commit/85784f9395987a422fa04263e7c0fb13da11eb5c

Just as a side note, it's still trivial to get a cephfs-related KASAN
report by running a fuzzer (trinity in my case) against a mainline
kernel 4.16.0-rc7 with this fix backported.

As Jeff mentioned in a different thread, splice syscall is broken on
cephfs and the fix for it is still tagged as "DO NOT MERGE" in the
ceph-client testing branch.  I still think there's some bug in Jeff's
fix as I still see a crash occasionally, but I haven't yet finished
debugging it.  Unfortunately, Jeff's fix is probably not appropriate for
stable kernels (but may I'm wrong).

Cheers,
Ilya Dryomov March 30, 2018, 11:14 a.m. UTC | #9
On Fri, Mar 30, 2018 at 12:24 PM, Luis Henriques <lhenriques@suse.com> wrote:
> (CC'ing Jeff)
>
> Ilya Dryomov <idryomov@gmail.com> writes:
>
>> On Fri, Mar 30, 2018 at 9:26 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>
>>>
>>>> On 30 Mar 2018, at 15:20, Ilya Dryomov <idryomov@gmail.com> wrote:
>>>>
>>>> On Fri, Mar 30, 2018 at 5:21 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>>>
>>>>>
>>>>>> On 30 Mar 2018, at 00:49, Ilya Dryomov <idryomov@gmail.com> wrote:
>>>>>>
>>>>>> On Fri, Mar 16, 2018 at 4:32 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>>>>
>>>>>> Hi Zheng,
>>>>>>
>>>>>> Can you explain how this fixes the invalid memory access reported in
>>>>>> "KASAN: use-after-free Read in set_page_dirty_lock"?  Did you come up
>>>>>> with a different reproducer?
>>>>>>
>>>>>
>>>>> I don’t know why KASAN reports use-after-free (like false alarm) in
>>>>> this case. I compared the code with other filesystem, found recent commits
>>>>> make other filesystems only dirty ITER_IOVEC pages. The change should also
>>>>> make KASAN silent.
>>>>
>>>> Did you manage to reproduce that KASAN report?
>>>>
>>> no
>>>
>>>> If it is a false positive, the patch shouldn't have been marked for
>>>> stable.
>>>>
>>>
>>> I CCed stable because fuse did the same
>>>
>>> commit 8fba54aebbdf1f999738121922e74bf796ad60ee
>>> Author: Miklos Szeredi <mszeredi@redhat.com>
>>> Date:   Wed Aug 24 18:17:04 2016 +0200
>>>
>>>     fuse: direct-io: don't dirty ITER_BVEC pages
>>>
>>>     When reading from a loop device backed by a fuse file it deadlocks on
>>>     lock_page().
>>>
>>>     This is because the page is already locked by the read() operation done on
>>>     the loop device.  In this case we don't want to either lock the page or
>>>     dirty it.
>>>
>>>     So do what fs/direct-io.c does: only dirty the page for ITER_IOVEC vectors.
>>
>> OK, so it is a potential deadlock which has nothing to do with that
>> KASAN report.  I have rewritten the changelog to reflect that, please
>> take a look:
>>
>>   https://github.com/ceph/ceph-client/commit/85784f9395987a422fa04263e7c0fb13da11eb5c
>
> Just as a side note, it's still trivial to get a cephfs-related KASAN
> report by running a fuzzer (trinity in my case) against a mainline
> kernel 4.16.0-rc7 with this fix backported.

Right.  This patch fixes a deadlock which is completely unrelated.  The
report which I was referring to is probably a by product of ceph splice
brokenness in mainline and not a false positive.

>
> As Jeff mentioned in a different thread, splice syscall is broken on
> cephfs and the fix for it is still tagged as "DO NOT MERGE" in the
> ceph-client testing branch.  I still think there's some bug in Jeff's

Yes, that's why I asked Zheng about a different reproducer.  The
auto-generated one is based on splice, but it didn't trigger anything
for me on testing (i.e. with Jeff's splice patches).

> fix as I still see a crash occasionally, but I haven't yet finished
> debugging it.  Unfortunately, Jeff's fix is probably not appropriate for
> stable kernels (but may I'm wrong).

Need to get it in mainline in some form first...

Thanks,

                Ilya
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Luis Henriques March 30, 2018, 2:46 p.m. UTC | #10
Ilya Dryomov <idryomov@gmail.com> writes:

> On Fri, Mar 30, 2018 at 12:24 PM, Luis Henriques <lhenriques@suse.com> wrote:
>> (CC'ing Jeff)
>>
>> Ilya Dryomov <idryomov@gmail.com> writes:
>>
>>> On Fri, Mar 30, 2018 at 9:26 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>>
>>>>
>>>>> On 30 Mar 2018, at 15:20, Ilya Dryomov <idryomov@gmail.com> wrote:
>>>>>
>>>>> On Fri, Mar 30, 2018 at 5:21 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>>>>
>>>>>>
>>>>>>> On 30 Mar 2018, at 00:49, Ilya Dryomov <idryomov@gmail.com> wrote:
>>>>>>>
>>>>>>> On Fri, Mar 16, 2018 at 4:32 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>>>>>
>>>>>>> Hi Zheng,
>>>>>>>
>>>>>>> Can you explain how this fixes the invalid memory access reported in
>>>>>>> "KASAN: use-after-free Read in set_page_dirty_lock"?  Did you come up
>>>>>>> with a different reproducer?
>>>>>>>
>>>>>>
>>>>>> I don’t know why KASAN reports use-after-free (like false alarm) in
>>>>>> this case. I compared the code with other filesystem, found recent commits
>>>>>> make other filesystems only dirty ITER_IOVEC pages. The change should also
>>>>>> make KASAN silent.
>>>>>
>>>>> Did you manage to reproduce that KASAN report?
>>>>>
>>>> no
>>>>
>>>>> If it is a false positive, the patch shouldn't have been marked for
>>>>> stable.
>>>>>
>>>>
>>>> I CCed stable because fuse did the same
>>>>
>>>> commit 8fba54aebbdf1f999738121922e74bf796ad60ee
>>>> Author: Miklos Szeredi <mszeredi@redhat.com>
>>>> Date:   Wed Aug 24 18:17:04 2016 +0200
>>>>
>>>>     fuse: direct-io: don't dirty ITER_BVEC pages
>>>>
>>>>     When reading from a loop device backed by a fuse file it deadlocks on
>>>>     lock_page().
>>>>
>>>>     This is because the page is already locked by the read() operation done on
>>>>     the loop device.  In this case we don't want to either lock the page or
>>>>     dirty it.
>>>>
>>>>     So do what fs/direct-io.c does: only dirty the page for ITER_IOVEC vectors.
>>>
>>> OK, so it is a potential deadlock which has nothing to do with that
>>> KASAN report.  I have rewritten the changelog to reflect that, please
>>> take a look:
>>>
>>>   https://github.com/ceph/ceph-client/commit/85784f9395987a422fa04263e7c0fb13da11eb5c
>>
>> Just as a side note, it's still trivial to get a cephfs-related KASAN
>> report by running a fuzzer (trinity in my case) against a mainline
>> kernel 4.16.0-rc7 with this fix backported.
>
> Right.  This patch fixes a deadlock which is completely unrelated.  The
> report which I was referring to is probably a by product of ceph splice
> brokenness in mainline and not a false positive.
>
>>
>> As Jeff mentioned in a different thread, splice syscall is broken on
>> cephfs and the fix for it is still tagged as "DO NOT MERGE" in the
>> ceph-client testing branch.  I still think there's some bug in Jeff's
>
> Yes, that's why I asked Zheng about a different reproducer.  The
> auto-generated one is based on splice, but it didn't trigger anything
> for me on testing (i.e. with Jeff's splice patches).

What I've been using is simply to run trinity with:

 # trinity -V /some/dir/in/cephfs -c splice

With Jeff's patches it may take a while until you get a splat.  However,
I just realised that I may be wrong in my analysis and Jeff's patches
are _probably_ correct.  I've been paying too much attention to the
KASAN report (the one I'm currently looking is "slab-out-of-bounds in
iov_iter_get_pages_alloc").  But what's interesting is what follows it:

[ 2204.022588] iov_iter_get_pages_alloc: array overrun (ffff88006bb63bf0 > ffff88006bb63b88 + 12)
[ 2204.024546] WARNING: CPU: 1 PID: 276 at lib/iov_iter.c:1297 iov_iter_get_pages_alloc+0x1257/0x1290
...

(the line number is likely incorrect, but it's the only WARN_ONCE() in
lib/iov_iter.c).

So, it looks like the bvec is being corrupted somewhere else and I
wonder if that could be something similar to what ext4 fixed with commit
e9e3bcecf44c ("ext4: serialize unaligned asynchronous DIO").  It is
possible that the cephfs client also needs to serialize calls to
ceph_direct_read_write(), but I'm still looking and trying to understand
if this could be the problem.

>> fix as I still see a crash occasionally, but I haven't yet finished
>> debugging it.  Unfortunately, Jeff's fix is probably not appropriate for
>> stable kernels (but may I'm wrong).
>
> Need to get it in mainline in some form first...

Yes, of course.  Let's see if Jeff has any ideas on this and maybe he
can get those patches ready for 4.17.

Cheers,
Jeff Layton March 30, 2018, 7:14 p.m. UTC | #11
On Fri, 2018-03-30 at 11:24 +0100, Luis Henriques wrote:
> (CC'ing Jeff)
> 
> Ilya Dryomov <idryomov@gmail.com> writes:
> 
> > On Fri, Mar 30, 2018 at 9:26 AM, Yan, Zheng <zyan@redhat.com> wrote:
> > > 
> > > 
> > > > On 30 Mar 2018, at 15:20, Ilya Dryomov <idryomov@gmail.com> wrote:
> > > > 
> > > > On Fri, Mar 30, 2018 at 5:21 AM, Yan, Zheng <zyan@redhat.com> wrote:
> > > > > 
> > > > > 
> > > > > > On 30 Mar 2018, at 00:49, Ilya Dryomov <idryomov@gmail.com> wrote:
> > > > > > 
> > > > > > On Fri, Mar 16, 2018 at 4:32 AM, Yan, Zheng <zyan@redhat.com> wrote:
> > > > > > 
> > > > > > Hi Zheng,
> > > > > > 
> > > > > > Can you explain how this fixes the invalid memory access reported in
> > > > > > "KASAN: use-after-free Read in set_page_dirty_lock"?  Did you come up
> > > > > > with a different reproducer?
> > > > > > 
> > > > > 
> > > > > I don’t know why KASAN reports use-after-free (like false alarm) in
> > > > > this case. I compared the code with other filesystem, found recent commits
> > > > > make other filesystems only dirty ITER_IOVEC pages. The change should also
> > > > > make KASAN silent.
> > > > 
> > > > Did you manage to reproduce that KASAN report?
> > > > 
> > > 
> > > no
> > > 
> > > > If it is a false positive, the patch shouldn't have been marked for
> > > > stable.
> > > > 
> > > 
> > > I CCed stable because fuse did the same
> > > 
> > > commit 8fba54aebbdf1f999738121922e74bf796ad60ee
> > > Author: Miklos Szeredi <mszeredi@redhat.com>
> > > Date:   Wed Aug 24 18:17:04 2016 +0200
> > > 
> > >     fuse: direct-io: don't dirty ITER_BVEC pages
> > > 
> > >     When reading from a loop device backed by a fuse file it deadlocks on
> > >     lock_page().
> > > 
> > >     This is because the page is already locked by the read() operation done on
> > >     the loop device.  In this case we don't want to either lock the page or
> > >     dirty it.
> > > 
> > >     So do what fs/direct-io.c does: only dirty the page for ITER_IOVEC vectors.
> > 
> > OK, so it is a potential deadlock which has nothing to do with that
> > KASAN report.  I have rewritten the changelog to reflect that, please
> > take a look:
> > 
> >   https://github.com/ceph/ceph-client/commit/85784f9395987a422fa04263e7c0fb13da11eb5c
> 
> Just as a side note, it's still trivial to get a cephfs-related KASAN
> report by running a fuzzer (trinity in my case) against a mainline
> kernel 4.16.0-rc7 with this fix backported.
> 

I suspect trinity is clobbering the array 
> As Jeff mentioned in a different thread, splice syscall is broken on
> cephfs and the fix for it is still tagged as "DO NOT MERGE" in the
> ceph-client testing branch.  I still think there's some bug in Jeff's
> fix as I still see a crash occasionally, but I haven't yet finished
> debugging it.  Unfortunately, Jeff's fix is probably not appropriate for
> stable kernels (but may I'm wrong).
> 
> Cheers,

(cc'ing Al)

Yeah, I should have revisited this a while ago. So the deal is that I
posted this patch upstream around a year ago, but Al didn't really like
it:

    https://patchwork.kernel.org/patch/9541829/

He wanted to add some iov_iter_for_each_page infrastructure and base
the new function on that. I had thought he had something queued up
along those lines at one point, but I don't see anything in mainline so
far.

There is a iov_iter_for_each_range, but it doesn't handle userland
iovecs and doesn't seem to care about alignment. I think we'll need
both here.

I'm not sure however that a per-page callback is really that helpful
for callers like ceph_direct_read_write. We'll just end up having to do
more  work in the fs to batch up the pages before we add the op to the
req.

iov_iter_get_pages_alloc is really sort of what we want in this case,
and we want that function to stitch together longer arrays when it can.

Al, do you have any thoughts on what we should do here?

Thanks,
diff mbox

Patch

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 1e9dbe77a880..fef8968011ee 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -614,7 +614,8 @@  static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
 struct ceph_aio_request {
 	struct kiocb *iocb;
 	size_t total_len;
-	int write;
+	bool write;
+	bool should_dirty;
 	int error;
 	struct list_head osd_reqs;
 	unsigned int num_reqs;
@@ -724,7 +725,7 @@  static void ceph_aio_complete_req(struct ceph_osd_request *req)
 		}
 	}
 
-	ceph_put_page_vector(osd_data->pages, num_pages, !aio_req->write);
+	ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty);
 	ceph_osdc_put_request(req);
 
 	if (rc < 0)
@@ -821,6 +822,7 @@  ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 	size_t count = iov_iter_count(iter);
 	loff_t pos = iocb->ki_pos;
 	bool write = iov_iter_rw(iter) == WRITE;
+	bool should_dirty = !write && iter_is_iovec(iter);
 
 	if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
 		return -EROFS;
@@ -888,6 +890,7 @@  ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 			if (aio_req) {
 				aio_req->iocb = iocb;
 				aio_req->write = write;
+				aio_req->should_dirty = should_dirty;
 				INIT_LIST_HEAD(&aio_req->osd_reqs);
 				if (write) {
 					aio_req->mtime = mtime;
@@ -945,7 +948,7 @@  ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 				len = ret;
 		}
 
-		ceph_put_page_vector(pages, num_pages, !write);
+		ceph_put_page_vector(pages, num_pages, should_dirty);
 
 		ceph_osdc_put_request(req);
 		if (ret < 0)