[v6,1/5] x86/mem_sharing: reorder when pages are unlocked and released
diff mbox series

Message ID 20190717193335.11991-2-tamas@tklengyel.com
State New
Headers show
Series
  • x86/mem_sharing: assorted fixes
Related show

Commit Message

Tamas K Lengyel July 17, 2019, 7:33 p.m. UTC
Calling _put_page_type while also holding the page_lock
for that page can cause a deadlock.

The comment being dropped is incorrect since it's now out-of-date.

Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: George Dunlap <george.dunlap@eu.citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Roger Pau Monne <roger.pau@citrix.com>
---
v6: rebase on staging
v5: BUG_ON early before releasing references
---
 xen/arch/x86/mm/mem_sharing.c | 40 +++++++++++------------------------
 1 file changed, 12 insertions(+), 28 deletions(-)

Comments

Jan Beulich July 18, 2019, 10:46 a.m. UTC | #1
On 17.07.2019 21:33, Tamas K Lengyel wrote:
> Calling _put_page_type while also holding the page_lock
> for that page can cause a deadlock.
> 
> The comment being dropped is incorrect since it's now out-of-date.
> 
> Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com>

The description covers ...

> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -648,10 +648,6 @@ static int page_make_private(struct domain *d, struct page_info *page)
>           return -EBUSY;
>       }
>   
> -    /* We can only change the type if count is one */
> -    /* Because we are locking pages individually, we need to drop
> -     * the lock here, while the page is typed. We cannot risk the
> -     * race of page_unlock and then put_page_type. */
>       expected_type = (PGT_shared_page | PGT_validated | PGT_locked | 2);
>       if ( page->u.inuse.type_info != expected_type )
>       {
> @@ -660,12 +656,11 @@ static int page_make_private(struct domain *d, struct page_info *page)
>           return -EEXIST;
>       }
>   
> +    mem_sharing_page_unlock(page);
> +
>       /* Drop the final typecount */
>       put_page_and_type(page);
>   
> -    /* Now that we've dropped the type, we can unlock */
> -    mem_sharing_page_unlock(page);
> -
>       /* Change the owner */
>       ASSERT(page_get_owner(page) == dom_cow);
>       page_set_owner(page, d);

all of the above. But what about ...

> @@ -900,6 +895,7 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>       p2m_type_t smfn_type, cmfn_type;
>       struct two_gfns tg;
>       struct rmap_iterator ri;
> +    unsigned long put_count = 0;
>   
>       get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn,
>                    cd, cgfn, &cmfn_type, NULL, &cmfn, 0, &tg);
> @@ -964,15 +960,6 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>           goto err_out;
>       }
>   
> -    /* Acquire an extra reference, for the freeing below to be safe. */
> -    if ( !get_page(cpage, dom_cow) )
> -    {
> -        ret = -EOVERFLOW;
> -        mem_sharing_page_unlock(secondpg);
> -        mem_sharing_page_unlock(firstpg);
> -        goto err_out;
> -    }
> -
>       /* Merge the lists together */
>       rmap_seed_iterator(cpage, &ri);
>       while ( (gfn = rmap_iterate(cpage, &ri)) != NULL)
> @@ -984,13 +971,14 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>            * Don't change the type of rmap for the client page. */
>           rmap_del(gfn, cpage, 0);
>           rmap_add(gfn, spage);
> -        put_page_and_type(cpage);
> +        put_count++;
>           d = get_domain_by_id(gfn->domain);
>           BUG_ON(!d);
>           BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
>           put_domain(d);
>       }
>       ASSERT(list_empty(&cpage->sharing->gfns));
> +    BUG_ON(!put_count);
>   
>       /* Clear the rest of the shared state */
>       page_sharing_dispose(cpage);
> @@ -1001,7 +989,9 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>   
>       /* Free the client page */
>       put_page_alloc_ref(cpage);
> -    put_page(cpage);
> +
> +    while ( put_count-- )
> +        put_page_and_type(cpage);
>   
>       /* We managed to free a domain page. */
>       atomic_dec(&nr_shared_mfns);
> @@ -1165,19 +1155,13 @@ int __mem_sharing_unshare_page(struct domain *d,
>       {
>           if ( !last_gfn )
>               mem_sharing_gfn_destroy(page, d, gfn_info);
> -        put_page_and_type(page);
> +
>           mem_sharing_page_unlock(page);
> +
>           if ( last_gfn )
> -        {
> -            if ( !get_page(page, dom_cow) )
> -            {
> -                put_gfn(d, gfn);
> -                domain_crash(d);
> -                return -EOVERFLOW;
> -            }
>               put_page_alloc_ref(page);
> -            put_page(page);
> -        }
> +
> +        put_page_and_type(page);
>           put_gfn(d, gfn);
>   
>           return 0;

... this (main, as I guess by the title) part of the change? I think
you want to explain what was wrong here and/or why the new arrangement
is better. (I'm sorry, I guess it was this way on prior versions
already, but apparently I didn't notice.)

Jan
Tamas K Lengyel July 18, 2019, 12:55 p.m. UTC | #2
On Thu, Jul 18, 2019 at 4:47 AM Jan Beulich <JBeulich@suse.com> wrote:
>
> On 17.07.2019 21:33, Tamas K Lengyel wrote:
> > Calling _put_page_type while also holding the page_lock
> > for that page can cause a deadlock.
> >
> > The comment being dropped is incorrect since it's now out-of-date.
> >
> > Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com>
>
> The description covers ...
>
> > --- a/xen/arch/x86/mm/mem_sharing.c
> > +++ b/xen/arch/x86/mm/mem_sharing.c
> > @@ -648,10 +648,6 @@ static int page_make_private(struct domain *d, struct page_info *page)
> >           return -EBUSY;
> >       }
> >
> > -    /* We can only change the type if count is one */
> > -    /* Because we are locking pages individually, we need to drop
> > -     * the lock here, while the page is typed. We cannot risk the
> > -     * race of page_unlock and then put_page_type. */
> >       expected_type = (PGT_shared_page | PGT_validated | PGT_locked | 2);
> >       if ( page->u.inuse.type_info != expected_type )
> >       {
> > @@ -660,12 +656,11 @@ static int page_make_private(struct domain *d, struct page_info *page)
> >           return -EEXIST;
> >       }
> >
> > +    mem_sharing_page_unlock(page);
> > +
> >       /* Drop the final typecount */
> >       put_page_and_type(page);
> >
> > -    /* Now that we've dropped the type, we can unlock */
> > -    mem_sharing_page_unlock(page);
> > -
> >       /* Change the owner */
> >       ASSERT(page_get_owner(page) == dom_cow);
> >       page_set_owner(page, d);
>
> all of the above. But what about ...
>
> > @@ -900,6 +895,7 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >       p2m_type_t smfn_type, cmfn_type;
> >       struct two_gfns tg;
> >       struct rmap_iterator ri;
> > +    unsigned long put_count = 0;
> >
> >       get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn,
> >                    cd, cgfn, &cmfn_type, NULL, &cmfn, 0, &tg);
> > @@ -964,15 +960,6 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >           goto err_out;
> >       }
> >
> > -    /* Acquire an extra reference, for the freeing below to be safe. */
> > -    if ( !get_page(cpage, dom_cow) )
> > -    {
> > -        ret = -EOVERFLOW;
> > -        mem_sharing_page_unlock(secondpg);
> > -        mem_sharing_page_unlock(firstpg);
> > -        goto err_out;
> > -    }
> > -
> >       /* Merge the lists together */
> >       rmap_seed_iterator(cpage, &ri);
> >       while ( (gfn = rmap_iterate(cpage, &ri)) != NULL)
> > @@ -984,13 +971,14 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >            * Don't change the type of rmap for the client page. */
> >           rmap_del(gfn, cpage, 0);
> >           rmap_add(gfn, spage);
> > -        put_page_and_type(cpage);
> > +        put_count++;
> >           d = get_domain_by_id(gfn->domain);
> >           BUG_ON(!d);
> >           BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
> >           put_domain(d);
> >       }
> >       ASSERT(list_empty(&cpage->sharing->gfns));
> > +    BUG_ON(!put_count);
> >
> >       /* Clear the rest of the shared state */
> >       page_sharing_dispose(cpage);
> > @@ -1001,7 +989,9 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >
> >       /* Free the client page */
> >       put_page_alloc_ref(cpage);
> > -    put_page(cpage);
> > +
> > +    while ( put_count-- )
> > +        put_page_and_type(cpage);
> >
> >       /* We managed to free a domain page. */
> >       atomic_dec(&nr_shared_mfns);
> > @@ -1165,19 +1155,13 @@ int __mem_sharing_unshare_page(struct domain *d,
> >       {
> >           if ( !last_gfn )
> >               mem_sharing_gfn_destroy(page, d, gfn_info);
> > -        put_page_and_type(page);
> > +
> >           mem_sharing_page_unlock(page);
> > +
> >           if ( last_gfn )
> > -        {
> > -            if ( !get_page(page, dom_cow) )
> > -            {
> > -                put_gfn(d, gfn);
> > -                domain_crash(d);
> > -                return -EOVERFLOW;
> > -            }
> >               put_page_alloc_ref(page);
> > -            put_page(page);
> > -        }
> > +
> > +        put_page_and_type(page);
> >           put_gfn(d, gfn);
> >
> >           return 0;
>
> ... this (main, as I guess by the title) part of the change? I think
> you want to explain what was wrong here and/or why the new arrangement
> is better. (I'm sorry, I guess it was this way on prior versions
> already, but apparently I didn't notice.)

It's what the patch message says - calling put_page_and_type before
mem_sharing_page_unlock can cause a deadlock. Since now we are now
holding a reference to the page till the end there is no need for the
extra get_page/put_page logic when we are dealing with the last_gfn.

Tamas
Jan Beulich July 18, 2019, 1:12 p.m. UTC | #3
On 18.07.2019 14:55, Tamas K Lengyel wrote:
> On Thu, Jul 18, 2019 at 4:47 AM Jan Beulich <JBeulich@suse.com> wrote:
>> On 17.07.2019 21:33, Tamas K Lengyel wrote:
>>> @@ -900,6 +895,7 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>>>        p2m_type_t smfn_type, cmfn_type;
>>>        struct two_gfns tg;
>>>        struct rmap_iterator ri;
>>> +    unsigned long put_count = 0;
>>>
>>>        get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn,
>>>                     cd, cgfn, &cmfn_type, NULL, &cmfn, 0, &tg);
>>> @@ -964,15 +960,6 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>>>            goto err_out;
>>>        }
>>>
>>> -    /* Acquire an extra reference, for the freeing below to be safe. */
>>> -    if ( !get_page(cpage, dom_cow) )
>>> -    {
>>> -        ret = -EOVERFLOW;
>>> -        mem_sharing_page_unlock(secondpg);
>>> -        mem_sharing_page_unlock(firstpg);
>>> -        goto err_out;
>>> -    }
>>> -
>>>        /* Merge the lists together */
>>>        rmap_seed_iterator(cpage, &ri);
>>>        while ( (gfn = rmap_iterate(cpage, &ri)) != NULL)
>>> @@ -984,13 +971,14 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>>>             * Don't change the type of rmap for the client page. */
>>>            rmap_del(gfn, cpage, 0);
>>>            rmap_add(gfn, spage);
>>> -        put_page_and_type(cpage);
>>> +        put_count++;
>>>            d = get_domain_by_id(gfn->domain);
>>>            BUG_ON(!d);
>>>            BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
>>>            put_domain(d);
>>>        }
>>>        ASSERT(list_empty(&cpage->sharing->gfns));
>>> +    BUG_ON(!put_count);
>>>
>>>        /* Clear the rest of the shared state */
>>>        page_sharing_dispose(cpage);
>>> @@ -1001,7 +989,9 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>>>
>>>        /* Free the client page */
>>>        put_page_alloc_ref(cpage);
>>> -    put_page(cpage);
>>> +
>>> +    while ( put_count-- )
>>> +        put_page_and_type(cpage);
>>>
>>>        /* We managed to free a domain page. */
>>>        atomic_dec(&nr_shared_mfns);
>>> @@ -1165,19 +1155,13 @@ int __mem_sharing_unshare_page(struct domain *d,
>>>        {
>>>            if ( !last_gfn )
>>>                mem_sharing_gfn_destroy(page, d, gfn_info);
>>> -        put_page_and_type(page);
>>> +
>>>            mem_sharing_page_unlock(page);
>>> +
>>>            if ( last_gfn )
>>> -        {
>>> -            if ( !get_page(page, dom_cow) )
>>> -            {
>>> -                put_gfn(d, gfn);
>>> -                domain_crash(d);
>>> -                return -EOVERFLOW;
>>> -            }
>>>                put_page_alloc_ref(page);
>>> -            put_page(page);
>>> -        }
>>> +
>>> +        put_page_and_type(page);
>>>            put_gfn(d, gfn);
>>>
>>>            return 0;
>>
>> ... this (main, as I guess by the title) part of the change? I think
>> you want to explain what was wrong here and/or why the new arrangement
>> is better. (I'm sorry, I guess it was this way on prior versions
>> already, but apparently I didn't notice.)
> 
> It's what the patch message says - calling put_page_and_type before
> mem_sharing_page_unlock can cause a deadlock. Since now we are now
> holding a reference to the page till the end there is no need for the
> extra get_page/put_page logic when we are dealing with the last_gfn.

The title says "reorder" without any "why".

Jan
Tamas K Lengyel July 18, 2019, 1:13 p.m. UTC | #4
On Thu, Jul 18, 2019 at 7:12 AM Jan Beulich <JBeulich@suse.com> wrote:
>
> On 18.07.2019 14:55, Tamas K Lengyel wrote:
> > On Thu, Jul 18, 2019 at 4:47 AM Jan Beulich <JBeulich@suse.com> wrote:
> >> On 17.07.2019 21:33, Tamas K Lengyel wrote:
> >>> @@ -900,6 +895,7 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >>>        p2m_type_t smfn_type, cmfn_type;
> >>>        struct two_gfns tg;
> >>>        struct rmap_iterator ri;
> >>> +    unsigned long put_count = 0;
> >>>
> >>>        get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn,
> >>>                     cd, cgfn, &cmfn_type, NULL, &cmfn, 0, &tg);
> >>> @@ -964,15 +960,6 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >>>            goto err_out;
> >>>        }
> >>>
> >>> -    /* Acquire an extra reference, for the freeing below to be safe. */
> >>> -    if ( !get_page(cpage, dom_cow) )
> >>> -    {
> >>> -        ret = -EOVERFLOW;
> >>> -        mem_sharing_page_unlock(secondpg);
> >>> -        mem_sharing_page_unlock(firstpg);
> >>> -        goto err_out;
> >>> -    }
> >>> -
> >>>        /* Merge the lists together */
> >>>        rmap_seed_iterator(cpage, &ri);
> >>>        while ( (gfn = rmap_iterate(cpage, &ri)) != NULL)
> >>> @@ -984,13 +971,14 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >>>             * Don't change the type of rmap for the client page. */
> >>>            rmap_del(gfn, cpage, 0);
> >>>            rmap_add(gfn, spage);
> >>> -        put_page_and_type(cpage);
> >>> +        put_count++;
> >>>            d = get_domain_by_id(gfn->domain);
> >>>            BUG_ON(!d);
> >>>            BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
> >>>            put_domain(d);
> >>>        }
> >>>        ASSERT(list_empty(&cpage->sharing->gfns));
> >>> +    BUG_ON(!put_count);
> >>>
> >>>        /* Clear the rest of the shared state */
> >>>        page_sharing_dispose(cpage);
> >>> @@ -1001,7 +989,9 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >>>
> >>>        /* Free the client page */
> >>>        put_page_alloc_ref(cpage);
> >>> -    put_page(cpage);
> >>> +
> >>> +    while ( put_count-- )
> >>> +        put_page_and_type(cpage);
> >>>
> >>>        /* We managed to free a domain page. */
> >>>        atomic_dec(&nr_shared_mfns);
> >>> @@ -1165,19 +1155,13 @@ int __mem_sharing_unshare_page(struct domain *d,
> >>>        {
> >>>            if ( !last_gfn )
> >>>                mem_sharing_gfn_destroy(page, d, gfn_info);
> >>> -        put_page_and_type(page);
> >>> +
> >>>            mem_sharing_page_unlock(page);
> >>> +
> >>>            if ( last_gfn )
> >>> -        {
> >>> -            if ( !get_page(page, dom_cow) )
> >>> -            {
> >>> -                put_gfn(d, gfn);
> >>> -                domain_crash(d);
> >>> -                return -EOVERFLOW;
> >>> -            }
> >>>                put_page_alloc_ref(page);
> >>> -            put_page(page);
> >>> -        }
> >>> +
> >>> +        put_page_and_type(page);
> >>>            put_gfn(d, gfn);
> >>>
> >>>            return 0;
> >>
> >> ... this (main, as I guess by the title) part of the change? I think
> >> you want to explain what was wrong here and/or why the new arrangement
> >> is better. (I'm sorry, I guess it was this way on prior versions
> >> already, but apparently I didn't notice.)
> >
> > It's what the patch message says - calling put_page_and_type before
> > mem_sharing_page_unlock can cause a deadlock. Since now we are now
> > holding a reference to the page till the end there is no need for the
> > extra get_page/put_page logic when we are dealing with the last_gfn.
>
> The title says "reorder" without any "why".

Yes, I can't reasonably fit "Calling _put_page_type while also holding
the page_lock for that page can cause a deadlock." into the title. So
it's spelled out in the patch message.

Tamas
Jan Beulich July 18, 2019, 1:33 p.m. UTC | #5
On 18.07.2019 15:13, Tamas K Lengyel wrote:
> On Thu, Jul 18, 2019 at 7:12 AM Jan Beulich <JBeulich@suse.com> wrote:
>>
>> On 18.07.2019 14:55, Tamas K Lengyel wrote:
>>> On Thu, Jul 18, 2019 at 4:47 AM Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 17.07.2019 21:33, Tamas K Lengyel wrote:
>>>>> @@ -900,6 +895,7 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>>>>>         p2m_type_t smfn_type, cmfn_type;
>>>>>         struct two_gfns tg;
>>>>>         struct rmap_iterator ri;
>>>>> +    unsigned long put_count = 0;
>>>>>
>>>>>         get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn,
>>>>>                      cd, cgfn, &cmfn_type, NULL, &cmfn, 0, &tg);
>>>>> @@ -964,15 +960,6 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>>>>>             goto err_out;
>>>>>         }
>>>>>
>>>>> -    /* Acquire an extra reference, for the freeing below to be safe. */
>>>>> -    if ( !get_page(cpage, dom_cow) )
>>>>> -    {
>>>>> -        ret = -EOVERFLOW;
>>>>> -        mem_sharing_page_unlock(secondpg);
>>>>> -        mem_sharing_page_unlock(firstpg);
>>>>> -        goto err_out;
>>>>> -    }
>>>>> -
>>>>>         /* Merge the lists together */
>>>>>         rmap_seed_iterator(cpage, &ri);
>>>>>         while ( (gfn = rmap_iterate(cpage, &ri)) != NULL)
>>>>> @@ -984,13 +971,14 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>>>>>              * Don't change the type of rmap for the client page. */
>>>>>             rmap_del(gfn, cpage, 0);
>>>>>             rmap_add(gfn, spage);
>>>>> -        put_page_and_type(cpage);
>>>>> +        put_count++;
>>>>>             d = get_domain_by_id(gfn->domain);
>>>>>             BUG_ON(!d);
>>>>>             BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
>>>>>             put_domain(d);
>>>>>         }
>>>>>         ASSERT(list_empty(&cpage->sharing->gfns));
>>>>> +    BUG_ON(!put_count);
>>>>>
>>>>>         /* Clear the rest of the shared state */
>>>>>         page_sharing_dispose(cpage);
>>>>> @@ -1001,7 +989,9 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>>>>>
>>>>>         /* Free the client page */
>>>>>         put_page_alloc_ref(cpage);
>>>>> -    put_page(cpage);
>>>>> +
>>>>> +    while ( put_count-- )
>>>>> +        put_page_and_type(cpage);
>>>>>
>>>>>         /* We managed to free a domain page. */
>>>>>         atomic_dec(&nr_shared_mfns);
>>>>> @@ -1165,19 +1155,13 @@ int __mem_sharing_unshare_page(struct domain *d,
>>>>>         {
>>>>>             if ( !last_gfn )
>>>>>                 mem_sharing_gfn_destroy(page, d, gfn_info);
>>>>> -        put_page_and_type(page);
>>>>> +
>>>>>             mem_sharing_page_unlock(page);
>>>>> +
>>>>>             if ( last_gfn )
>>>>> -        {
>>>>> -            if ( !get_page(page, dom_cow) )
>>>>> -            {
>>>>> -                put_gfn(d, gfn);
>>>>> -                domain_crash(d);
>>>>> -                return -EOVERFLOW;
>>>>> -            }
>>>>>                 put_page_alloc_ref(page);
>>>>> -            put_page(page);
>>>>> -        }
>>>>> +
>>>>> +        put_page_and_type(page);
>>>>>             put_gfn(d, gfn);
>>>>>
>>>>>             return 0;
>>>>
>>>> ... this (main, as I guess by the title) part of the change? I think
>>>> you want to explain what was wrong here and/or why the new arrangement
>>>> is better. (I'm sorry, I guess it was this way on prior versions
>>>> already, but apparently I didn't notice.)
>>>
>>> It's what the patch message says - calling put_page_and_type before
>>> mem_sharing_page_unlock can cause a deadlock. Since now we are now
>>> holding a reference to the page till the end there is no need for the
>>> extra get_page/put_page logic when we are dealing with the last_gfn.
>>
>> The title says "reorder" without any "why".
> 
> Yes, I can't reasonably fit "Calling _put_page_type while also holding
> the page_lock for that page can cause a deadlock." into the title. So
> it's spelled out in the patch message.

Of course not. And I realize _part_ of the changes is indeed what the
title says (although for share_pages() that's not obvious from the
patch alone). But you do more: You also avoid acquiring an extra
reference in share_pages().

And since you made me look at the code again: If put_page() is unsafe
with a lock held, how come the get_page_and_type() in share_pages()
is safe with two such locks held? If it really is, it surely would be
worthwhile to state in the description. There's another such instance
in mem_sharing_add_to_physmap() (plus a get_page()), and also one
where put_page_and_type() gets called with such a lock held (afaics).

Jan
Tamas K Lengyel July 18, 2019, 1:47 p.m. UTC | #6
On Thu, Jul 18, 2019 at 7:33 AM Jan Beulich <JBeulich@suse.com> wrote:
>
> On 18.07.2019 15:13, Tamas K Lengyel wrote:
> > On Thu, Jul 18, 2019 at 7:12 AM Jan Beulich <JBeulich@suse.com> wrote:
> >>
> >> On 18.07.2019 14:55, Tamas K Lengyel wrote:
> >>> On Thu, Jul 18, 2019 at 4:47 AM Jan Beulich <JBeulich@suse.com> wrote:
> >>>> On 17.07.2019 21:33, Tamas K Lengyel wrote:
> >>>>> @@ -900,6 +895,7 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >>>>>         p2m_type_t smfn_type, cmfn_type;
> >>>>>         struct two_gfns tg;
> >>>>>         struct rmap_iterator ri;
> >>>>> +    unsigned long put_count = 0;
> >>>>>
> >>>>>         get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn,
> >>>>>                      cd, cgfn, &cmfn_type, NULL, &cmfn, 0, &tg);
> >>>>> @@ -964,15 +960,6 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >>>>>             goto err_out;
> >>>>>         }
> >>>>>
> >>>>> -    /* Acquire an extra reference, for the freeing below to be safe. */
> >>>>> -    if ( !get_page(cpage, dom_cow) )
> >>>>> -    {
> >>>>> -        ret = -EOVERFLOW;
> >>>>> -        mem_sharing_page_unlock(secondpg);
> >>>>> -        mem_sharing_page_unlock(firstpg);
> >>>>> -        goto err_out;
> >>>>> -    }
> >>>>> -
> >>>>>         /* Merge the lists together */
> >>>>>         rmap_seed_iterator(cpage, &ri);
> >>>>>         while ( (gfn = rmap_iterate(cpage, &ri)) != NULL)
> >>>>> @@ -984,13 +971,14 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >>>>>              * Don't change the type of rmap for the client page. */
> >>>>>             rmap_del(gfn, cpage, 0);
> >>>>>             rmap_add(gfn, spage);
> >>>>> -        put_page_and_type(cpage);
> >>>>> +        put_count++;
> >>>>>             d = get_domain_by_id(gfn->domain);
> >>>>>             BUG_ON(!d);
> >>>>>             BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
> >>>>>             put_domain(d);
> >>>>>         }
> >>>>>         ASSERT(list_empty(&cpage->sharing->gfns));
> >>>>> +    BUG_ON(!put_count);
> >>>>>
> >>>>>         /* Clear the rest of the shared state */
> >>>>>         page_sharing_dispose(cpage);
> >>>>> @@ -1001,7 +989,9 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >>>>>
> >>>>>         /* Free the client page */
> >>>>>         put_page_alloc_ref(cpage);
> >>>>> -    put_page(cpage);
> >>>>> +
> >>>>> +    while ( put_count-- )
> >>>>> +        put_page_and_type(cpage);
> >>>>>
> >>>>>         /* We managed to free a domain page. */
> >>>>>         atomic_dec(&nr_shared_mfns);
> >>>>> @@ -1165,19 +1155,13 @@ int __mem_sharing_unshare_page(struct domain *d,
> >>>>>         {
> >>>>>             if ( !last_gfn )
> >>>>>                 mem_sharing_gfn_destroy(page, d, gfn_info);
> >>>>> -        put_page_and_type(page);
> >>>>> +
> >>>>>             mem_sharing_page_unlock(page);
> >>>>> +
> >>>>>             if ( last_gfn )
> >>>>> -        {
> >>>>> -            if ( !get_page(page, dom_cow) )
> >>>>> -            {
> >>>>> -                put_gfn(d, gfn);
> >>>>> -                domain_crash(d);
> >>>>> -                return -EOVERFLOW;
> >>>>> -            }
> >>>>>                 put_page_alloc_ref(page);
> >>>>> -            put_page(page);
> >>>>> -        }
> >>>>> +
> >>>>> +        put_page_and_type(page);
> >>>>>             put_gfn(d, gfn);
> >>>>>
> >>>>>             return 0;
> >>>>
> >>>> ... this (main, as I guess by the title) part of the change? I think
> >>>> you want to explain what was wrong here and/or why the new arrangement
> >>>> is better. (I'm sorry, I guess it was this way on prior versions
> >>>> already, but apparently I didn't notice.)
> >>>
> >>> It's what the patch message says - calling put_page_and_type before
> >>> mem_sharing_page_unlock can cause a deadlock. Since now we are now
> >>> holding a reference to the page till the end there is no need for the
> >>> extra get_page/put_page logic when we are dealing with the last_gfn.
> >>
> >> The title says "reorder" without any "why".
> >
> > Yes, I can't reasonably fit "Calling _put_page_type while also holding
> > the page_lock for that page can cause a deadlock." into the title. So
> > it's spelled out in the patch message.
>
> Of course not. And I realize _part_ of the changes is indeed what the
> title says (although for share_pages() that's not obvious from the
> patch alone). But you do more: You also avoid acquiring an extra
> reference in share_pages().

I feel like we are going in circles and having the same conversations
over and over without really making any headway. You introduced
grabbing the broken extra reference in 0502e0adae2. It is and was
actually unneeded to start with if the proper solution was put in
place, which is what this patch does, reordering things.

> And since you made me look at the code again: If put_page() is unsafe
> with a lock held, how come the get_page_and_type() in share_pages()
> is safe with two such locks held? If it really is, it surely would be
> worthwhile to state in the description. There's another such instance
> in mem_sharing_add_to_physmap() (plus a get_page()), and also one
> where put_page_and_type() gets called with such a lock held (afaics).
>

It's possible there are other instances where this may still be
broken. Right now I only have bandwidth to test and fix the paths I
use. If that's unacceptable I'm happy to continue development in my
private fork and leave things as-is upstream.

Tamas
Jan Beulich July 18, 2019, 2:28 p.m. UTC | #7
On 18.07.2019 15:47, Tamas K Lengyel wrote:
> On Thu, Jul 18, 2019 at 7:33 AM Jan Beulich <JBeulich@suse.com> wrote:
>>
>> On 18.07.2019 15:13, Tamas K Lengyel wrote:
>>> On Thu, Jul 18, 2019 at 7:12 AM Jan Beulich <JBeulich@suse.com> wrote:
>>>>
>>>> On 18.07.2019 14:55, Tamas K Lengyel wrote:
>>>>> On Thu, Jul 18, 2019 at 4:47 AM Jan Beulich <JBeulich@suse.com> wrote:
>>>>>> On 17.07.2019 21:33, Tamas K Lengyel wrote:
>>>>>>> @@ -900,6 +895,7 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>>>>>>>          p2m_type_t smfn_type, cmfn_type;
>>>>>>>          struct two_gfns tg;
>>>>>>>          struct rmap_iterator ri;
>>>>>>> +    unsigned long put_count = 0;
>>>>>>>
>>>>>>>          get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn,
>>>>>>>                       cd, cgfn, &cmfn_type, NULL, &cmfn, 0, &tg);
>>>>>>> @@ -964,15 +960,6 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>>>>>>>              goto err_out;
>>>>>>>          }
>>>>>>>
>>>>>>> -    /* Acquire an extra reference, for the freeing below to be safe. */
>>>>>>> -    if ( !get_page(cpage, dom_cow) )
>>>>>>> -    {
>>>>>>> -        ret = -EOVERFLOW;
>>>>>>> -        mem_sharing_page_unlock(secondpg);
>>>>>>> -        mem_sharing_page_unlock(firstpg);
>>>>>>> -        goto err_out;
>>>>>>> -    }
>>>>>>> -
>>>>>>>          /* Merge the lists together */
>>>>>>>          rmap_seed_iterator(cpage, &ri);
>>>>>>>          while ( (gfn = rmap_iterate(cpage, &ri)) != NULL)
>>>>>>> @@ -984,13 +971,14 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>>>>>>>               * Don't change the type of rmap for the client page. */
>>>>>>>              rmap_del(gfn, cpage, 0);
>>>>>>>              rmap_add(gfn, spage);
>>>>>>> -        put_page_and_type(cpage);
>>>>>>> +        put_count++;
>>>>>>>              d = get_domain_by_id(gfn->domain);
>>>>>>>              BUG_ON(!d);
>>>>>>>              BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
>>>>>>>              put_domain(d);
>>>>>>>          }
>>>>>>>          ASSERT(list_empty(&cpage->sharing->gfns));
>>>>>>> +    BUG_ON(!put_count);
>>>>>>>
>>>>>>>          /* Clear the rest of the shared state */
>>>>>>>          page_sharing_dispose(cpage);
>>>>>>> @@ -1001,7 +989,9 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>>>>>>>
>>>>>>>          /* Free the client page */
>>>>>>>          put_page_alloc_ref(cpage);
>>>>>>> -    put_page(cpage);
>>>>>>> +
>>>>>>> +    while ( put_count-- )
>>>>>>> +        put_page_and_type(cpage);
>>>>>>>
>>>>>>>          /* We managed to free a domain page. */
>>>>>>>          atomic_dec(&nr_shared_mfns);
>>>>>>> @@ -1165,19 +1155,13 @@ int __mem_sharing_unshare_page(struct domain *d,
>>>>>>>          {
>>>>>>>              if ( !last_gfn )
>>>>>>>                  mem_sharing_gfn_destroy(page, d, gfn_info);
>>>>>>> -        put_page_and_type(page);
>>>>>>> +
>>>>>>>              mem_sharing_page_unlock(page);
>>>>>>> +
>>>>>>>              if ( last_gfn )
>>>>>>> -        {
>>>>>>> -            if ( !get_page(page, dom_cow) )
>>>>>>> -            {
>>>>>>> -                put_gfn(d, gfn);
>>>>>>> -                domain_crash(d);
>>>>>>> -                return -EOVERFLOW;
>>>>>>> -            }
>>>>>>>                  put_page_alloc_ref(page);
>>>>>>> -            put_page(page);
>>>>>>> -        }
>>>>>>> +
>>>>>>> +        put_page_and_type(page);
>>>>>>>              put_gfn(d, gfn);
>>>>>>>
>>>>>>>              return 0;
>>>>>>
>>>>>> ... this (main, as I guess by the title) part of the change? I think
>>>>>> you want to explain what was wrong here and/or why the new arrangement
>>>>>> is better. (I'm sorry, I guess it was this way on prior versions
>>>>>> already, but apparently I didn't notice.)
>>>>>
>>>>> It's what the patch message says - calling put_page_and_type before
>>>>> mem_sharing_page_unlock can cause a deadlock. Since now we are now
>>>>> holding a reference to the page till the end there is no need for the
>>>>> extra get_page/put_page logic when we are dealing with the last_gfn.
>>>>
>>>> The title says "reorder" without any "why".
>>>
>>> Yes, I can't reasonably fit "Calling _put_page_type while also holding
>>> the page_lock for that page can cause a deadlock." into the title. So
>>> it's spelled out in the patch message.
>>
>> Of course not. And I realize _part_ of the changes is indeed what the
>> title says (although for share_pages() that's not obvious from the
>> patch alone). But you do more: You also avoid acquiring an extra
>> reference in share_pages().
> 
> I feel like we are going in circles and having the same conversations
> over and over without really making any headway. You introduced
> grabbing the broken extra reference in 0502e0adae2. It is and was
> actually unneeded to start with if the proper solution was put in
> place, which is what this patch does, reordering things.

I'm not complaining about the changes; I'd merely like the description
state why they're needed.

>> And since you made me look at the code again: If put_page() is unsafe
>> with a lock held, how come the get_page_and_type() in share_pages()
>> is safe with two such locks held? If it really is, it surely would be
>> worthwhile to state in the description. There's another such instance
>> in mem_sharing_add_to_physmap() (plus a get_page()), and also one
>> where put_page_and_type() gets called with such a lock held (afaics).
> 
> It's possible there are other instances where this may still be
> broken. Right now I only have bandwidth to test and fix the paths I
> use. If that's unacceptable I'm happy to continue development in my
> private fork and leave things as-is upstream.

Similarly, if there are limitations - fine. But please say so in the
description, to avoid giving the impression that the issues have been
taken care of altogether.

Jan
Tamas K Lengyel July 18, 2019, 2:35 p.m. UTC | #8
On Thu, Jul 18, 2019 at 8:28 AM Jan Beulich <JBeulich@suse.com> wrote:
>
> On 18.07.2019 15:47, Tamas K Lengyel wrote:
> > On Thu, Jul 18, 2019 at 7:33 AM Jan Beulich <JBeulich@suse.com> wrote:
> >>
> >> On 18.07.2019 15:13, Tamas K Lengyel wrote:
> >>> On Thu, Jul 18, 2019 at 7:12 AM Jan Beulich <JBeulich@suse.com> wrote:
> >>>>
> >>>> On 18.07.2019 14:55, Tamas K Lengyel wrote:
> >>>>> On Thu, Jul 18, 2019 at 4:47 AM Jan Beulich <JBeulich@suse.com> wrote:
> >>>>>> On 17.07.2019 21:33, Tamas K Lengyel wrote:
> >>>>>>> @@ -900,6 +895,7 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >>>>>>>          p2m_type_t smfn_type, cmfn_type;
> >>>>>>>          struct two_gfns tg;
> >>>>>>>          struct rmap_iterator ri;
> >>>>>>> +    unsigned long put_count = 0;
> >>>>>>>
> >>>>>>>          get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn,
> >>>>>>>                       cd, cgfn, &cmfn_type, NULL, &cmfn, 0, &tg);
> >>>>>>> @@ -964,15 +960,6 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >>>>>>>              goto err_out;
> >>>>>>>          }
> >>>>>>>
> >>>>>>> -    /* Acquire an extra reference, for the freeing below to be safe. */
> >>>>>>> -    if ( !get_page(cpage, dom_cow) )
> >>>>>>> -    {
> >>>>>>> -        ret = -EOVERFLOW;
> >>>>>>> -        mem_sharing_page_unlock(secondpg);
> >>>>>>> -        mem_sharing_page_unlock(firstpg);
> >>>>>>> -        goto err_out;
> >>>>>>> -    }
> >>>>>>> -
> >>>>>>>          /* Merge the lists together */
> >>>>>>>          rmap_seed_iterator(cpage, &ri);
> >>>>>>>          while ( (gfn = rmap_iterate(cpage, &ri)) != NULL)
> >>>>>>> @@ -984,13 +971,14 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >>>>>>>               * Don't change the type of rmap for the client page. */
> >>>>>>>              rmap_del(gfn, cpage, 0);
> >>>>>>>              rmap_add(gfn, spage);
> >>>>>>> -        put_page_and_type(cpage);
> >>>>>>> +        put_count++;
> >>>>>>>              d = get_domain_by_id(gfn->domain);
> >>>>>>>              BUG_ON(!d);
> >>>>>>>              BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
> >>>>>>>              put_domain(d);
> >>>>>>>          }
> >>>>>>>          ASSERT(list_empty(&cpage->sharing->gfns));
> >>>>>>> +    BUG_ON(!put_count);
> >>>>>>>
> >>>>>>>          /* Clear the rest of the shared state */
> >>>>>>>          page_sharing_dispose(cpage);
> >>>>>>> @@ -1001,7 +989,9 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >>>>>>>
> >>>>>>>          /* Free the client page */
> >>>>>>>          put_page_alloc_ref(cpage);
> >>>>>>> -    put_page(cpage);
> >>>>>>> +
> >>>>>>> +    while ( put_count-- )
> >>>>>>> +        put_page_and_type(cpage);
> >>>>>>>
> >>>>>>>          /* We managed to free a domain page. */
> >>>>>>>          atomic_dec(&nr_shared_mfns);
> >>>>>>> @@ -1165,19 +1155,13 @@ int __mem_sharing_unshare_page(struct domain *d,
> >>>>>>>          {
> >>>>>>>              if ( !last_gfn )
> >>>>>>>                  mem_sharing_gfn_destroy(page, d, gfn_info);
> >>>>>>> -        put_page_and_type(page);
> >>>>>>> +
> >>>>>>>              mem_sharing_page_unlock(page);
> >>>>>>> +
> >>>>>>>              if ( last_gfn )
> >>>>>>> -        {
> >>>>>>> -            if ( !get_page(page, dom_cow) )
> >>>>>>> -            {
> >>>>>>> -                put_gfn(d, gfn);
> >>>>>>> -                domain_crash(d);
> >>>>>>> -                return -EOVERFLOW;
> >>>>>>> -            }
> >>>>>>>                  put_page_alloc_ref(page);
> >>>>>>> -            put_page(page);
> >>>>>>> -        }
> >>>>>>> +
> >>>>>>> +        put_page_and_type(page);
> >>>>>>>              put_gfn(d, gfn);
> >>>>>>>
> >>>>>>>              return 0;
> >>>>>>
> >>>>>> ... this (main, as I guess by the title) part of the change? I think
> >>>>>> you want to explain what was wrong here and/or why the new arrangement
> >>>>>> is better. (I'm sorry, I guess it was this way on prior versions
> >>>>>> already, but apparently I didn't notice.)
> >>>>>
> >>>>> It's what the patch message says - calling put_page_and_type before
> >>>>> mem_sharing_page_unlock can cause a deadlock. Since now we are now
> >>>>> holding a reference to the page till the end there is no need for the
> >>>>> extra get_page/put_page logic when we are dealing with the last_gfn.
> >>>>
> >>>> The title says "reorder" without any "why".
> >>>
> >>> Yes, I can't reasonably fit "Calling _put_page_type while also holding
> >>> the page_lock for that page can cause a deadlock." into the title. So
> >>> it's spelled out in the patch message.
> >>
> >> Of course not. And I realize _part_ of the changes is indeed what the
> >> title says (although for share_pages() that's not obvious from the
> >> patch alone). But you do more: You also avoid acquiring an extra
> >> reference in share_pages().
> >
> > I feel like we are going in circles and having the same conversations
> > over and over without really making any headway. You introduced
> > grabbing the broken extra reference in 0502e0adae2. It is and was
> > actually unneeded to start with if the proper solution was put in
> > place, which is what this patch does, reordering things.
>
> I'm not complaining about the changes; I'd merely like the description
> state why they're needed.

OK.

>
> >> And since you made me look at the code again: If put_page() is unsafe
> >> with a lock held, how come the get_page_and_type() in share_pages()
> >> is safe with two such locks held? If it really is, it surely would be
> >> worthwhile to state in the description. There's another such instance
> >> in mem_sharing_add_to_physmap() (plus a get_page()), and also one
> >> where put_page_and_type() gets called with such a lock held (afaics).
> >
> > It's possible there are other instances where this may still be
> > broken. Right now I only have bandwidth to test and fix the paths I
> > use. If that's unacceptable I'm happy to continue development in my
> > private fork and leave things as-is upstream.
>
> Similarly, if there are limitations - fine. But please say so in the
> description, to avoid giving the impression that the issues have been
> taken care of altogether.
>

Fair enough.

Tamas
Jan Beulich July 18, 2019, 2:47 p.m. UTC | #9
On 18.07.2019 16:35, Tamas K Lengyel wrote:
> On Thu, Jul 18, 2019 at 8:28 AM Jan Beulich <JBeulich@suse.com> wrote:
>> On 18.07.2019 15:47, Tamas K Lengyel wrote:
>>> I feel like we are going in circles and having the same conversations
>>> over and over without really making any headway. You introduced
>>> grabbing the broken extra reference in 0502e0adae2. It is and was
>>> actually unneeded to start with if the proper solution was put in
>>> place, which is what this patch does, reordering things.
>>
>> I'm not complaining about the changes; I'd merely like the description
>> state why they're needed.
> 
> OK.
> 
>>> It's possible there are other instances where this may still be
>>> broken. Right now I only have bandwidth to test and fix the paths I
>>> use. If that's unacceptable I'm happy to continue development in my
>>> private fork and leave things as-is upstream.
>>
>> Similarly, if there are limitations - fine. But please say so in the
>> description, to avoid giving the impression that the issues have been
>> taken care of altogether.
> 
> Fair enough.

And btw - if you just sent an updated description, I think I'd commit
this without further waiting for George to find time to eventually ack
it.

Jan
Tamas K Lengyel July 18, 2019, 2:56 p.m. UTC | #10
On Thu, Jul 18, 2019 at 8:47 AM Jan Beulich <JBeulich@suse.com> wrote:
>
> On 18.07.2019 16:35, Tamas K Lengyel wrote:
> > On Thu, Jul 18, 2019 at 8:28 AM Jan Beulich <JBeulich@suse.com> wrote:
> >> On 18.07.2019 15:47, Tamas K Lengyel wrote:
> >>> I feel like we are going in circles and having the same conversations
> >>> over and over without really making any headway. You introduced
> >>> grabbing the broken extra reference in 0502e0adae2. It is and was
> >>> actually unneeded to start with if the proper solution was put in
> >>> place, which is what this patch does, reordering things.
> >>
> >> I'm not complaining about the changes; I'd merely like the description
> >> state why they're needed.
> >
> > OK.
> >
> >>> It's possible there are other instances where this may still be
> >>> broken. Right now I only have bandwidth to test and fix the paths I
> >>> use. If that's unacceptable I'm happy to continue development in my
> >>> private fork and leave things as-is upstream.
> >>
> >> Similarly, if there are limitations - fine. But please say so in the
> >> description, to avoid giving the impression that the issues have been
> >> taken care of altogether.
> >
> > Fair enough.
>
> And btw - if you just sent an updated description, I think I'd commit
> this without further waiting for George to find time to eventually ack
> it.

That would be great. This is the message I typed out for v7:

    x86/mem_sharing: reorder when pages are unlocked and released

    Calling _put_page_type while also holding the page_lock for that
page can cause
    a deadlock. There may be code-paths still in place where this is
an issue, but
    for normal sharing purposes this has been tested and works.

    Removing grabbing the extra page reference at certain points is
done because it
    is no longer needed, a reference is held till necessary with this
reorder thus
    the extra reference is redundant.

    The comment being dropped is incorrect since it's now out-of-date.

Tamas
George Dunlap July 29, 2019, 3:46 p.m. UTC | #11
On 7/18/19 3:47 PM, Jan Beulich wrote:
> On 18.07.2019 16:35, Tamas K Lengyel wrote:
>> On Thu, Jul 18, 2019 at 8:28 AM Jan Beulich <JBeulich@suse.com> wrote:
>>> On 18.07.2019 15:47, Tamas K Lengyel wrote:
>>>> I feel like we are going in circles and having the same conversations
>>>> over and over without really making any headway. You introduced
>>>> grabbing the broken extra reference in 0502e0adae2. It is and was
>>>> actually unneeded to start with if the proper solution was put in
>>>> place, which is what this patch does, reordering things.
>>>
>>> I'm not complaining about the changes; I'd merely like the description
>>> state why they're needed.
>>
>> OK.
>>
>>>> It's possible there are other instances where this may still be
>>>> broken. Right now I only have bandwidth to test and fix the paths I
>>>> use. If that's unacceptable I'm happy to continue development in my
>>>> private fork and leave things as-is upstream.
>>>
>>> Similarly, if there are limitations - fine. But please say so in the
>>> description, to avoid giving the impression that the issues have been
>>> taken care of altogether.
>>
>> Fair enough.
> 
> And btw - if you just sent an updated description, I think I'd commit
> this without further waiting for George to find time to eventually ack
> it.

Thanks -- but it looks like maybe you didn't commit the final patch of
the series ("x86/mem_sharing: style cleanup")?

 -George
Jan Beulich July 29, 2019, 3:59 p.m. UTC | #12
On 29.07.2019 17:46, George Dunlap wrote:
> On 7/18/19 3:47 PM, Jan Beulich wrote:
>> And btw - if you just sent an updated description, I think I'd commit
>> this without further waiting for George to find time to eventually ack
>> it.
> 
> Thanks -- but it looks like maybe you didn't commit the final patch of
> the series ("x86/mem_sharing: style cleanup")?

Yes, because of objections to the damage the patch actually does.

Jan
Tamas K Lengyel July 29, 2019, 4:01 p.m. UTC | #13
On Mon, Jul 29, 2019 at 9:47 AM George Dunlap <george.dunlap@citrix.com> wrote:
>
> On 7/18/19 3:47 PM, Jan Beulich wrote:
> > On 18.07.2019 16:35, Tamas K Lengyel wrote:
> >> On Thu, Jul 18, 2019 at 8:28 AM Jan Beulich <JBeulich@suse.com> wrote:
> >>> On 18.07.2019 15:47, Tamas K Lengyel wrote:
> >>>> I feel like we are going in circles and having the same conversations
> >>>> over and over without really making any headway. You introduced
> >>>> grabbing the broken extra reference in 0502e0adae2. It is and was
> >>>> actually unneeded to start with if the proper solution was put in
> >>>> place, which is what this patch does, reordering things.
> >>>
> >>> I'm not complaining about the changes; I'd merely like the description
> >>> state why they're needed.
> >>
> >> OK.
> >>
> >>>> It's possible there are other instances where this may still be
> >>>> broken. Right now I only have bandwidth to test and fix the paths I
> >>>> use. If that's unacceptable I'm happy to continue development in my
> >>>> private fork and leave things as-is upstream.
> >>>
> >>> Similarly, if there are limitations - fine. But please say so in the
> >>> description, to avoid giving the impression that the issues have been
> >>> taken care of altogether.
> >>
> >> Fair enough.
> >
> > And btw - if you just sent an updated description, I think I'd commit
> > this without further waiting for George to find time to eventually ack
> > it.
>
> Thanks -- but it looks like maybe you didn't commit the final patch of
> the series ("x86/mem_sharing: style cleanup")?

Jan requested additional style cleanups to be applied. I'll try to
send that in this week.

Tamas
George Dunlap July 30, 2019, 9:31 a.m. UTC | #14
On 7/29/19 4:59 PM, Jan Beulich wrote:
> On 29.07.2019 17:46, George Dunlap wrote:
>> On 7/18/19 3:47 PM, Jan Beulich wrote:
>>> And btw - if you just sent an updated description, I think I'd commit
>>> this without further waiting for George to find time to eventually ack
>>> it.
>>
>> Thanks -- but it looks like maybe you didn't commit the final patch of
>> the series ("x86/mem_sharing: style cleanup")?
> 
> Yes, because of objections to the damage the patch actually does.

Right -- for some reason in that thread I was only getting Tamas'
replies, not yours.  But my main question was whether I could delete the
series from my review queue; and since it's to be re-sent, the answer
turns out to be "yes".

Thanks,
 -George

Patch
diff mbox series

diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 58d9157fa8..6c033d1488 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -648,10 +648,6 @@  static int page_make_private(struct domain *d, struct page_info *page)
         return -EBUSY;
     }
 
-    /* We can only change the type if count is one */
-    /* Because we are locking pages individually, we need to drop
-     * the lock here, while the page is typed. We cannot risk the 
-     * race of page_unlock and then put_page_type. */
     expected_type = (PGT_shared_page | PGT_validated | PGT_locked | 2);
     if ( page->u.inuse.type_info != expected_type )
     {
@@ -660,12 +656,11 @@  static int page_make_private(struct domain *d, struct page_info *page)
         return -EEXIST;
     }
 
+    mem_sharing_page_unlock(page);
+
     /* Drop the final typecount */
     put_page_and_type(page);
 
-    /* Now that we've dropped the type, we can unlock */
-    mem_sharing_page_unlock(page);
-
     /* Change the owner */
     ASSERT(page_get_owner(page) == dom_cow);
     page_set_owner(page, d);
@@ -900,6 +895,7 @@  static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
     p2m_type_t smfn_type, cmfn_type;
     struct two_gfns tg;
     struct rmap_iterator ri;
+    unsigned long put_count = 0;
 
     get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn,
                  cd, cgfn, &cmfn_type, NULL, &cmfn, 0, &tg);
@@ -964,15 +960,6 @@  static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
         goto err_out;
     }
 
-    /* Acquire an extra reference, for the freeing below to be safe. */
-    if ( !get_page(cpage, dom_cow) )
-    {
-        ret = -EOVERFLOW;
-        mem_sharing_page_unlock(secondpg);
-        mem_sharing_page_unlock(firstpg);
-        goto err_out;
-    }
-
     /* Merge the lists together */
     rmap_seed_iterator(cpage, &ri);
     while ( (gfn = rmap_iterate(cpage, &ri)) != NULL)
@@ -984,13 +971,14 @@  static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
          * Don't change the type of rmap for the client page. */
         rmap_del(gfn, cpage, 0);
         rmap_add(gfn, spage);
-        put_page_and_type(cpage);
+        put_count++;
         d = get_domain_by_id(gfn->domain);
         BUG_ON(!d);
         BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
         put_domain(d);
     }
     ASSERT(list_empty(&cpage->sharing->gfns));
+    BUG_ON(!put_count);
 
     /* Clear the rest of the shared state */
     page_sharing_dispose(cpage);
@@ -1001,7 +989,9 @@  static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
 
     /* Free the client page */
     put_page_alloc_ref(cpage);
-    put_page(cpage);
+
+    while ( put_count-- )
+        put_page_and_type(cpage);
 
     /* We managed to free a domain page. */
     atomic_dec(&nr_shared_mfns);
@@ -1165,19 +1155,13 @@  int __mem_sharing_unshare_page(struct domain *d,
     {
         if ( !last_gfn )
             mem_sharing_gfn_destroy(page, d, gfn_info);
-        put_page_and_type(page);
+
         mem_sharing_page_unlock(page);
+
         if ( last_gfn )
-        {
-            if ( !get_page(page, dom_cow) )
-            {
-                put_gfn(d, gfn);
-                domain_crash(d);
-                return -EOVERFLOW;
-            }
             put_page_alloc_ref(page);
-            put_page(page);
-        }
+
+        put_page_and_type(page);
         put_gfn(d, gfn);
 
         return 0;