
[v2,2/6] x86/mem-paging: correct p2m_mem_paging_prep()'s error handling

Message ID bf9dd27b-a7db-de0e-a804-d687e66ecf1e@suse.com (mailing list archive)
State New, archived
Series x86/mem-paging: misc cleanup

Commit Message

Jan Beulich April 23, 2020, 8:37 a.m. UTC
Communicating errors from p2m_set_entry() to the caller is not enough:
Neither the M2P nor the stats updates should occur in such a case.
Instead the allocated page needs to be freed again; for cleanliness
reasons also properly take into account _PGC_allocated there.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

Comments

Roger Pau Monné May 14, 2020, 3:57 p.m. UTC | #1
On Thu, Apr 23, 2020 at 10:37:46AM +0200, Jan Beulich wrote:
> Communicating errors from p2m_set_entry() to the caller is not enough:
> Neither the M2P nor the stats updates should occur in such a case.
> Instead the allocated page needs to be freed again; for cleanliness
> reasons also properly take into account _PGC_allocated there.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

> 
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -1781,7 +1781,7 @@ void p2m_mem_paging_populate(struct doma
>   */
>  int p2m_mem_paging_prep(struct domain *d, unsigned long gfn_l, uint64_t buffer)
>  {
> -    struct page_info *page;
> +    struct page_info *page = NULL;
>      p2m_type_t p2mt;
>      p2m_access_t a;
>      gfn_t gfn = _gfn(gfn_l);
> @@ -1816,9 +1816,19 @@ int p2m_mem_paging_prep(struct domain *d
>              goto out;
>          /* Get a free page */
>          ret = -ENOMEM;
> -        page = alloc_domheap_page(p2m->domain, 0);
> +        page = alloc_domheap_page(d, 0);
>          if ( unlikely(page == NULL) )
>              goto out;
> +        if ( unlikely(!get_page(page, d)) )
> +        {
> +            /*
> +             * The domain can't possibly know about this page yet, so failure
> +             * here is a clear indication of something fishy going on.
> +             */
> +            domain_crash(d);
> +            page = NULL;
> +            goto out;
> +        }
>          mfn = page_to_mfn(page);
>          page_extant = 0;
>  
> @@ -1832,7 +1842,6 @@ int p2m_mem_paging_prep(struct domain *d
>                       "Failed to load paging-in gfn %lx Dom%d bytes left %d\n",
>                       gfn_l, d->domain_id, ret);
>              ret = -EFAULT;
> -            put_page(page); /* Don't leak pages */
>              goto out;            
>          }
>      }
> @@ -1843,13 +1852,24 @@ int p2m_mem_paging_prep(struct domain *d
>      ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
>                          paging_mode_log_dirty(d) ? p2m_ram_logdirty
>                                                   : p2m_ram_rw, a);
> -    set_gpfn_from_mfn(mfn_x(mfn), gfn_l);

I would instead do `if ( ret ) goto out;`

And wouldn't touch the code afterwards.

Thanks, Roger.
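
For reference, a sketch of how this part of p2m_mem_paging_prep() might read
with Roger's suggestion applied - an illustration only, using the names from
the quoted hunk, and not the shape that was ultimately committed:

    ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
                        paging_mode_log_dirty(d) ? p2m_ram_logdirty
                                                 : p2m_ram_rw, a);
    if ( ret )
        goto out;

    /* Success: update the M2P and the paging stats, un-indented. */
    set_gpfn_from_mfn(mfn_x(mfn), gfn_l);

    if ( !page_extant )
        atomic_dec(&d->paged_pages);

 out:
    gfn_unlock(p2m, gfn, 0);
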
Andrew Cooper May 15, 2020, 2:40 p.m. UTC | #2
On 23/04/2020 09:37, Jan Beulich wrote:
> Communicating errors from p2m_set_entry() to the caller is not enough:
> Neither the M2P nor the stats updates should occur in such a case.

"neither".  Following a colon/semicolon isn't the start of a new sentence.

> Instead the allocated page needs to be freed again; for cleanliness
> reasons also properly take into account _PGC_allocated there.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -1781,7 +1781,7 @@ void p2m_mem_paging_populate(struct doma
>   */
>  int p2m_mem_paging_prep(struct domain *d, unsigned long gfn_l, uint64_t buffer)
>  {
> -    struct page_info *page;
> +    struct page_info *page = NULL;
>      p2m_type_t p2mt;
>      p2m_access_t a;
>      gfn_t gfn = _gfn(gfn_l);
> @@ -1816,9 +1816,19 @@ int p2m_mem_paging_prep(struct domain *d
>              goto out;
>          /* Get a free page */
>          ret = -ENOMEM;
> -        page = alloc_domheap_page(p2m->domain, 0);
> +        page = alloc_domheap_page(d, 0);
>          if ( unlikely(page == NULL) )
>              goto out;
> +        if ( unlikely(!get_page(page, d)) )
> +        {
> +            /*
> +             * The domain can't possibly know about this page yet, so failure
> +             * here is a clear indication of something fishy going on.
> +             */

This needs a gprintk(XENLOG_ERR, ...) of some form.  (which also reminds
me that I *still* need to finish my patch forcing everyone to provide
something to qualify the domain crash, so release builds of Xen get some
hint as to what went on or why.)
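
One possible shape of what is being asked for - the message text here is
hypothetical, modelled on the gdprintk() wording used further down in this
function:

        if ( unlikely(!get_page(page, d)) )
        {
            /*
             * The domain can't possibly know about this page yet, so failure
             * here is a clear indication of something fishy going on.
             */
            gprintk(XENLOG_ERR,
                    "Failed to get ref on paging-in page for gfn %lx Dom%d\n",
                    gfn_l, d->domain_id);
            domain_crash(d);
            page = NULL;
            goto out;
        }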

> +            domain_crash(d);
> +            page = NULL;
> +            goto out;
> +        }
>          mfn = page_to_mfn(page);
>          page_extant = 0;
>  
> @@ -1832,7 +1842,6 @@ int p2m_mem_paging_prep(struct domain *d
>                       "Failed to load paging-in gfn %lx Dom%d bytes left %d\n",
>                       gfn_l, d->domain_id, ret);
>              ret = -EFAULT;
> -            put_page(page); /* Don't leak pages */
>              goto out;            
>          }
>      }
> @@ -1843,13 +1852,24 @@ int p2m_mem_paging_prep(struct domain *d
>      ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
>                          paging_mode_log_dirty(d) ? p2m_ram_logdirty
>                                                   : p2m_ram_rw, a);
> -    set_gpfn_from_mfn(mfn_x(mfn), gfn_l);
> +    if ( !ret )
> +    {
> +        set_gpfn_from_mfn(mfn_x(mfn), gfn_l);
>  
> -    if ( !page_extant )
> -        atomic_dec(&d->paged_pages);
> +        if ( !page_extant )
> +            atomic_dec(&d->paged_pages);
> +    }
>  
>   out:
>      gfn_unlock(p2m, gfn, 0);
> +
> +    if ( page )
> +    {
> +        if ( ret )
> +            put_page_alloc_ref(page);
> +        put_page(page);

This is a very long way from clear enough to follow, and buggy if anyone
inserts a new goto out path.

~Andrew

> +    }
> +
>      return ret;
>  }
>  
>
Jan Beulich May 15, 2020, 3:15 p.m. UTC | #3
On 15.05.2020 16:40, Andrew Cooper wrote:
> On 23/04/2020 09:37, Jan Beulich wrote:
>> @@ -1816,9 +1816,19 @@ int p2m_mem_paging_prep(struct domain *d
>>              goto out;
>>          /* Get a free page */
>>          ret = -ENOMEM;
>> -        page = alloc_domheap_page(p2m->domain, 0);
>> +        page = alloc_domheap_page(d, 0);
>>          if ( unlikely(page == NULL) )
>>              goto out;
>> +        if ( unlikely(!get_page(page, d)) )
>> +        {
>> +            /*
>> +             * The domain can't possibly know about this page yet, so failure
>> +             * here is a clear indication of something fishy going on.
>> +             */
> 
> This needs a gprintk(XENLOG_ERR, ...) of some form.  (which also reminds
> me that I *still* need to finish my patch forcing everyone to provide
> something to qualify the domain crash, so release builds of Xen get some
> hint as to what went on or why.)

First of all - I've committed the patch earlier today, with Roger's
R-b, and considering that it had been pending for quite some time.

As to the specific aspect:

>> +            domain_crash(d);

This already leaves a file/line combination as a (minimal) hint. I
can make a patch to add a gprintk() as you ask for, but I'm not
sure it's worth it for this almost dead code.

>> @@ -1843,13 +1852,24 @@ int p2m_mem_paging_prep(struct domain *d
>>      ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
>>                          paging_mode_log_dirty(d) ? p2m_ram_logdirty
>>                                                   : p2m_ram_rw, a);
>> -    set_gpfn_from_mfn(mfn_x(mfn), gfn_l);
>> +    if ( !ret )
>> +    {
>> +        set_gpfn_from_mfn(mfn_x(mfn), gfn_l);
>>  
>> -    if ( !page_extant )
>> -        atomic_dec(&d->paged_pages);
>> +        if ( !page_extant )
>> +            atomic_dec(&d->paged_pages);
>> +    }
>>  
>>   out:
>>      gfn_unlock(p2m, gfn, 0);
>> +
>> +    if ( page )
>> +    {
>> +        if ( ret )
>> +            put_page_alloc_ref(page);
>> +        put_page(page);
> 
> This is a very long way from clear enough to follow, and buggy if anyone
> inserts a new goto out path.

What alternatives do you see?

Jan
Andrew Cooper May 15, 2020, 8:02 p.m. UTC | #4
On 15/05/2020 16:15, Jan Beulich wrote:
>>> +            domain_crash(d);
> This already leaves a file/line combination as a (minimal) hint.

First, that is still tantamount to useless in logs from a user.

Second, the use of __LINE__ is why it breaks livepatching, and people
using livepatching are still carrying an out-of-tree patch to unbreak it.

> I can make a patch to add a gprintk() as you ask for, but I'm not
> sure it's worth it for this almost dead code.

"page in unexpected state" would be better than nothing, but given the
comment, it might also be better as ASSERT_UNREACHABLE(), and we now
have a lot of cases where we declare unreachable, and kill the domain in
release builds.
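
A sketch of that pattern as it might apply here - assert in debug builds,
crash the domain in release builds (illustration only, not a committed
change):

        if ( unlikely(!get_page(page, d)) )
        {
            /* Nobody else can know about this page yet. */
            ASSERT_UNREACHABLE();
            domain_crash(d);
            page = NULL;
            goto out;
        }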

>
>>> @@ -1843,13 +1852,24 @@ int p2m_mem_paging_prep(struct domain *d
>>>      ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
>>>                          paging_mode_log_dirty(d) ? p2m_ram_logdirty
>>>                                                   : p2m_ram_rw, a);
>>> -    set_gpfn_from_mfn(mfn_x(mfn), gfn_l);
>>> +    if ( !ret )
>>> +    {
>>> +        set_gpfn_from_mfn(mfn_x(mfn), gfn_l);
>>>  
>>> -    if ( !page_extant )
>>> -        atomic_dec(&d->paged_pages);
>>> +        if ( !page_extant )
>>> +            atomic_dec(&d->paged_pages);
>>> +    }
>>>  
>>>   out:
>>>      gfn_unlock(p2m, gfn, 0);
>>> +
>>> +    if ( page )
>>> +    {
>>> +        if ( ret )
>>> +            put_page_alloc_ref(page);
>>> +        put_page(page);
>> This is a very long way from clear enough to follow, and buggy if anyone
>> inserts a new goto out path.
> What alternatives do you see?

/* Fully free the page on error.  Drop our temporary reference in all cases. */

would at least help someone trying to figure out what is going on here,
especially as put_page_alloc_ref() is not the obvious freeing function
for alloc_domheap_page().

~Andrew
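
With Andrew's suggested wording folded in, the exit path of the committed
patch would read roughly as below. On error both references are dropped,
fully freeing the page: put_page_alloc_ref() drops the _PGC_allocated
reference taken by alloc_domheap_page(), and the final put_page() drops the
temporary get_page() reference taken earlier in the function. On success
only the temporary reference is dropped.

 out:
    gfn_unlock(p2m, gfn, 0);

    if ( page )
    {
        /*
         * Fully free the page on error.  Drop our temporary reference in
         * all cases.
         */
        if ( ret )
            put_page_alloc_ref(page);
        put_page(page);
    }

    return ret;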

Patch

--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1781,7 +1781,7 @@  void p2m_mem_paging_populate(struct doma
  */
 int p2m_mem_paging_prep(struct domain *d, unsigned long gfn_l, uint64_t buffer)
 {
-    struct page_info *page;
+    struct page_info *page = NULL;
     p2m_type_t p2mt;
     p2m_access_t a;
     gfn_t gfn = _gfn(gfn_l);
@@ -1816,9 +1816,19 @@  int p2m_mem_paging_prep(struct domain *d
             goto out;
         /* Get a free page */
         ret = -ENOMEM;
-        page = alloc_domheap_page(p2m->domain, 0);
+        page = alloc_domheap_page(d, 0);
         if ( unlikely(page == NULL) )
             goto out;
+        if ( unlikely(!get_page(page, d)) )
+        {
+            /*
+             * The domain can't possibly know about this page yet, so failure
+             * here is a clear indication of something fishy going on.
+             */
+            domain_crash(d);
+            page = NULL;
+            goto out;
+        }
         mfn = page_to_mfn(page);
         page_extant = 0;
 
@@ -1832,7 +1842,6 @@  int p2m_mem_paging_prep(struct domain *d
                      "Failed to load paging-in gfn %lx Dom%d bytes left %d\n",
                      gfn_l, d->domain_id, ret);
             ret = -EFAULT;
-            put_page(page); /* Don't leak pages */
             goto out;            
         }
     }
@@ -1843,13 +1852,24 @@  int p2m_mem_paging_prep(struct domain *d
     ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
                         paging_mode_log_dirty(d) ? p2m_ram_logdirty
                                                  : p2m_ram_rw, a);
-    set_gpfn_from_mfn(mfn_x(mfn), gfn_l);
+    if ( !ret )
+    {
+        set_gpfn_from_mfn(mfn_x(mfn), gfn_l);
 
-    if ( !page_extant )
-        atomic_dec(&d->paged_pages);
+        if ( !page_extant )
+            atomic_dec(&d->paged_pages);
+    }
 
  out:
     gfn_unlock(p2m, gfn, 0);
+
+    if ( page )
+    {
+        if ( ret )
+            put_page_alloc_ref(page);
+        put_page(page);
+    }
+
     return ret;
 }