[v3,2/6] x86/HVM: split restore state checking from state loading

Message ID dcc726f5-634e-4b48-aa8f-d477cdc8dea9@suse.com (mailing list archive)
State Superseded
Series x86/HVM: load state checking

Commit Message

Jan Beulich Nov. 28, 2023, 10:34 a.m. UTC
..., at least as reasonably feasible without making a check hook
mandatory (in particular strict vs relaxed/zero-extend length checking
can't be done early this way).

Note that only one of the two uses of hvm_load() is accompanied with
hvm_check(). The other directly consumes hvm_save() output, which ought
to be well-formed. This means that while input data related checks don't
need repeating in the "load" function when already done by the "check"
one (albeit assertions to this effect may be desirable), domain state
related checks (e.g. has_xyz(d)) will be required in both places.
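
As an illustration of that last point, here is a minimal sketch of such a
handler pair (hvm_x_check(), hvm_x_load(), struct hvm_hw_x, and has_xyz()
are made-up placeholder names, not anything from this series):

static int hvm_x_check(const struct domain *d, hvm_domain_context_t *h)
{
    struct hvm_hw_x val;

    if ( !has_xyz(d) )          /* domain state check: needed here... */
        return -ENODEV;

    /* Input data checks live in the check hook; hvm_load_entry() does
     * the strict length check and advances h->cur past the record. */
    return hvm_load_entry(X, h, &val);
}

static int hvm_x_load(struct domain *d, hvm_domain_context_t *h)
{
    struct hvm_hw_x val;

    if ( !has_xyz(d) )          /* ...and needed here as well. */
        return -ENODEV;

    if ( hvm_load_entry(X, h, &val) )   /* can't fail after hvm_check() */
        return -EINVAL;

    /* ... apply val to the domain ... */
    return 0;
}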

Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Do we really need all the copying involved in the use of _hvm_read_entry()
(backing hvm_load_entry())? Zero-extending loads are likely easier to
handle that way, but for strict loads all we gain is a reduced risk of
unaligned accesses (compared to simply pointing into h->data[]).
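
Spelled out, the two alternatives being weighed look roughly like this
(fragments only; struct hvm_hw_x again stands in for an arbitrary
fixed-size record):

    struct hvm_hw_x val;

    /* Today: _hvm_read_entry() (via hvm_load_entry()) copies the record
     * into a suitably aligned local object and advances h->cur. */
    if ( hvm_load_entry(X, h, &val) )
        return -EINVAL;

    /* Alternative: point straight into the buffer. No copying, but the
     * record is only guaranteed byte alignment within h->data[], hence
     * the unaligned access risk mentioned above. */
    const struct hvm_hw_x *ptr = (const void *)&h->data[h->cur];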

Would the hvm_sr_handlers[] better use array_access_nospec()?
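
For reference, that hardening would amount to something like the
following at each lookup site (array_access_nospec() being the existing
wrapper around array_index_nospec(); whether it is warranted here is
exactly the open question):

    /* Clamp the guest-derived typecode before using it as an index, so
     * a mispredicted bounds check can't speculatively read past the
     * array. */
    handler = array_access_nospec(hvm_sr_handlers, desc->typecode).check;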
---
v2: New.

Comments

Roger Pau Monné Dec. 4, 2023, 5:27 p.m. UTC | #1
On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
> ..., at least as reasonably feasible without making a check hook
> mandatory (in particular strict vs relaxed/zero-extend length checking
> can't be done early this way).
> 
> Note that only one of the two uses of hvm_load() is accompanied with
> hvm_check(). The other directly consumes hvm_save() output, which ought
> to be well-formed. This means that while input data related checks don't
> need repeating in the "load" function when already done by the "check"
> one (albeit assertions to this effect may be desirable), domain state
> related checks (e.g. has_xyz(d)) will be required in both places.
> 
> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> Do we really need all the copying involved in use of _hvm_read_entry()
> (backing hvm_load_entry()? Zero-extending loads are likely easier to
> handle that way, but for strict loads all we gain is a reduced risk of
> unaligned accesses (compared to simply pointing into h->data[]).

See below, but I wonder whether the checks could be performed as part
of hvm_load() without having to introduce a separate handler and loop
over the context entries.

> Would the hvm_sr_handlers[] better use array_access_nospec()?

Maybe?  Given this is a domctl I do wonder whether a domain already
having access to such an interface won't have easier ways to leak data
from Xen.  Maybe for a disaggregated setup.

> ---
> v2: New.
> 
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -379,6 +379,10 @@ long arch_do_domctl(
>          if ( copy_from_guest(c.data, domctl->u.hvmcontext.buffer, c.size) != 0 )
>              goto sethvmcontext_out;
>  
> +        ret = hvm_check(d, &c);
> +        if ( ret )
> +            goto sethvmcontext_out;
> +
>          domain_pause(d);
>          ret = hvm_load(d, &c);
>          domain_unpause(d);
> --- a/xen/arch/x86/hvm/save.c
> +++ b/xen/arch/x86/hvm/save.c
> @@ -30,7 +30,8 @@ static void arch_hvm_save(struct domain
>      d->arch.hvm.sync_tsc = rdtsc();
>  }
>  
> -static int arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr)
> +static int arch_hvm_check(const struct domain *d,
> +                          const struct hvm_save_header *hdr)
>  {
>      uint32_t eax, ebx, ecx, edx;
>  
> @@ -55,6 +56,11 @@ static int arch_hvm_load(struct domain *
>                 "(%#"PRIx32") and restored on another (%#"PRIx32").\n",
>                 d->domain_id, hdr->cpuid, eax);
>  
> +    return 0;
> +}
> +
> +static void arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr)
> +{
>      /* Restore guest's preferred TSC frequency. */
>      if ( hdr->gtsc_khz )
>          d->arch.tsc_khz = hdr->gtsc_khz;
> @@ -66,13 +72,12 @@ static int arch_hvm_load(struct domain *
>  
>      /* VGA state is not saved/restored, so we nobble the cache. */
>      d->arch.hvm.stdvga.cache = STDVGA_CACHE_DISABLED;
> -
> -    return 0;
>  }
>  
>  /* List of handlers for various HVM save and restore types */
>  static struct {
>      hvm_save_handler save;
> +    hvm_check_handler check;
>      hvm_load_handler load;
>      const char *name;
>      size_t size;
> @@ -88,6 +93,7 @@ void __init hvm_register_savevm(uint16_t
>  {
>      ASSERT(typecode <= HVM_SAVE_CODE_MAX);
>      ASSERT(hvm_sr_handlers[typecode].save == NULL);
> +    ASSERT(hvm_sr_handlers[typecode].check == NULL);
>      ASSERT(hvm_sr_handlers[typecode].load == NULL);
>      hvm_sr_handlers[typecode].save = save_state;
>      hvm_sr_handlers[typecode].load = load_state;
> @@ -275,6 +281,78 @@ int hvm_save(struct domain *d, hvm_domai
>      return 0;
>  }
>  
> +int hvm_check(const struct domain *d, hvm_domain_context_t *h)
> +{
> +    const struct hvm_save_header *hdr;
> +    int rc;
> +
> +    if ( d->is_dying )
> +        return -EINVAL;
> +
> +    /* Get at the save header, which must be first. */
> +    hdr = hvm_get_entry(HEADER, h);
> +    if ( !hdr )
> +        return -ENODATA;
> +
> +    rc = arch_hvm_check(d, hdr);
> +    if ( rc )
> +        return rc;
> +
> +    for ( ; ; )
> +    {
> +        const struct hvm_save_descriptor *desc;
> +        hvm_check_handler handler;
> +
> +        if ( h->size - h->cur < sizeof(*desc) )
> +        {
> +            /* Run out of data */
> +            printk(XENLOG_G_ERR
> +                   "HVM restore %pd: save did not end with a null entry\n",
> +                   d);
> +            return -ENODATA;
> +        }
> +
> +        /* Read the typecode of the next entry and check for the end-marker. */
> +        desc = (const void *)&h->data[h->cur];
> +        if ( desc->typecode == HVM_SAVE_CODE(END) )
> +        {
> +            /* Reset cursor for hvm_load(). */
> +            h->cur = 0;
> +            return 0;
> +        }
> +
> +        /* Find the handler for this entry. */
> +        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
> +             !hvm_sr_handlers[desc->typecode].name ||
> +             !hvm_sr_handlers[desc->typecode].load )
> +        {
> +            printk(XENLOG_G_ERR "HVM restore %pd: unknown entry typecode %u\n",
> +                   d, desc->typecode);
> +            return -EINVAL;
> +        }
> +
> +        /* Check the entry. */
> +        handler = hvm_sr_handlers[desc->typecode].check;
> +        if ( !handler )
> +        {
> +            if ( desc->length > h->size - h->cur - sizeof(*desc) )
> +                return -ENODATA;
> +            h->cur += sizeof(*desc) + desc->length;
> +        }
> +        else if ( (rc = handler(d, h)) )
> +        {
> +            printk(XENLOG_G_ERR
> +                   "HVM restore %pd: failed to check %s:%u rc %d\n",
> +                   d, hvm_sr_handlers[desc->typecode].name, desc->instance, rc);
> +            return rc;
> +        }
> +
> +        process_pending_softirqs();

Looking at this, won't it be better to call the check() hooks inside
the hvm_load() function instead of duplicating the loop?

I realize that you only perform the checks when the state is loaded
from a domctl, but it still seems like quite a lot of code duplication
for little benefit.

hvm_load() could gain an extra parameter to select whether the input
must be checked or not, and that would avoid having to iterate twice
over the context.

> +    }
> +
> +    /* Not reached */

ASSERT_UNREACHABLE() maybe?

Thanks, Roger.
Jan Beulich Dec. 5, 2023, 8:52 a.m. UTC | #2
On 04.12.2023 18:27, Roger Pau Monné wrote:
> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
>> ..., at least as reasonably feasible without making a check hook
>> mandatory (in particular strict vs relaxed/zero-extend length checking
>> can't be done early this way).
>>
>> Note that only one of the two uses of hvm_load() is accompanied with
>> hvm_check(). The other directly consumes hvm_save() output, which ought
>> to be well-formed. This means that while input data related checks don't
>> need repeating in the "load" function when already done by the "check"
>> one (albeit assertions to this effect may be desirable), domain state
>> related checks (e.g. has_xyz(d)) will be required in both places.
>>
>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> Do we really need all the copying involved in use of _hvm_read_entry()
>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
>> handle that way, but for strict loads all we gain is a reduced risk of
>> unaligned accesses (compared to simply pointing into h->data[]).
> 
> See below, but I wonder whether the checks could be performed as part
> of hvm_load() without having to introduce a separate handler and loop
> over the context entries.

Specifically not. State loading (in the longer run) would better not fail
once started. (Imo it should have been this way from the beginning.) Only
then will the vCPU still be in a predictable state even after a possible
error.

>> Would the hvm_sr_handlers[] better use array_access_nospec()?
> 
> Maybe?  Given this is a domctl I do wonder whether a domain already
> having access to such interface won't have easier ways to leak data
> from Xen.  Maybe for a disaggregated setup.

Hmm, now we're in the middle - Andrew effectively said "no need to".

>> @@ -275,6 +281,78 @@ int hvm_save(struct domain *d, hvm_domai
>>      return 0;
>>  }
>>  
>> +int hvm_check(const struct domain *d, hvm_domain_context_t *h)
>> +{
>> +    const struct hvm_save_header *hdr;
>> +    int rc;
>> +
>> +    if ( d->is_dying )
>> +        return -EINVAL;
>> +
>> +    /* Get at the save header, which must be first. */
>> +    hdr = hvm_get_entry(HEADER, h);
>> +    if ( !hdr )
>> +        return -ENODATA;
>> +
>> +    rc = arch_hvm_check(d, hdr);
>> +    if ( rc )
>> +        return rc;
>> +
>> +    for ( ; ; )
>> +    {
>> +        const struct hvm_save_descriptor *desc;
>> +        hvm_check_handler handler;
>> +
>> +        if ( h->size - h->cur < sizeof(*desc) )
>> +        {
>> +            /* Run out of data */
>> +            printk(XENLOG_G_ERR
>> +                   "HVM restore %pd: save did not end with a null entry\n",
>> +                   d);
>> +            return -ENODATA;
>> +        }
>> +
>> +        /* Read the typecode of the next entry and check for the end-marker. */
>> +        desc = (const void *)&h->data[h->cur];
>> +        if ( desc->typecode == HVM_SAVE_CODE(END) )
>> +        {
>> +            /* Reset cursor for hvm_load(). */
>> +            h->cur = 0;
>> +            return 0;
>> +        }
>> +
>> +        /* Find the handler for this entry. */
>> +        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
>> +             !hvm_sr_handlers[desc->typecode].name ||
>> +             !hvm_sr_handlers[desc->typecode].load )
>> +        {
>> +            printk(XENLOG_G_ERR "HVM restore %pd: unknown entry typecode %u\n",
>> +                   d, desc->typecode);
>> +            return -EINVAL;
>> +        }
>> +
>> +        /* Check the entry. */
>> +        handler = hvm_sr_handlers[desc->typecode].check;
>> +        if ( !handler )
>> +        {
>> +            if ( desc->length > h->size - h->cur - sizeof(*desc) )
>> +                return -ENODATA;
>> +            h->cur += sizeof(*desc) + desc->length;
>> +        }
>> +        else if ( (rc = handler(d, h)) )
>> +        {
>> +            printk(XENLOG_G_ERR
>> +                   "HVM restore %pd: failed to check %s:%u rc %d\n",
>> +                   d, hvm_sr_handlers[desc->typecode].name, desc->instance, rc);
>> +            return rc;
>> +        }
>> +
>> +        process_pending_softirqs();
> 
> Looking at this, won't it be better to call the check() hooks inside
> the hvm_load() function instead of duplicating the loop?
> 
> I realize that you only perform the checks when the state is loaded
> from a domctl, but still seems quite a lot of code duplication for
> little benefit.
> 
> hvm_load() could gain an extra parameter to select whether the input
> must be checked or not, and that would avoid having to iterate twice
> over the context.

Well, see above.

>> +    }
>> +
>> +    /* Not reached */
> 
> ASSERT_UNREACHABLE() maybe?

Hmm, I'd find it kind of odd to have such here. While hvm_load() doesn't
have such either, perhaps that's not a meaningful reference. Adding this
would make me fear introducing a Misra violation (adding dead code).

Jan
Roger Pau Monné Dec. 5, 2023, 2:29 p.m. UTC | #3
On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
> On 04.12.2023 18:27, Roger Pau Monné wrote:
> > On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
> >> ..., at least as reasonably feasible without making a check hook
> >> mandatory (in particular strict vs relaxed/zero-extend length checking
> >> can't be done early this way).
> >>
> >> Note that only one of the two uses of hvm_load() is accompanied with
> >> hvm_check(). The other directly consumes hvm_save() output, which ought
> >> to be well-formed. This means that while input data related checks don't
> >> need repeating in the "load" function when already done by the "check"
> >> one (albeit assertions to this effect may be desirable), domain state
> >> related checks (e.g. has_xyz(d)) will be required in both places.
> >>
> >> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >> ---
> >> Do we really need all the copying involved in use of _hvm_read_entry()
> >> (backing hvm_load_entry()? Zero-extending loads are likely easier to
> >> handle that way, but for strict loads all we gain is a reduced risk of
> >> unaligned accesses (compared to simply pointing into h->data[]).
> > 
> > See below, but I wonder whether the checks could be performed as part
> > of hvm_load() without having to introduce a separate handler and loop
> > over the context entries.
> 
> Specifically not. State loading (in the longer run) would better not fail
> once started. (Imo it should have been this way from the beginning.) Only
> then will the vCPU still be in a predictable state even after a possible
> error.

Looking at the callers, does such predictable state after failure
matter?

One caller is a hypercall used by the toolstack at domain creation;
failing can just lead to the domain being destroyed.  The other caller
is VM fork, which will also lead to the fork being destroyed if
context loading fails.

Maybe I'm overlooking something.

> >> Would the hvm_sr_handlers[] better use array_access_nospec()?
> > 
> > Maybe?  Given this is a domctl I do wonder whether a domain already
> > having access to such interface won't have easier ways to leak data
> > from Xen.  Maybe for a disaggregated setup.
> 
> Hmm, now we're in the middle - Andrew effectively said "no need to".

I'm certainly not an expert on whether array_access_nospec() should be
used, so if Andrew says no need, that's likely better advice.

Maybe the XSM check used in such disaggregated setups would already
stop speculation?

> >> @@ -275,6 +281,78 @@ int hvm_save(struct domain *d, hvm_domai
> >>      return 0;
> >>  }
> >>  
> >> +int hvm_check(const struct domain *d, hvm_domain_context_t *h)
> >> +{
> >> +    const struct hvm_save_header *hdr;
> >> +    int rc;
> >> +
> >> +    if ( d->is_dying )
> >> +        return -EINVAL;
> >> +
> >> +    /* Get at the save header, which must be first. */
> >> +    hdr = hvm_get_entry(HEADER, h);
> >> +    if ( !hdr )
> >> +        return -ENODATA;
> >> +
> >> +    rc = arch_hvm_check(d, hdr);
> >> +    if ( rc )
> >> +        return rc;
> >> +
> >> +    for ( ; ; )
> >> +    {
> >> +        const struct hvm_save_descriptor *desc;
> >> +        hvm_check_handler handler;
> >> +
> >> +        if ( h->size - h->cur < sizeof(*desc) )
> >> +        {
> >> +            /* Run out of data */
> >> +            printk(XENLOG_G_ERR
> >> +                   "HVM restore %pd: save did not end with a null entry\n",
> >> +                   d);
> >> +            return -ENODATA;
> >> +        }
> >> +
> >> +        /* Read the typecode of the next entry and check for the end-marker. */
> >> +        desc = (const void *)&h->data[h->cur];
> >> +        if ( desc->typecode == HVM_SAVE_CODE(END) )
> >> +        {
> >> +            /* Reset cursor for hvm_load(). */
> >> +            h->cur = 0;
> >> +            return 0;
> >> +        }
> >> +
> >> +        /* Find the handler for this entry. */
> >> +        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
> >> +             !hvm_sr_handlers[desc->typecode].name ||
> >> +             !hvm_sr_handlers[desc->typecode].load )
> >> +        {
> >> +            printk(XENLOG_G_ERR "HVM restore %pd: unknown entry typecode %u\n",
> >> +                   d, desc->typecode);
> >> +            return -EINVAL;
> >> +        }
> >> +
> >> +        /* Check the entry. */
> >> +        handler = hvm_sr_handlers[desc->typecode].check;
> >> +        if ( !handler )
> >> +        {
> >> +            if ( desc->length > h->size - h->cur - sizeof(*desc) )
> >> +                return -ENODATA;
> >> +            h->cur += sizeof(*desc) + desc->length;
> >> +        }
> >> +        else if ( (rc = handler(d, h)) )
> >> +        {
> >> +            printk(XENLOG_G_ERR
> >> +                   "HVM restore %pd: failed to check %s:%u rc %d\n",
> >> +                   d, hvm_sr_handlers[desc->typecode].name, desc->instance, rc);
> >> +            return rc;
> >> +        }
> >> +
> >> +        process_pending_softirqs();
> > 
> > Looking at this, won't it be better to call the check() hooks inside
> > the hvm_load() function instead of duplicating the loop?
> > 
> > I realize that you only perform the checks when the state is loaded
> > from a domctl, but still seems quite a lot of code duplication for
> > little benefit.
> > 
> > hvm_load() could gain an extra parameter to select whether the input
> > must be checked or not, and that would avoid having to iterate twice
> > over the context.
> 
> Well, see above.
> 
> >> +    }
> >> +
> >> +    /* Not reached */
> > 
> > ASSERT_UNREACHABLE() maybe?
> 
> Hmm, I'd find it kind of odd to have such here. While hvm_load() doesn't
> have such either, perhaps that's not a meaningful reference. Adding this
> would make me fear introducing a Misra violation (adding dead code).

But isn't this the purpose of ASSERT_UNREACHABLE() exactly?  IOW:
Misra will need an exception for all usage of ASSERT_UNREACHABLE()
already.

I think ASSERT_UNREACHABLE() is much better than a "Not reached"
comment: it conveys the same information to readers of the code and has
a run-time consequence on debug builds.

Thanks, Roger.
Jan Beulich Dec. 5, 2023, 2:59 p.m. UTC | #4
On 05.12.2023 15:29, Roger Pau Monné wrote:
> On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
>> On 04.12.2023 18:27, Roger Pau Monné wrote:
>>> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
>>>> ..., at least as reasonably feasible without making a check hook
>>>> mandatory (in particular strict vs relaxed/zero-extend length checking
>>>> can't be done early this way).
>>>>
>>>> Note that only one of the two uses of hvm_load() is accompanied with
>>>> hvm_check(). The other directly consumes hvm_save() output, which ought
>>>> to be well-formed. This means that while input data related checks don't
>>>> need repeating in the "load" function when already done by the "check"
>>>> one (albeit assertions to this effect may be desirable), domain state
>>>> related checks (e.g. has_xyz(d)) will be required in both places.
>>>>
>>>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>> ---
>>>> Do we really need all the copying involved in use of _hvm_read_entry()
>>>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
>>>> handle that way, but for strict loads all we gain is a reduced risk of
>>>> unaligned accesses (compared to simply pointing into h->data[]).
>>>
>>> See below, but I wonder whether the checks could be performed as part
>>> of hvm_load() without having to introduce a separate handler and loop
>>> over the context entries.
>>
>> Specifically not. State loading (in the longer run) would better not fail
>> once started. (Imo it should have been this way from the beginning.) Only
>> then will the vCPU still be in a predictable state even after a possible
>> error.
> 
> Looking at the callers, does such predictable state after failure
> matter?
> 
> One caller is an hypercall used by the toolstack at domain create,
> failing can just lead to the domain being destroyed.  The other caller
> is vm fork, which will also lead to the fork being destroyed if
> context loading fails.
> 
> Maybe I'm overlooking something.

You don't (I think), but existing callers necessarily have to behave the
way you describe. From an abstract perspective, though, failed state
loading would better allow a retry. And really I thought that when you
suggested to split checking from loading, you had exactly that in mind.

>>>> Would the hvm_sr_handlers[] better use array_access_nospec()?
>>>
>>> Maybe?  Given this is a domctl I do wonder whether a domain already
>>> having access to such interface won't have easier ways to leak data
>>> from Xen.  Maybe for a disaggregated setup.
>>
>> Hmm, now we're in the middle - Andrew effectively said "no need to".
> 
> I'm certainly not an expert on whether array_access_nospec() should be
> used, so if Andrew says no need, that's likely better advice.
> 
> Maybe the xsm check used in such desegregated setups would already
> stop speculation?

There's no XSM check anywhere near, and even if there was I don't see
how it would stop mis-speculation on those array accesses.

>>>> @@ -275,6 +281,78 @@ int hvm_save(struct domain *d, hvm_domai
>>>>      return 0;
>>>>  }
>>>>  
>>>> +int hvm_check(const struct domain *d, hvm_domain_context_t *h)
>>>> +{
>>>> +    const struct hvm_save_header *hdr;
>>>> +    int rc;
>>>> +
>>>> +    if ( d->is_dying )
>>>> +        return -EINVAL;
>>>> +
>>>> +    /* Get at the save header, which must be first. */
>>>> +    hdr = hvm_get_entry(HEADER, h);
>>>> +    if ( !hdr )
>>>> +        return -ENODATA;
>>>> +
>>>> +    rc = arch_hvm_check(d, hdr);
>>>> +    if ( rc )
>>>> +        return rc;
>>>> +
>>>> +    for ( ; ; )
>>>> +    {
>>>> +        const struct hvm_save_descriptor *desc;
>>>> +        hvm_check_handler handler;
>>>> +
>>>> +        if ( h->size - h->cur < sizeof(*desc) )
>>>> +        {
>>>> +            /* Run out of data */
>>>> +            printk(XENLOG_G_ERR
>>>> +                   "HVM restore %pd: save did not end with a null entry\n",
>>>> +                   d);
>>>> +            return -ENODATA;
>>>> +        }
>>>> +
>>>> +        /* Read the typecode of the next entry and check for the end-marker. */
>>>> +        desc = (const void *)&h->data[h->cur];
>>>> +        if ( desc->typecode == HVM_SAVE_CODE(END) )
>>>> +        {
>>>> +            /* Reset cursor for hvm_load(). */
>>>> +            h->cur = 0;
>>>> +            return 0;
>>>> +        }
>>>> +
>>>> +        /* Find the handler for this entry. */
>>>> +        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
>>>> +             !hvm_sr_handlers[desc->typecode].name ||
>>>> +             !hvm_sr_handlers[desc->typecode].load )
>>>> +        {
>>>> +            printk(XENLOG_G_ERR "HVM restore %pd: unknown entry typecode %u\n",
>>>> +                   d, desc->typecode);
>>>> +            return -EINVAL;
>>>> +        }
>>>> +
>>>> +        /* Check the entry. */
>>>> +        handler = hvm_sr_handlers[desc->typecode].check;
>>>> +        if ( !handler )
>>>> +        {
>>>> +            if ( desc->length > h->size - h->cur - sizeof(*desc) )
>>>> +                return -ENODATA;
>>>> +            h->cur += sizeof(*desc) + desc->length;
>>>> +        }
>>>> +        else if ( (rc = handler(d, h)) )
>>>> +        {
>>>> +            printk(XENLOG_G_ERR
>>>> +                   "HVM restore %pd: failed to check %s:%u rc %d\n",
>>>> +                   d, hvm_sr_handlers[desc->typecode].name, desc->instance, rc);
>>>> +            return rc;
>>>> +        }
>>>> +
>>>> +        process_pending_softirqs();
>>>
>>> Looking at this, won't it be better to call the check() hooks inside
>>> the hvm_load() function instead of duplicating the loop?
>>>
>>> I realize that you only perform the checks when the state is loaded
>>> from a domctl, but still seems quite a lot of code duplication for
>>> little benefit.
>>>
>>> hvm_load() could gain an extra parameter to select whether the input
>>> must be checked or not, and that would avoid having to iterate twice
>>> over the context.
>>
>> Well, see above.
>>
>>>> +    }
>>>> +
>>>> +    /* Not reached */
>>>
>>> ASSERT_UNREACHABLE() maybe?
>>
>> Hmm, I'd find it kind of odd to have such here. While hvm_load() doesn't
>> have such either, perhaps that's not a meaningful reference. Adding this
>> would make me fear introducing a Misra violation (adding dead code).
> 
> But isn't this the purpose of ASSERT_UNREACHABLE() exactly?  IOW:
> Misra will need an exception for all usage of ASSERT_UNREACHABLE()
> already.
> 
> I think ASSERT_UNREACHABLE() is much better than a Not reached
> comment: conveys the same information to readers of the code and has
> a run-time consequence on debug builds.

I see a difference between uses on paths where we assert that a certain
state cannot be reached (if all our logic is right) vs a case like the
one here where the compiler (or another tool) can actually prove that
the loop can't be exited the "normal" way.
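
A contrived pair of fragments to make that distinction concrete
(illustrative only, not code from the patch; mode, MODE_A/MODE_B and
step() are hypothetical):

    /* Asserting a state we believe impossible: the classic use. */
    switch ( mode )
    {
    case MODE_A:
    case MODE_B:
        break;

    default:
        ASSERT_UNREACHABLE();
        break;
    }

    /* Whereas after a for ( ; ; ) loop whose only exits are return
     * statements, any trailing statement is provably dead: */
    for ( ; ; )
    {
        if ( step() )
            return 0;
    }
    /* Anything placed here, ASSERT_UNREACHABLE() included, can be
     * proven unreachable by the compiler. */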

Jan
Roger Pau Monné Dec. 5, 2023, 3:55 p.m. UTC | #5
On Tue, Dec 05, 2023 at 03:59:13PM +0100, Jan Beulich wrote:
> On 05.12.2023 15:29, Roger Pau Monné wrote:
> > On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
> >> On 04.12.2023 18:27, Roger Pau Monné wrote:
> >>> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
> >>>> ..., at least as reasonably feasible without making a check hook
> >>>> mandatory (in particular strict vs relaxed/zero-extend length checking
> >>>> can't be done early this way).
> >>>>
> >>>> Note that only one of the two uses of hvm_load() is accompanied with
> >>>> hvm_check(). The other directly consumes hvm_save() output, which ought
> >>>> to be well-formed. This means that while input data related checks don't
> >>>> need repeating in the "load" function when already done by the "check"
> >>>> one (albeit assertions to this effect may be desirable), domain state
> >>>> related checks (e.g. has_xyz(d)) will be required in both places.
> >>>>
> >>>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
> >>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >>>> ---
> >>>> Do we really need all the copying involved in use of _hvm_read_entry()
> >>>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
> >>>> handle that way, but for strict loads all we gain is a reduced risk of
> >>>> unaligned accesses (compared to simply pointing into h->data[]).
> >>>
> >>> See below, but I wonder whether the checks could be performed as part
> >>> of hvm_load() without having to introduce a separate handler and loop
> >>> over the context entries.
> >>
> >> Specifically not. State loading (in the longer run) would better not fail
> >> once started. (Imo it should have been this way from the beginning.) Only
> >> then will the vCPU still be in a predictable state even after a possible
> >> error.
> > 
> > Looking at the callers, does such predictable state after failure
> > matter?
> > 
> > One caller is an hypercall used by the toolstack at domain create,
> > failing can just lead to the domain being destroyed.  The other caller
> > is vm fork, which will also lead to the fork being destroyed if
> > context loading fails.
> > 
> > Maybe I'm overlooking something.
> 
> You don't (I think), but existing callers necessarily have to behave the
> way you describe. From an abstract perspective, though, failed state
> loading would better allow a retry. And really I thought that when you
> suggested to split checking from loading, you had exactly that in mind.

Not really TBH, because I didn't think that much on a possible
implementation when proposing it.

Maybe a suitable compromise would be to reset the state to the initial
(at domain build) one on failure?

I do dislike the duplicated loops, as it seems like a lot of duplicate
boilerplate code, and I have fears of it going out of sync.

> >>>> Would the hvm_sr_handlers[] better use array_access_nospec()?
> >>>
> >>> Maybe?  Given this is a domctl I do wonder whether a domain already
> >>> having access to such interface won't have easier ways to leak data
> >>> from Xen.  Maybe for a disaggregated setup.
> >>
> >> Hmm, now we're in the middle - Andrew effectively said "no need to".
> > 
> > I'm certainly not an expert on whether array_access_nospec() should be
> > used, so if Andrew says no need, that's likely better advice.
> > 
> > Maybe the xsm check used in such desegregated setups would already
> > stop speculation?
> 
> There's no XSM check anywhere near, and even if there was I don't see
> how it would stop mis-speculation on those array accesses.

This being a slow path anyway, I don't think the extra
array_access_nospec() would make much of an impact, but again I have
to admit it's unclear to me when those are actually required, so I
might suggest adding them out of precaution.

> >>>> @@ -275,6 +281,78 @@ int hvm_save(struct domain *d, hvm_domai
> >>>>      return 0;
> >>>>  }
> >>>>  
> >>>> +int hvm_check(const struct domain *d, hvm_domain_context_t *h)
> >>>> +{
> >>>> +    const struct hvm_save_header *hdr;
> >>>> +    int rc;
> >>>> +
> >>>> +    if ( d->is_dying )
> >>>> +        return -EINVAL;
> >>>> +
> >>>> +    /* Get at the save header, which must be first. */
> >>>> +    hdr = hvm_get_entry(HEADER, h);
> >>>> +    if ( !hdr )
> >>>> +        return -ENODATA;
> >>>> +
> >>>> +    rc = arch_hvm_check(d, hdr);
> >>>> +    if ( rc )
> >>>> +        return rc;
> >>>> +
> >>>> +    for ( ; ; )
> >>>> +    {
> >>>> +        const struct hvm_save_descriptor *desc;
> >>>> +        hvm_check_handler handler;
> >>>> +
> >>>> +        if ( h->size - h->cur < sizeof(*desc) )
> >>>> +        {
> >>>> +            /* Run out of data */
> >>>> +            printk(XENLOG_G_ERR
> >>>> +                   "HVM restore %pd: save did not end with a null entry\n",
> >>>> +                   d);
> >>>> +            return -ENODATA;
> >>>> +        }
> >>>> +
> >>>> +        /* Read the typecode of the next entry and check for the end-marker. */
> >>>> +        desc = (const void *)&h->data[h->cur];
> >>>> +        if ( desc->typecode == HVM_SAVE_CODE(END) )
> >>>> +        {
> >>>> +            /* Reset cursor for hvm_load(). */
> >>>> +            h->cur = 0;
> >>>> +            return 0;
> >>>> +        }
> >>>> +
> >>>> +        /* Find the handler for this entry. */
> >>>> +        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
> >>>> +             !hvm_sr_handlers[desc->typecode].name ||
> >>>> +             !hvm_sr_handlers[desc->typecode].load )
> >>>> +        {
> >>>> +            printk(XENLOG_G_ERR "HVM restore %pd: unknown entry typecode %u\n",
> >>>> +                   d, desc->typecode);
> >>>> +            return -EINVAL;
> >>>> +        }
> >>>> +
> >>>> +        /* Check the entry. */
> >>>> +        handler = hvm_sr_handlers[desc->typecode].check;
> >>>> +        if ( !handler )
> >>>> +        {
> >>>> +            if ( desc->length > h->size - h->cur - sizeof(*desc) )
> >>>> +                return -ENODATA;
> >>>> +            h->cur += sizeof(*desc) + desc->length;
> >>>> +        }
> >>>> +        else if ( (rc = handler(d, h)) )
> >>>> +        {
> >>>> +            printk(XENLOG_G_ERR
> >>>> +                   "HVM restore %pd: failed to check %s:%u rc %d\n",
> >>>> +                   d, hvm_sr_handlers[desc->typecode].name, desc->instance, rc);
> >>>> +            return rc;
> >>>> +        }
> >>>> +
> >>>> +        process_pending_softirqs();
> >>>
> >>> Looking at this, won't it be better to call the check() hooks inside
> >>> the hvm_load() function instead of duplicating the loop?
> >>>
> >>> I realize that you only perform the checks when the state is loaded
> >>> from a domctl, but still seems quite a lot of code duplication for
> >>> little benefit.
> >>>
> >>> hvm_load() could gain an extra parameter to select whether the input
> >>> must be checked or not, and that would avoid having to iterate twice
> >>> over the context.
> >>
> >> Well, see above.
> >>
> >>>> +    }
> >>>> +
> >>>> +    /* Not reached */
> >>>
> >>> ASSERT_UNREACHABLE() maybe?
> >>
> >> Hmm, I'd find it kind of odd to have such here. While hvm_load() doesn't
> >> have such either, perhaps that's not a meaningful reference. Adding this
> >> would make me fear introducing a Misra violation (adding dead code).
> > 
> > But isn't this the purpose of ASSERT_UNREACHABLE() exactly?  IOW:
> > Misra will need an exception for all usage of ASSERT_UNREACHABLE()
> > already.
> > 
> > I think ASSERT_UNREACHABLE() is much better than a Not reached
> > comment: conveys the same information to readers of the code and has
> > a run-time consequence on debug builds.
> 
> I see a difference between uses on paths were we assert that a certain
> state cannot be reached (if all our logic is right) vs a case like the
> one here where the compiler (or another tool) can actually prove that
> the loop can't be exited the "normal" way.

Can't be exited with the current code, but the purpose of
ASSERT_UNREACHABLE() is also to catch further changes that would
break this condition.

Thanks, Roger.
Jan Beulich Dec. 6, 2023, 7:27 a.m. UTC | #6
On 05.12.2023 16:55, Roger Pau Monné wrote:
> On Tue, Dec 05, 2023 at 03:59:13PM +0100, Jan Beulich wrote:
>> On 05.12.2023 15:29, Roger Pau Monné wrote:
>>> On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
>>>> On 04.12.2023 18:27, Roger Pau Monné wrote:
>>>>> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
>>>>>> ..., at least as reasonably feasible without making a check hook
>>>>>> mandatory (in particular strict vs relaxed/zero-extend length checking
>>>>>> can't be done early this way).
>>>>>>
>>>>>> Note that only one of the two uses of hvm_load() is accompanied with
>>>>>> hvm_check(). The other directly consumes hvm_save() output, which ought
>>>>>> to be well-formed. This means that while input data related checks don't
>>>>>> need repeating in the "load" function when already done by the "check"
>>>>>> one (albeit assertions to this effect may be desirable), domain state
>>>>>> related checks (e.g. has_xyz(d)) will be required in both places.
>>>>>>
>>>>>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
>>>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>>>> ---
>>>>>> Do we really need all the copying involved in use of _hvm_read_entry()
>>>>>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
>>>>>> handle that way, but for strict loads all we gain is a reduced risk of
>>>>>> unaligned accesses (compared to simply pointing into h->data[]).
>>>>>
>>>>> See below, but I wonder whether the checks could be performed as part
>>>>> of hvm_load() without having to introduce a separate handler and loop
>>>>> over the context entries.
>>>>
>>>> Specifically not. State loading (in the longer run) would better not fail
>>>> once started. (Imo it should have been this way from the beginning.) Only
>>>> then will the vCPU still be in a predictable state even after a possible
>>>> error.
>>>
>>> Looking at the callers, does such predictable state after failure
>>> matter?
>>>
>>> One caller is an hypercall used by the toolstack at domain create,
>>> failing can just lead to the domain being destroyed.  The other caller
>>> is vm fork, which will also lead to the fork being destroyed if
>>> context loading fails.
>>>
>>> Maybe I'm overlooking something.
>>
>> You don't (I think), but existing callers necessarily have to behave the
>> way you describe. From an abstract perspective, though, failed state
>> loading would better allow a retry. And really I thought that when you
>> suggested to split checking from loading, you had exactly that in mind.
> 
> Not really TBH, because I didn't think that much on a possible
> implementation when proposing it.

But what else did you think of then in terms of separating checking from
loading?

> Maybe a suitable compromise would be to reset the state to the initial
> (at domain build) one on failure?

That's an option, sure.

> I do dislike the duplicated loops, as it seems like a lot of duplicate
> boilerplate code, and I have fears of it going out of sync.

There's a certain risk, yes, but that exists similarly with the save and
load sides imo.

Andrew - before I go and undo the v2 changes, what is your view?

Jan
Roger Pau Monné Dec. 11, 2023, 10:46 a.m. UTC | #7
On Wed, Dec 06, 2023 at 08:27:59AM +0100, Jan Beulich wrote:
> On 05.12.2023 16:55, Roger Pau Monné wrote:
> > On Tue, Dec 05, 2023 at 03:59:13PM +0100, Jan Beulich wrote:
> >> On 05.12.2023 15:29, Roger Pau Monné wrote:
> >>> On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
> >>>> On 04.12.2023 18:27, Roger Pau Monné wrote:
> >>>>> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
> >>>>>> ..., at least as reasonably feasible without making a check hook
> >>>>>> mandatory (in particular strict vs relaxed/zero-extend length checking
> >>>>>> can't be done early this way).
> >>>>>>
> >>>>>> Note that only one of the two uses of hvm_load() is accompanied with
> >>>>>> hvm_check(). The other directly consumes hvm_save() output, which ought
> >>>>>> to be well-formed. This means that while input data related checks don't
> >>>>>> need repeating in the "load" function when already done by the "check"
> >>>>>> one (albeit assertions to this effect may be desirable), domain state
> >>>>>> related checks (e.g. has_xyz(d)) will be required in both places.
> >>>>>>
> >>>>>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
> >>>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >>>>>> ---
> >>>>>> Do we really need all the copying involved in use of _hvm_read_entry()
> >>>>>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
> >>>>>> handle that way, but for strict loads all we gain is a reduced risk of
> >>>>>> unaligned accesses (compared to simply pointing into h->data[]).
> >>>>>
> >>>>> See below, but I wonder whether the checks could be performed as part
> >>>>> of hvm_load() without having to introduce a separate handler and loop
> >>>>> over the context entries.
> >>>>
> >>>> Specifically not. State loading (in the longer run) would better not fail
> >>>> once started. (Imo it should have been this way from the beginning.) Only
> >>>> then will the vCPU still be in a predictable state even after a possible
> >>>> error.
> >>>
> >>> Looking at the callers, does such predictable state after failure
> >>> matter?
> >>>
> >>> One caller is an hypercall used by the toolstack at domain create,
> >>> failing can just lead to the domain being destroyed.  The other caller
> >>> is vm fork, which will also lead to the fork being destroyed if
> >>> context loading fails.
> >>>
> >>> Maybe I'm overlooking something.
> >>
> >> You don't (I think), but existing callers necessarily have to behave the
> >> way you describe. From an abstract perspective, though, failed state
> >> loading would better allow a retry. And really I thought that when you
> >> suggested to split checking from loading, you had exactly that in mind.
> > 
> > Not really TBH, because I didn't think that much on a possible
> > implementation when proposing it.
> 
> But what else did you think of then in terms of separating checking from
> loading?

Just calling the check and load functions inside of the same loop was
my initial thought.

> > Maybe a suitable compromise would be to reset the state to the initial
> > (at domain build) one on failure?
> 
> That's an option, sure.
> 
> > I do dislike the duplicated loops, as it seems like a lot of duplicate
> > boilerplate code, and I have fears of it going out of sync.
> 
> There's a certain risk, yes, but that exists similarly with the save and
> load sides imo.

Hm, yes, though I have the feeling those are not as similar as the
proposed check and load loops.

Thanks, Roger.
Jan Beulich Dec. 11, 2023, 11:31 a.m. UTC | #8
On 11.12.2023 11:46, Roger Pau Monné wrote:
> On Wed, Dec 06, 2023 at 08:27:59AM +0100, Jan Beulich wrote:
>> On 05.12.2023 16:55, Roger Pau Monné wrote:
>>> On Tue, Dec 05, 2023 at 03:59:13PM +0100, Jan Beulich wrote:
>>>> On 05.12.2023 15:29, Roger Pau Monné wrote:
>>>>> On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
>>>>>> On 04.12.2023 18:27, Roger Pau Monné wrote:
>>>>>>> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
>>>>>>>> ..., at least as reasonably feasible without making a check hook
>>>>>>>> mandatory (in particular strict vs relaxed/zero-extend length checking
>>>>>>>> can't be done early this way).
>>>>>>>>
>>>>>>>> Note that only one of the two uses of hvm_load() is accompanied with
>>>>>>>> hvm_check(). The other directly consumes hvm_save() output, which ought
>>>>>>>> to be well-formed. This means that while input data related checks don't
>>>>>>>> need repeating in the "load" function when already done by the "check"
>>>>>>>> one (albeit assertions to this effect may be desirable), domain state
>>>>>>>> related checks (e.g. has_xyz(d)) will be required in both places.
>>>>>>>>
>>>>>>>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
>>>>>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>>>>>> ---
>>>>>>>> Do we really need all the copying involved in use of _hvm_read_entry()
>>>>>>>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
>>>>>>>> handle that way, but for strict loads all we gain is a reduced risk of
>>>>>>>> unaligned accesses (compared to simply pointing into h->data[]).
>>>>>>>
>>>>>>> See below, but I wonder whether the checks could be performed as part
>>>>>>> of hvm_load() without having to introduce a separate handler and loop
>>>>>>> over the context entries.
>>>>>>
>>>>>> Specifically not. State loading (in the longer run) would better not fail
>>>>>> once started. (Imo it should have been this way from the beginning.) Only
>>>>>> then will the vCPU still be in a predictable state even after a possible
>>>>>> error.
>>>>>
>>>>> Looking at the callers, does such predictable state after failure
>>>>> matter?
>>>>>
>>>>> One caller is an hypercall used by the toolstack at domain create,
>>>>> failing can just lead to the domain being destroyed.  The other caller
>>>>> is vm fork, which will also lead to the fork being destroyed if
>>>>> context loading fails.
>>>>>
>>>>> Maybe I'm overlooking something.
>>>>
>>>> You don't (I think), but existing callers necessarily have to behave the
>>>> way you describe. From an abstract perspective, though, failed state
>>>> loading would better allow a retry. And really I thought that when you
>>>> suggested to split checking from loading, you had exactly that in mind.
>>>
>>> Not really TBH, because I didn't think that much on a possible
>>> implementation when proposing it.
>>
>> But what else did you think of then in terms of separating checking from
>> loading?
> 
> Just calling the check and load functions inside of the same loop was
> my initial thought.

Okay, I was meanwhile also guessing that this might have been what you
thought of. I can go that route, but I wouldn't want to make it "and", but
"or" then, depending on a new boolean parameter to be passed to hvm_load().
IOW I'd still like to do all checking before all loading (in the longer
run, that is, after individual handlers have been adapted). Would that
be okay with you?

Jan
Roger Pau Monné Dec. 11, 2023, 12:43 p.m. UTC | #9
On Mon, Dec 11, 2023 at 12:31:11PM +0100, Jan Beulich wrote:
> On 11.12.2023 11:46, Roger Pau Monné wrote:
> > On Wed, Dec 06, 2023 at 08:27:59AM +0100, Jan Beulich wrote:
> >> On 05.12.2023 16:55, Roger Pau Monné wrote:
> >>> On Tue, Dec 05, 2023 at 03:59:13PM +0100, Jan Beulich wrote:
> >>>> On 05.12.2023 15:29, Roger Pau Monné wrote:
> >>>>> On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
> >>>>>> On 04.12.2023 18:27, Roger Pau Monné wrote:
> >>>>>>> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
> >>>>>>>> ..., at least as reasonably feasible without making a check hook
> >>>>>>>> mandatory (in particular strict vs relaxed/zero-extend length checking
> >>>>>>>> can't be done early this way).
> >>>>>>>>
> >>>>>>>> Note that only one of the two uses of hvm_load() is accompanied with
> >>>>>>>> hvm_check(). The other directly consumes hvm_save() output, which ought
> >>>>>>>> to be well-formed. This means that while input data related checks don't
> >>>>>>>> need repeating in the "load" function when already done by the "check"
> >>>>>>>> one (albeit assertions to this effect may be desirable), domain state
> >>>>>>>> related checks (e.g. has_xyz(d)) will be required in both places.
> >>>>>>>>
> >>>>>>>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
> >>>>>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >>>>>>>> ---
> >>>>>>>> Do we really need all the copying involved in use of _hvm_read_entry()
> >>>>>>>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
> >>>>>>>> handle that way, but for strict loads all we gain is a reduced risk of
> >>>>>>>> unaligned accesses (compared to simply pointing into h->data[]).
> >>>>>>>
> >>>>>>> See below, but I wonder whether the checks could be performed as part
> >>>>>>> of hvm_load() without having to introduce a separate handler and loop
> >>>>>>> over the context entries.
> >>>>>>
> >>>>>> Specifically not. State loading (in the longer run) would better not fail
> >>>>>> once started. (Imo it should have been this way from the beginning.) Only
> >>>>>> then will the vCPU still be in a predictable state even after a possible
> >>>>>> error.
> >>>>>
> >>>>> Looking at the callers, does such predictable state after failure
> >>>>> matter?
> >>>>>
> >>>>> One caller is an hypercall used by the toolstack at domain create,
> >>>>> failing can just lead to the domain being destroyed.  The other caller
> >>>>> is vm fork, which will also lead to the fork being destroyed if
> >>>>> context loading fails.
> >>>>>
> >>>>> Maybe I'm overlooking something.
> >>>>
> >>>> You don't (I think), but existing callers necessarily have to behave the
> >>>> way you describe. From an abstract perspective, though, failed state
> >>>> loading would better allow a retry. And really I thought that when you
> >>>> suggested to split checking from loading, you had exactly that in mind.
> >>>
> >>> Not really TBH, because I didn't think that much on a possible
> >>> implementation when proposing it.
> >>
> >> But what else did you think of then in terms of separating checking from
> >> loading?
> > 
> > Just calling the check and load functions inside of the same loop was
> > my initial thought.
> 
> Okay, I was meanwhile also guessing that this might have been what you
> thought of. I can go that route, but I wouldn't want to make it "and", but
> "or" then, depending on a new boolean parameter to be passed to hvm_load().
> IOW I'd still like to do all checking before all loading (in the longer
> run, that is i.e. after individual handlers have been adapted). Would that
> be okay with you?

Yes, that would be fine.  I assume you will introduce a 'dry run'
parameter then?

Thanks, Roger.
Jan Beulich Dec. 11, 2023, 1:15 p.m. UTC | #10
On 11.12.2023 13:43, Roger Pau Monné wrote:
> On Mon, Dec 11, 2023 at 12:31:11PM +0100, Jan Beulich wrote:
>> On 11.12.2023 11:46, Roger Pau Monné wrote:
>>> On Wed, Dec 06, 2023 at 08:27:59AM +0100, Jan Beulich wrote:
>>>> On 05.12.2023 16:55, Roger Pau Monné wrote:
>>>>> On Tue, Dec 05, 2023 at 03:59:13PM +0100, Jan Beulich wrote:
>>>>>> On 05.12.2023 15:29, Roger Pau Monné wrote:
>>>>>>> On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
>>>>>>>> On 04.12.2023 18:27, Roger Pau Monné wrote:
>>>>>>>>> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
>>>>>>>>>> ..., at least as reasonably feasible without making a check hook
>>>>>>>>>> mandatory (in particular strict vs relaxed/zero-extend length checking
>>>>>>>>>> can't be done early this way).
>>>>>>>>>>
>>>>>>>>>> Note that only one of the two uses of hvm_load() is accompanied with
>>>>>>>>>> hvm_check(). The other directly consumes hvm_save() output, which ought
>>>>>>>>>> to be well-formed. This means that while input data related checks don't
>>>>>>>>>> need repeating in the "load" function when already done by the "check"
>>>>>>>>>> one (albeit assertions to this effect may be desirable), domain state
>>>>>>>>>> related checks (e.g. has_xyz(d)) will be required in both places.
>>>>>>>>>>
>>>>>>>>>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
>>>>>>>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>>>>>>>> ---
>>>>>>>>>> Do we really need all the copying involved in use of _hvm_read_entry()
>>>>>>>>>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
>>>>>>>>>> handle that way, but for strict loads all we gain is a reduced risk of
>>>>>>>>>> unaligned accesses (compared to simply pointing into h->data[]).
>>>>>>>>>
>>>>>>>>> See below, but I wonder whether the checks could be performed as part
>>>>>>>>> of hvm_load() without having to introduce a separate handler and loop
>>>>>>>>> over the context entries.
>>>>>>>>
>>>>>>>> Specifically not. State loading (in the longer run) would better not fail
>>>>>>>> once started. (Imo it should have been this way from the beginning.) Only
>>>>>>>> then will the vCPU still be in a predictable state even after a possible
>>>>>>>> error.
>>>>>>>
>>>>>>> Looking at the callers, does such predictable state after failure
>>>>>>> matter?
>>>>>>>
>>>>>>> One caller is an hypercall used by the toolstack at domain create,
>>>>>>> failing can just lead to the domain being destroyed.  The other caller
>>>>>>> is vm fork, which will also lead to the fork being destroyed if
>>>>>>> context loading fails.
>>>>>>>
>>>>>>> Maybe I'm overlooking something.
>>>>>>
>>>>>> You don't (I think), but existing callers necessarily have to behave the
>>>>>> way you describe. From an abstract perspective, though, failed state
>>>>>> loading would better allow a retry. And really I thought that when you
>>>>>> suggested to split checking from loading, you had exactly that in mind.
>>>>>
>>>>> Not really TBH, because I didn't think that much on a possible
>>>>> implementation when proposing it.
>>>>
>>>> But what else did you think of then in terms of separating checking from
>>>> loading?
>>>
>>> Just calling the check and load functions inside of the same loop was
>>> my initial thought.
>>
>> Okay, I was meanwhile also guessing that this might have been what you
>> thought of. I can go that route, but I wouldn't want to make it "and", but
>> "or" then, depending on a new boolean parameter to be passed to hvm_load().
>> IOW I'd still like to do all checking before all loading (in the longer
>> run, that is i.e. after individual handlers have been adapted). Would that
>> be okay with you?
> 
> Yes, that would be fine.  I assume you will introduce a 'dry run'
> parameter then?

Something like that, yes. I considered and discarded (mentally) "dry run"
for naming though, as the functions performed really differ (to me "dry
run" would mean that all the same checking would be done again when doing
the "real" run). I was further considering "check", "check_only", "load",
and "real", but to be honest I don't really like any of them. So the
naming aspect is still pending.
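
For illustration, the single-loop shape being converged on might look
along these lines (a sketch under assumptions: the boolean's name, the
omitted header handling, and the omitted typecode bounds checks are all
placeholders, not anything settled in this thread):

int hvm_load(struct domain *d, hvm_domain_context_t *h, bool real)
{
    /* Header retrieval, arch handling and vCPU setup omitted. */

    for ( ; ; )
    {
        const struct hvm_save_descriptor *desc;
        int rc;

        if ( h->size - h->cur < sizeof(*desc) )
            return -ENODATA;            /* unreachable in the real pass */

        desc = (const void *)&h->data[h->cur];
        if ( desc->typecode == HVM_SAVE_CODE(END) )
        {
            h->cur = 0;                 /* rewind for a subsequent real pass */
            return 0;
        }

        /* Typecode bounds / handler presence checks omitted. */
        if ( real )
            rc = hvm_sr_handlers[desc->typecode].load(d, h);
        else if ( hvm_sr_handlers[desc->typecode].check )
            rc = hvm_sr_handlers[desc->typecode].check(d, h);
        else
        {
            /* No check hook: only the plain length check applies. */
            if ( desc->length > h->size - h->cur - sizeof(*desc) )
                return -ENODATA;
            h->cur += sizeof(*desc) + desc->length;
            rc = 0;
        }

        if ( rc )
            return rc;

        process_pending_softirqs();
    }
}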

Jan

Patch

--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -379,6 +379,10 @@  long arch_do_domctl(
         if ( copy_from_guest(c.data, domctl->u.hvmcontext.buffer, c.size) != 0 )
             goto sethvmcontext_out;
 
+        ret = hvm_check(d, &c);
+        if ( ret )
+            goto sethvmcontext_out;
+
         domain_pause(d);
         ret = hvm_load(d, &c);
         domain_unpause(d);
--- a/xen/arch/x86/hvm/save.c
+++ b/xen/arch/x86/hvm/save.c
@@ -30,7 +30,8 @@  static void arch_hvm_save(struct domain
     d->arch.hvm.sync_tsc = rdtsc();
 }
 
-static int arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr)
+static int arch_hvm_check(const struct domain *d,
+                          const struct hvm_save_header *hdr)
 {
     uint32_t eax, ebx, ecx, edx;
 
@@ -55,6 +56,11 @@  static int arch_hvm_load(struct domain *
                "(%#"PRIx32") and restored on another (%#"PRIx32").\n",
                d->domain_id, hdr->cpuid, eax);
 
+    return 0;
+}
+
+static void arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr)
+{
     /* Restore guest's preferred TSC frequency. */
     if ( hdr->gtsc_khz )
         d->arch.tsc_khz = hdr->gtsc_khz;
@@ -66,13 +72,12 @@  static int arch_hvm_load(struct domain *
 
     /* VGA state is not saved/restored, so we nobble the cache. */
     d->arch.hvm.stdvga.cache = STDVGA_CACHE_DISABLED;
-
-    return 0;
 }
 
 /* List of handlers for various HVM save and restore types */
 static struct {
     hvm_save_handler save;
+    hvm_check_handler check;
     hvm_load_handler load;
     const char *name;
     size_t size;
@@ -88,6 +93,7 @@  void __init hvm_register_savevm(uint16_t
 {
     ASSERT(typecode <= HVM_SAVE_CODE_MAX);
     ASSERT(hvm_sr_handlers[typecode].save == NULL);
+    ASSERT(hvm_sr_handlers[typecode].check == NULL);
     ASSERT(hvm_sr_handlers[typecode].load == NULL);
     hvm_sr_handlers[typecode].save = save_state;
     hvm_sr_handlers[typecode].load = load_state;
@@ -275,6 +281,78 @@  int hvm_save(struct domain *d, hvm_domai
     return 0;
 }
 
+int hvm_check(const struct domain *d, hvm_domain_context_t *h)
+{
+    const struct hvm_save_header *hdr;
+    int rc;
+
+    if ( d->is_dying )
+        return -EINVAL;
+
+    /* Get at the save header, which must be first. */
+    hdr = hvm_get_entry(HEADER, h);
+    if ( !hdr )
+        return -ENODATA;
+
+    rc = arch_hvm_check(d, hdr);
+    if ( rc )
+        return rc;
+
+    for ( ; ; )
+    {
+        const struct hvm_save_descriptor *desc;
+        hvm_check_handler handler;
+
+        if ( h->size - h->cur < sizeof(*desc) )
+        {
+            /* Run out of data */
+            printk(XENLOG_G_ERR
+                   "HVM restore %pd: save did not end with a null entry\n",
+                   d);
+            return -ENODATA;
+        }
+
+        /* Read the typecode of the next entry and check for the end-marker. */
+        desc = (const void *)&h->data[h->cur];
+        if ( desc->typecode == HVM_SAVE_CODE(END) )
+        {
+            /* Reset cursor for hvm_load(). */
+            h->cur = 0;
+            return 0;
+        }
+
+        /* Find the handler for this entry. */
+        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
+             !hvm_sr_handlers[desc->typecode].name ||
+             !hvm_sr_handlers[desc->typecode].load )
+        {
+            printk(XENLOG_G_ERR "HVM restore %pd: unknown entry typecode %u\n",
+                   d, desc->typecode);
+            return -EINVAL;
+        }
+
+        /* Check the entry. */
+        handler = hvm_sr_handlers[desc->typecode].check;
+        if ( !handler )
+        {
+            if ( desc->length > h->size - h->cur - sizeof(*desc) )
+                return -ENODATA;
+            h->cur += sizeof(*desc) + desc->length;
+        }
+        else if ( (rc = handler(d, h)) )
+        {
+            printk(XENLOG_G_ERR
+                   "HVM restore %pd: failed to check %s:%u rc %d\n",
+                   d, hvm_sr_handlers[desc->typecode].name, desc->instance, rc);
+            return rc;
+        }
+
+        process_pending_softirqs();
+    }
+
+    /* Not reached */
+}
+
 int hvm_load(struct domain *d, hvm_domain_context_t *h)
 {
     const struct hvm_save_header *hdr;
@@ -291,9 +369,8 @@  int hvm_load(struct domain *d, hvm_domai
     if ( !hdr )
         return -ENODATA;
 
-    rc = arch_hvm_load(d, hdr);
-    if ( rc )
-        return rc;
+    ASSERT(!arch_hvm_check(d, hdr));
+    arch_hvm_load(d, hdr);
 
     /* Down all the vcpus: we only re-enable the ones that had state saved. */
     for_each_vcpu(d, v)
@@ -304,10 +381,7 @@  int hvm_load(struct domain *d, hvm_domai
     {
         if ( h->size - h->cur < sizeof(struct hvm_save_descriptor) )
         {
-            /* Run out of data */
-            printk(XENLOG_G_ERR
-                   "HVM%d restore: save did not end with a null entry\n",
-                   d->domain_id);
+            ASSERT_UNREACHABLE();
             return -ENODATA;
         }
 
@@ -320,8 +394,7 @@  int hvm_load(struct domain *d, hvm_domai
         if ( (desc->typecode > HVM_SAVE_CODE_MAX) ||
              ((handler = hvm_sr_handlers[desc->typecode].load) == NULL) )
         {
-            printk(XENLOG_G_ERR "HVM%d restore: unknown entry typecode %u\n",
-                   d->domain_id, desc->typecode);
+            ASSERT_UNREACHABLE();
             return -EINVAL;
         }
 
--- a/xen/arch/x86/include/asm/hvm/save.h
+++ b/xen/arch/x86/include/asm/hvm/save.h
@@ -103,6 +103,8 @@  static inline unsigned int hvm_load_inst
  * restoring.  Both return non-zero on error. */
 typedef int (*hvm_save_handler) (struct vcpu *v,
                                  hvm_domain_context_t *h);
+typedef int (*hvm_check_handler)(const struct domain *d,
+                                 hvm_domain_context_t *h);
 typedef int (*hvm_load_handler) (struct domain *d,
                                  hvm_domain_context_t *h);
 
@@ -140,6 +142,7 @@  size_t hvm_save_size(struct domain *d);
 int hvm_save(struct domain *d, hvm_domain_context_t *h);
 int hvm_save_one(struct domain *d, unsigned int typecode, unsigned int instance,
                  XEN_GUEST_HANDLE_64(uint8) handle, uint64_t *bufsz);
+int hvm_check(const struct domain *d, hvm_domain_context_t *h);
 int hvm_load(struct domain *d, hvm_domain_context_t *h);
 
 #endif /* __XEN_HVM_SAVE_H__ */
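
As a possible follow-up (an assumption about where later patches in the
series go, not something this patch does), the registration function
could then grow a parameter so the per-type check hooks actually get
populated:

/* Hypothetical extension of hvm_register_savevm(); the check_state
 * parameter is an assumption, not part of this patch. */
void __init hvm_register_savevm(uint16_t typecode,
                                const char *name,
                                hvm_save_handler save_state,
                                hvm_check_handler check_state,
                                hvm_load_handler load_state,
                                size_t size, int kind)
{
    ASSERT(typecode <= HVM_SAVE_CODE_MAX);
    ASSERT(hvm_sr_handlers[typecode].save == NULL);
    ASSERT(hvm_sr_handlers[typecode].check == NULL);
    ASSERT(hvm_sr_handlers[typecode].load == NULL);
    hvm_sr_handlers[typecode].save = save_state;
    hvm_sr_handlers[typecode].check = check_state;
    hvm_sr_handlers[typecode].load = load_state;
    hvm_sr_handlers[typecode].name = name;
    hvm_sr_handlers[typecode].size = size;
    hvm_sr_handlers[typecode].kind = kind;
}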