[v4,06/10] memory: batch processing in acquire_resource()
diff mbox series

Message ID a317b169e3710a481bb4be066d9b878f27b3e66c.1593519420.git.michal.leszczynski@cert.pl
State Superseded
Headers show
Series
  • Implement support for external IPT monitoring
Related show

Commit Message

Michał Leszczyński June 30, 2020, 12:33 p.m. UTC
From: Michal Leszczynski <michal.leszczynski@cert.pl>

Allow large resources to be acquired by letting acquire_resource()
process items in batches, using hypercall continuation.

Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
---
 xen/common/memory.c | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

Comments

Roger Pau Monné July 1, 2020, 10:46 a.m. UTC | #1
On Tue, Jun 30, 2020 at 02:33:49PM +0200, Michał Leszczyński wrote:
> From: Michal Leszczynski <michal.leszczynski@cert.pl>
> 
> Allow to acquire large resources by allowing acquire_resource()
> to process items in batches, using hypercall continuation.

This patch should be the first of the series IMO, since it can go in
independently of the rest, as it's a general improvement to
XENMEM_acquire_resource.

> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
> ---
>  xen/common/memory.c | 32 +++++++++++++++++++++++++++++---
>  1 file changed, 29 insertions(+), 3 deletions(-)
> 
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index 714077c1e5..3ab06581a2 100644
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -1046,10 +1046,12 @@ static int acquire_grant_table(struct domain *d, unsigned int id,
>  }
>  
>  static int acquire_resource(
> -    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg)
> +    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg,
> +    unsigned long *start_extent)
>  {
>      struct domain *d, *currd = current->domain;
>      xen_mem_acquire_resource_t xmar;
> +    uint32_t total_frames;
>      /*
>       * The mfn_list and gfn_list (below) arrays are ok on stack for the
>       * moment since they are small, but if they need to grow in future
> @@ -1077,8 +1079,17 @@ static int acquire_resource(
>          return 0;
>      }
>  
> +    total_frames = xmar.nr_frames;
> +
> +    if ( *start_extent )
> +    {
> +        xmar.frame += *start_extent;
> +        xmar.nr_frames -= *start_extent;
> +        guest_handle_add_offset(xmar.frame_list, *start_extent);
> +    }
> +
>      if ( xmar.nr_frames > ARRAY_SIZE(mfn_list) )
> -        return -E2BIG;
> +        xmar.nr_frames = ARRAY_SIZE(mfn_list);
>  
>      rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
>      if ( rc )
> @@ -1135,6 +1146,14 @@ static int acquire_resource(
>          }
>      }
>  
> +    if ( !rc )
> +    {
> +        *start_extent += xmar.nr_frames;
> +
> +        if ( *start_extent != total_frames )
> +            rc = -ERESTART;
> +    }

I think you should add some kind of loop here: processing just 32
frames before preempting might be too few. You generally want to loop
doing batches of 32 entries until hypercall_preempt_check() returns
true.

Thanks, Roger.
Julien Grall July 3, 2020, 10:35 a.m. UTC | #2
(+ Paul as the author of XENMEM_acquire_resource)

Hi,

On 30/06/2020 13:33, Michał Leszczyński wrote:
> From: Michal Leszczynski <michal.leszczynski@cert.pl>
> 
> Allow to acquire large resources by allowing acquire_resource()
> to process items in batches, using hypercall continuation.
> 
> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
> ---
>   xen/common/memory.c | 32 +++++++++++++++++++++++++++++---
>   1 file changed, 29 insertions(+), 3 deletions(-)
> 
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index 714077c1e5..3ab06581a2 100644
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -1046,10 +1046,12 @@ static int acquire_grant_table(struct domain *d, unsigned int id,
>   }
>   
>   static int acquire_resource(
> -    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg)
> +    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg,
> +    unsigned long *start_extent)
>   {
>       struct domain *d, *currd = current->domain;
>       xen_mem_acquire_resource_t xmar;
> +    uint32_t total_frames;
>       /*
>        * The mfn_list and gfn_list (below) arrays are ok on stack for the
>        * moment since they are small, but if they need to grow in future
> @@ -1077,8 +1079,17 @@ static int acquire_resource(
>           return 0;
>       }
>   
> +    total_frames = xmar.nr_frames;

On 32-bit, the start_extent would be 26 bits wide, which is not enough to
cover all the xmar.nr_frames. Therefore, you want to check that it is
possible to encode a continuation. Something like:

/* Is the size too large for us to encode a continuation? */
if ( unlikely(xmar.nr_frames > (UINT_MAX >> MEMOP_EXTENT_SHIFT)) )

> +
> +    if ( *start_extent )
> +    {
> +        xmar.frame += *start_extent;
> +        xmar.nr_frames -= *start_extent;

As start_extent is exposed to the guest, you want to check if it is not 
bigger than xmar.nr_frames.

> +        guest_handle_add_offset(xmar.frame_list, *start_extent);
> +    }
> +
>       if ( xmar.nr_frames > ARRAY_SIZE(mfn_list) )
> -        return -E2BIG;
> +        xmar.nr_frames = ARRAY_SIZE(mfn_list);

The documentation of the hypercall suggests that if you pass NULL, then
it will return the maximum value for nr_frames supported by the
implementation. So technically a domain cannot use more than
ARRAY_SIZE(mfn_list).

However, your new addition conflicts with the documentation. Can you
clarify how a domain will know that it can use more than
ARRAY_SIZE(mfn_list)?

>   
>       rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
>       if ( rc )
> @@ -1135,6 +1146,14 @@ static int acquire_resource(
>           }
>       }
>   
> +    if ( !rc )
> +    {
> +        *start_extent += xmar.nr_frames;
> +
> +        if ( *start_extent != total_frames )
> +            rc = -ERESTART;
> +    }
> +
>    out:
>       rcu_unlock_domain(d);
>   
> @@ -1600,7 +1619,14 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>   
>       case XENMEM_acquire_resource:
>           rc = acquire_resource(
> -            guest_handle_cast(arg, xen_mem_acquire_resource_t));
> +            guest_handle_cast(arg, xen_mem_acquire_resource_t),
> +            &start_extent);

Hmmm... it looks like we forgot to check that start_extent is always 0 
when the hypercall was added.

As this is exposed to the guest, it technically means that there is no
guarantee that start_extent will always be 0.

However, in practice, this was likely the intention and should be the 
case. So it may just be enough to mention the potential breakage in the 
commit message.

@All: what do you think?

> +
> +        if ( rc == -ERESTART )
> +            return hypercall_create_continuation(
> +                __HYPERVISOR_memory_op, "lh",
> +                op | (start_extent << MEMOP_EXTENT_SHIFT), arg);
> +
>           break;
>   
>       default:
> 

Cheers,
Paul Durrant July 3, 2020, 10:52 a.m. UTC | #3
> -----Original Message-----
> From: Julien Grall <julien@xen.org>
> Sent: 03 July 2020 11:36
> To: Michał Leszczyński <michal.leszczynski@cert.pl>; xen-devel@lists.xenproject.org
> Cc: luwei.kang@intel.com; tamas.lengyel@intel.com; Andrew Cooper <andrew.cooper3@citrix.com>; George
> Dunlap <george.dunlap@citrix.com>; Ian Jackson <ian.jackson@eu.citrix.com>; Jan Beulich
> <jbeulich@suse.com>; Stefano Stabellini <sstabellini@kernel.org>; Wei Liu <wl@xen.org>; paul@xen.org
> Subject: Re: [PATCH v4 06/10] memory: batch processing in acquire_resource()
> 
> (+ Paul as the author XENMEM_acquire_resource)
> 
> Hi,
> 
> On 30/06/2020 13:33, Michał Leszczyński wrote:
> > From: Michal Leszczynski <michal.leszczynski@cert.pl>
> >
> > Allow to acquire large resources by allowing acquire_resource()
> > to process items in batches, using hypercall continuation.
> >
> > Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
> > ---
> >   xen/common/memory.c | 32 +++++++++++++++++++++++++++++---
> >   1 file changed, 29 insertions(+), 3 deletions(-)
> >
> > diff --git a/xen/common/memory.c b/xen/common/memory.c
> > index 714077c1e5..3ab06581a2 100644
> > --- a/xen/common/memory.c
> > +++ b/xen/common/memory.c
> > @@ -1046,10 +1046,12 @@ static int acquire_grant_table(struct domain *d, unsigned int id,
> >   }
> >
> >   static int acquire_resource(
> > -    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg)
> > +    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg,
> > +    unsigned long *start_extent)
> >   {
> >       struct domain *d, *currd = current->domain;
> >       xen_mem_acquire_resource_t xmar;
> > +    uint32_t total_frames;
> >       /*
> >        * The mfn_list and gfn_list (below) arrays are ok on stack for the
> >        * moment since they are small, but if they need to grow in future
> > @@ -1077,8 +1079,17 @@ static int acquire_resource(
> >           return 0;
> >       }
> >
> > +    total_frames = xmar.nr_frames;
> 
> On 32-bit, the start_extent would be 26-bits wide which is not enough to
> cover all the xmar.nr_frames. Therefore, you want that check that it is
> possible to encode a continuation. Something like:
> 
> /* Is the size too large for us to encode a continuation? */
> if ( unlikely(xmar.nr_frames > (UINT_MAX >> MEMOP_EXTENT_SHIFT)) )
> 
> > +
> > > +    if ( *start_extent )
> > > +    {
> > +        xmar.frame += *start_extent;
> > +        xmar.nr_frames -= *start_extent;
> 
> As start_extent is exposed to the guest, you want to check if it is not
> bigger than xmar.nr_frames.
> 
> > +        guest_handle_add_offset(xmar.frame_list, *start_extent);
> > +    }
> > +
> >       if ( xmar.nr_frames > ARRAY_SIZE(mfn_list) )
> > -        return -E2BIG;
> > +        xmar.nr_frames = ARRAY_SIZE(mfn_list);
> 
> The documentation of the hypercall suggests that if you pass NULL, then
> it will return the maximum number value for nr_frames supported by the
> implementation. So technically a domain cannot use more than
> ARRAY_SIZE(mfn_list).
> 
> However, you new addition conflict with the documentation. Can you
> clarify how a domain will know that it can use more than
> ARRAY_SIZE(mfn_list)?

The domain should not need to know. It should be told the maximum number of frames of the type it wants. If we have to carve that up into batches inside Xen then the caller should not need to care, right?

> 
> >
> >       rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
> >       if ( rc )
> > @@ -1135,6 +1146,14 @@ static int acquire_resource(
> >           }
> >       }
> >
> > +    if ( !rc )
> > +    {
> > +        *start_extent += xmar.nr_frames;
> > +
> > +        if ( *start_extent != total_frames )
> > +            rc = -ERESTART;
> > +    }
> > +
> >    out:
> >       rcu_unlock_domain(d);
> >
> > @@ -1600,7 +1619,14 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
> >
> >       case XENMEM_acquire_resource:
> >           rc = acquire_resource(
> > -            guest_handle_cast(arg, xen_mem_acquire_resource_t));
> > +            guest_handle_cast(arg, xen_mem_acquire_resource_t),
> > +            &start_extent);
> 
> Hmmm... it looks like we forgot to check that start_extent is always 0
> when the hypercall was added.
> 
> As this is exposed to the guest, it technically means that there no
> guarantee that start_extent will always be 0.
> 

I don't follow. A start extent != 0 means you are in a continuation. How can you check for 0 without breaking continuations?

  Paul

> However, in practice, this was likely the intention and should be the
> case. So it may just be enough to mention the potential breakage in the
> commit message.
> 
> @All: what do you think?
> 
> > +
> > +        if ( rc == -ERESTART )
> > +            return hypercall_create_continuation(
> > +                __HYPERVISOR_memory_op, "lh",
> > +                op | (start_extent << MEMOP_EXTENT_SHIFT), arg);
> > +
> >           break;
> >
> >       default:
> >
> 
> Cheers,
> 
> --
> Julien Grall
Julien Grall July 3, 2020, 11:17 a.m. UTC | #4
Hi,

On 03/07/2020 11:52, Paul Durrant wrote:
>> -----Original Message-----
>> From: Julien Grall <julien@xen.org>
>> Sent: 03 July 2020 11:36
>> To: Michał Leszczyński <michal.leszczynski@cert.pl>; xen-devel@lists.xenproject.org
>> Cc: luwei.kang@intel.com; tamas.lengyel@intel.com; Andrew Cooper <andrew.cooper3@citrix.com>; George
>> Dunlap <george.dunlap@citrix.com>; Ian Jackson <ian.jackson@eu.citrix.com>; Jan Beulich
>> <jbeulich@suse.com>; Stefano Stabellini <sstabellini@kernel.org>; Wei Liu <wl@xen.org>; paul@xen.org
>> Subject: Re: [PATCH v4 06/10] memory: batch processing in acquire_resource()
>>
>> (+ Paul as the author XENMEM_acquire_resource)
>>
>> Hi,
>>
>> On 30/06/2020 13:33, Michał Leszczyński wrote:
>>> From: Michal Leszczynski <michal.leszczynski@cert.pl>
>>>
>>> Allow to acquire large resources by allowing acquire_resource()
>>> to process items in batches, using hypercall continuation.
>>>
>>> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
>>> ---
>>>    xen/common/memory.c | 32 +++++++++++++++++++++++++++++---
>>>    1 file changed, 29 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/xen/common/memory.c b/xen/common/memory.c
>>> index 714077c1e5..3ab06581a2 100644
>>> --- a/xen/common/memory.c
>>> +++ b/xen/common/memory.c
>>> @@ -1046,10 +1046,12 @@ static int acquire_grant_table(struct domain *d, unsigned int id,
>>>    }
>>>
>>>    static int acquire_resource(
>>> -    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg)
>>> +    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg,
>>> +    unsigned long *start_extent)
>>>    {
>>>        struct domain *d, *currd = current->domain;
>>>        xen_mem_acquire_resource_t xmar;
>>> +    uint32_t total_frames;
>>>        /*
>>>         * The mfn_list and gfn_list (below) arrays are ok on stack for the
>>>         * moment since they are small, but if they need to grow in future
>>> @@ -1077,8 +1079,17 @@ static int acquire_resource(
>>>            return 0;
>>>        }
>>>
>>> +    total_frames = xmar.nr_frames;
>>
>> On 32-bit, the start_extent would be 26-bits wide which is not enough to
>> cover all the xmar.nr_frames. Therefore, you want that check that it is
>> possible to encode a continuation. Something like:
>>
>> /* Is the size too large for us to encode a continuation? */
>> if ( unlikely(xmar.nr_frames > (UINT_MAX >> MEMOP_EXTENT_SHIFT)) )
>>
>>> +
>>> +    if ( *start_extent )
>>> +    {
>>> +        xmar.frame += *start_extent;
>>> +        xmar.nr_frames -= *start_extent;
>>
>> As start_extent is exposed to the guest, you want to check if it is not
>> bigger than xmar.nr_frames.
>>
>>> +        guest_handle_add_offset(xmar.frame_list, *start_extent);
>>> +    }
>>> +
>>>        if ( xmar.nr_frames > ARRAY_SIZE(mfn_list) )
>>> -        return -E2BIG;
>>> +        xmar.nr_frames = ARRAY_SIZE(mfn_list);
>>
>> The documentation of the hypercall suggests that if you pass NULL, then
>> it will return the maximum number value for nr_frames supported by the
>> implementation. So technically a domain cannot use more than
>> ARRAY_SIZE(mfn_list).
>>
>> However, you new addition conflict with the documentation. Can you
>> clarify how a domain will know that it can use more than
>> ARRAY_SIZE(mfn_list)?
> 
> The domain should not need to know. It should be told the maximum number of frames of the type it wants. If we have to carve that up into batches inside Xen then the caller should not need to care, right?

In the current implementation, we tell the guest how many frames it can
request in a batch. This number may be much smaller than the maximum
number of frames of the type.

Furthermore, this value is not tied to xmar.type. Therefore, it is
valid for a guest to call this hypercall only once at boot to figure out
the maximum batch.

So while the change you suggest looks like a good idea, I don't think it is
possible to do that with the current hypercall.

> 
>>
>>>
>>>        rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
>>>        if ( rc )
>>> @@ -1135,6 +1146,14 @@ static int acquire_resource(
>>>            }
>>>        }
>>>
>>> +    if ( !rc )
>>> +    {
>>> +        *start_extent += xmar.nr_frames;
>>> +
>>> +        if ( *start_extent != total_frames )
>>> +            rc = -ERESTART;
>>> +    }
>>> +
>>>     out:
>>>        rcu_unlock_domain(d);
>>>
>>> @@ -1600,7 +1619,14 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>>>
>>>        case XENMEM_acquire_resource:
>>>            rc = acquire_resource(
>>> -            guest_handle_cast(arg, xen_mem_acquire_resource_t));
>>> +            guest_handle_cast(arg, xen_mem_acquire_resource_t),
>>> +            &start_extent);
>>
>> Hmmm... it looks like we forgot to check that start_extent is always 0
>> when the hypercall was added.
>>
>> As this is exposed to the guest, it technically means that there no
>> guarantee that start_extent will always be 0.
>>
> 
> I don't follow. A start extent != 0 means you are in a continuation. How can you check for 0 without breaking continuations?

I think you misunderstood my point. My point is we never checked that
start_extent was 0. So a guest could validly pass a non-zero value to
start_extent and not break on older Xen releases.

Once this patch is merged, such a guest would behave differently. Or
did I miss any check/documentation for the start_extent value?

Cheers,
Jan Beulich July 3, 2020, 11:22 a.m. UTC | #5
On 03.07.2020 13:17, Julien Grall wrote:
> Hi,
> 
> On 03/07/2020 11:52, Paul Durrant wrote:
>>> -----Original Message-----
>>> From: Julien Grall <julien@xen.org>
>>> Sent: 03 July 2020 11:36
>>> To: Michał Leszczyński <michal.leszczynski@cert.pl>; xen-devel@lists.xenproject.org
>>> Cc: luwei.kang@intel.com; tamas.lengyel@intel.com; Andrew Cooper <andrew.cooper3@citrix.com>; George
>>> Dunlap <george.dunlap@citrix.com>; Ian Jackson <ian.jackson@eu.citrix.com>; Jan Beulich
>>> <jbeulich@suse.com>; Stefano Stabellini <sstabellini@kernel.org>; Wei Liu <wl@xen.org>; paul@xen.org
>>> Subject: Re: [PATCH v4 06/10] memory: batch processing in acquire_resource()
>>>
>>> (+ Paul as the author XENMEM_acquire_resource)
>>>
>>> Hi,
>>>
>>> On 30/06/2020 13:33, Michał Leszczyński wrote:
>>>> From: Michal Leszczynski <michal.leszczynski@cert.pl>
>>>>
>>>> Allow to acquire large resources by allowing acquire_resource()
>>>> to process items in batches, using hypercall continuation.
>>>>
>>>> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
>>>> ---
>>>>    xen/common/memory.c | 32 +++++++++++++++++++++++++++++---
>>>>    1 file changed, 29 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/xen/common/memory.c b/xen/common/memory.c
>>>> index 714077c1e5..3ab06581a2 100644
>>>> --- a/xen/common/memory.c
>>>> +++ b/xen/common/memory.c
>>>> @@ -1046,10 +1046,12 @@ static int acquire_grant_table(struct domain *d, unsigned int id,
>>>>    }
>>>>
>>>>    static int acquire_resource(
>>>> -    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg)
>>>> +    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg,
>>>> +    unsigned long *start_extent)
>>>>    {
>>>>        struct domain *d, *currd = current->domain;
>>>>        xen_mem_acquire_resource_t xmar;
>>>> +    uint32_t total_frames;
>>>>        /*
>>>>         * The mfn_list and gfn_list (below) arrays are ok on stack for the
>>>>         * moment since they are small, but if they need to grow in future
>>>> @@ -1077,8 +1079,17 @@ static int acquire_resource(
>>>>            return 0;
>>>>        }
>>>>
>>>> +    total_frames = xmar.nr_frames;
>>>
>>> On 32-bit, the start_extent would be 26-bits wide which is not enough to
>>> cover all the xmar.nr_frames. Therefore, you want that check that it is
>>> possible to encode a continuation. Something like:
>>>
>>> /* Is the size too large for us to encode a continuation? */
>>> if ( unlikely(xmar.nr_frames > (UINT_MAX >> MEMOP_EXTENT_SHIFT)) )
>>>
>>>> +
>>>> +    if ( *start_extent )
>>>> +    {
>>>> +        xmar.frame += *start_extent;
>>>> +        xmar.nr_frames -= *start_extent;
>>>
>>> As start_extent is exposed to the guest, you want to check if it is not
>>> bigger than xmar.nr_frames.
>>>
>>>> +        guest_handle_add_offset(xmar.frame_list, *start_extent);
>>>> +    }
>>>> +
>>>>        if ( xmar.nr_frames > ARRAY_SIZE(mfn_list) )
>>>> -        return -E2BIG;
>>>> +        xmar.nr_frames = ARRAY_SIZE(mfn_list);
>>>
>>> The documentation of the hypercall suggests that if you pass NULL, then
>>> it will return the maximum number value for nr_frames supported by the
>>> implementation. So technically a domain cannot use more than
>>> ARRAY_SIZE(mfn_list).
>>>
>>> However, you new addition conflict with the documentation. Can you
>>> clarify how a domain will know that it can use more than
>>> ARRAY_SIZE(mfn_list)?
>>
>> The domain should not need to know. It should be told the maximum number of frames of the type it wants. If we have to carve that up into batches inside Xen then the caller should not need to care, right?
> 
> In the current implementation, we tell the guest how many frames it can 
> request in a batch. This number may be much smaller that the maximum 
> number of frames of the type.
> 
> Furthermore this value is not tie to the xmar.type. Therefore, it is 
> valid for a guest to call this hypercall only once at boot to figure out 
> the maximum batch.
> 
> So while the change you suggest looks a good idea, I don't think it is 
> possible to do that with the current hypercall.

Doesn't the limit simply change to UINT_MAX >> MEMOP_EXTENT_SHIFT,
which then is what should be reported?

>>>> @@ -1135,6 +1146,14 @@ static int acquire_resource(
>>>>            }
>>>>        }
>>>>
>>>> +    if ( !rc )
>>>> +    {
>>>> +        *start_extent += xmar.nr_frames;
>>>> +
>>>> +        if ( *start_extent != total_frames )
>>>> +            rc = -ERESTART;
>>>> +    }
>>>> +
>>>>     out:
>>>>        rcu_unlock_domain(d);
>>>>
>>>> @@ -1600,7 +1619,14 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>>>>
>>>>        case XENMEM_acquire_resource:
>>>>            rc = acquire_resource(
>>>> -            guest_handle_cast(arg, xen_mem_acquire_resource_t));
>>>> +            guest_handle_cast(arg, xen_mem_acquire_resource_t),
>>>> +            &start_extent);
>>>
>>> Hmmm... it looks like we forgot to check that start_extent is always 0
>>> when the hypercall was added.
>>>
>>> As this is exposed to the guest, it technically means that there no
>>> guarantee that start_extent will always be 0.
>>>
>>
>> I don't follow. A start extent != 0 means you are in a continuation. How can you check for 0 without breaking continuations?
> 
> I think you misundertood my point. My point is we never checked that 
> start_extent was 0. So a guest could validly pass a non-zero value to 
> start_extent and not break on older Xen release.
> 
> When this patch will be merged, such guest would behave differently. Or 
> did I miss any check/documentation for the start_extent value?

I think we may have done the same in the past already when enabling
sub-ops for use of continuations. A guest specifying a non-zero
start_extent itself is effectively a request for an undefined sub-op.
With, as a result, undefined behavior.

Jan
Julien Grall July 3, 2020, 11:36 a.m. UTC | #6
Hi,

On 03/07/2020 12:22, Jan Beulich wrote:
> On 03.07.2020 13:17, Julien Grall wrote:
>> On 03/07/2020 11:52, Paul Durrant wrote:
>>>> -----Original Message-----
>>>> From: Julien Grall <julien@xen.org>
>>>> Sent: 03 July 2020 11:36
>>>> To: Michał Leszczyński <michal.leszczynski@cert.pl>; xen-devel@lists.xenproject.org
>>>> Cc: luwei.kang@intel.com; tamas.lengyel@intel.com; Andrew Cooper <andrew.cooper3@citrix.com>; George
>>>> Dunlap <george.dunlap@citrix.com>; Ian Jackson <ian.jackson@eu.citrix.com>; Jan Beulich
>>>> <jbeulich@suse.com>; Stefano Stabellini <sstabellini@kernel.org>; Wei Liu <wl@xen.org>; paul@xen.org
>>>> Subject: Re: [PATCH v4 06/10] memory: batch processing in acquire_resource()
>>>>
>>>> (+ Paul as the author XENMEM_acquire_resource)
>>>>
>>>> Hi,
>>>>
>>>> On 30/06/2020 13:33, Michał Leszczyński wrote:
>>>>> From: Michal Leszczynski <michal.leszczynski@cert.pl>
>>>>>
>>>>> Allow to acquire large resources by allowing acquire_resource()
>>>>> to process items in batches, using hypercall continuation.
>>>>>
>>>>> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
>>>>> ---
>>>>>     xen/common/memory.c | 32 +++++++++++++++++++++++++++++---
>>>>>     1 file changed, 29 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/xen/common/memory.c b/xen/common/memory.c
>>>>> index 714077c1e5..3ab06581a2 100644
>>>>> --- a/xen/common/memory.c
>>>>> +++ b/xen/common/memory.c
>>>>> @@ -1046,10 +1046,12 @@ static int acquire_grant_table(struct domain *d, unsigned int id,
>>>>>     }
>>>>>
>>>>>     static int acquire_resource(
>>>>> -    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg)
>>>>> +    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg,
>>>>> +    unsigned long *start_extent)
>>>>>     {
>>>>>         struct domain *d, *currd = current->domain;
>>>>>         xen_mem_acquire_resource_t xmar;
>>>>> +    uint32_t total_frames;
>>>>>         /*
>>>>>          * The mfn_list and gfn_list (below) arrays are ok on stack for the
>>>>>          * moment since they are small, but if they need to grow in future
>>>>> @@ -1077,8 +1079,17 @@ static int acquire_resource(
>>>>>             return 0;
>>>>>         }
>>>>>
>>>>> +    total_frames = xmar.nr_frames;
>>>>
>>>> On 32-bit, the start_extent would be 26-bits wide which is not enough to
>>>> cover all the xmar.nr_frames. Therefore, you want that check that it is
>>>> possible to encode a continuation. Something like:
>>>>
>>>> /* Is the size too large for us to encode a continuation? */
>>>> if ( unlikely(xmar.nr_frames > (UINT_MAX >> MEMOP_EXTENT_SHIFT)) )
>>>>
>>>>> +
>>>>> +    if ( *start_extent )
>>>>> +    {
>>>>> +        xmar.frame += *start_extent;
>>>>> +        xmar.nr_frames -= *start_extent;
>>>>
>>>> As start_extent is exposed to the guest, you want to check if it is not
>>>> bigger than xmar.nr_frames.
>>>>
>>>>> +        guest_handle_add_offset(xmar.frame_list, *start_extent);
>>>>> +    }
>>>>> +
>>>>>         if ( xmar.nr_frames > ARRAY_SIZE(mfn_list) )
>>>>> -        return -E2BIG;
>>>>> +        xmar.nr_frames = ARRAY_SIZE(mfn_list);
>>>>
>>>> The documentation of the hypercall suggests that if you pass NULL, then
>>>> it will return the maximum number value for nr_frames supported by the
>>>> implementation. So technically a domain cannot use more than
>>>> ARRAY_SIZE(mfn_list).
>>>>
>>>> However, you new addition conflict with the documentation. Can you
>>>> clarify how a domain will know that it can use more than
>>>> ARRAY_SIZE(mfn_list)?
>>>
>>> The domain should not need to know. It should be told the maximum number of frames of the type it wants. If we have to carve that up into batches inside Xen then the caller should not need to care, right?
>>
>> In the current implementation, we tell the guest how many frames it can
>> request in a batch. This number may be much smaller that the maximum
>> number of frames of the type.
>>
>> Furthermore this value is not tie to the xmar.type. Therefore, it is
>> valid for a guest to call this hypercall only once at boot to figure out
>> the maximum batch.
>>
>> So while the change you suggest looks a good idea, I don't think it is
>> possible to do that with the current hypercall.
> 
> Doesn't the limit simply change to UINT_MAX >> MEMOP_EXTENT_SHIFT,
> which then is what should be reported?

Hmmm... Can you remind me whether we support migration to an older release?

But it may still not be a concern, as this can only be used by Dom0 or a
PV domain targeting another domain.

> 
>>>>> @@ -1135,6 +1146,14 @@ static int acquire_resource(
>>>>>             }
>>>>>         }
>>>>>
>>>>> +    if ( !rc )
>>>>> +    {
>>>>> +        *start_extent += xmar.nr_frames;
>>>>> +
>>>>> +        if ( *start_extent != total_frames )
>>>>> +            rc = -ERESTART;
>>>>> +    }
>>>>> +
>>>>>      out:
>>>>>         rcu_unlock_domain(d);
>>>>>
>>>>> @@ -1600,7 +1619,14 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>>>>>
>>>>>         case XENMEM_acquire_resource:
>>>>>             rc = acquire_resource(
>>>>> -            guest_handle_cast(arg, xen_mem_acquire_resource_t));
>>>>> +            guest_handle_cast(arg, xen_mem_acquire_resource_t),
>>>>> +            &start_extent);
>>>>
>>>> Hmmm... it looks like we forgot to check that start_extent is always 0
>>>> when the hypercall was added.
>>>>
>>>> As this is exposed to the guest, it technically means that there no
>>>> guarantee that start_extent will always be 0.
>>>>
>>>
>>> I don't follow. A start extent != 0 means you are in a continuation. How can you check for 0 without breaking continuations?
>>
>> I think you misundertood my point. My point is we never checked that
>> start_extent was 0. So a guest could validly pass a non-zero value to
>> start_extent and not break on older Xen release.
>>
>> When this patch will be merged, such guest would behave differently. Or
>> did I miss any check/documentation for the start_extent value?
> 
> I think we may have done the same in the past already when enabling
> sub-ops for use of continuations. A guest specifying a non-zero
> start_extent itself is effectively a request for an undefined sub-op.
> With, as a result, undefined behavior.
Ok. So just mentioning the change in the commit message should be fine then.

Cheers,
Paul Durrant July 3, 2020, 11:40 a.m. UTC | #7
> -----Original Message-----
> From: Julien Grall <julien@xen.org>
> Sent: 03 July 2020 12:18
> To: paul@xen.org; 'Michał Leszczyński' <michal.leszczynski@cert.pl>; xen-devel@lists.xenproject.org
> Cc: luwei.kang@intel.com; tamas.lengyel@intel.com; 'Andrew Cooper' <andrew.cooper3@citrix.com>;
> 'George Dunlap' <george.dunlap@citrix.com>; 'Ian Jackson' <ian.jackson@eu.citrix.com>; 'Jan Beulich'
> <jbeulich@suse.com>; 'Stefano Stabellini' <sstabellini@kernel.org>; 'Wei Liu' <wl@xen.org>
> Subject: Re: [PATCH v4 06/10] memory: batch processing in acquire_resource()
> 
> Hi,
> 
> On 03/07/2020 11:52, Paul Durrant wrote:
> >> -----Original Message-----
> >> From: Julien Grall <julien@xen.org>
> >> Sent: 03 July 2020 11:36
> >> To: Michał Leszczyński <michal.leszczynski@cert.pl>; xen-devel@lists.xenproject.org
> >> Cc: luwei.kang@intel.com; tamas.lengyel@intel.com; Andrew Cooper <andrew.cooper3@citrix.com>;
> George
> >> Dunlap <george.dunlap@citrix.com>; Ian Jackson <ian.jackson@eu.citrix.com>; Jan Beulich
> >> <jbeulich@suse.com>; Stefano Stabellini <sstabellini@kernel.org>; Wei Liu <wl@xen.org>;
> paul@xen.org
> >> Subject: Re: [PATCH v4 06/10] memory: batch processing in acquire_resource()
> >>
> >> (+ Paul as the author XENMEM_acquire_resource)
> >>
> >> Hi,
> >>
> >> On 30/06/2020 13:33, Michał Leszczyński wrote:
> >>> From: Michal Leszczynski <michal.leszczynski@cert.pl>
> >>>
> >>> Allow to acquire large resources by allowing acquire_resource()
> >>> to process items in batches, using hypercall continuation.
> >>>
> >>> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
> >>> ---
> >>>    xen/common/memory.c | 32 +++++++++++++++++++++++++++++---
> >>>    1 file changed, 29 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/xen/common/memory.c b/xen/common/memory.c
> >>> index 714077c1e5..3ab06581a2 100644
> >>> --- a/xen/common/memory.c
> >>> +++ b/xen/common/memory.c
> >>> @@ -1046,10 +1046,12 @@ static int acquire_grant_table(struct domain *d, unsigned int id,
> >>>    }
> >>>
> >>>    static int acquire_resource(
> >>> -    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg)
> >>> +    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg,
> >>> +    unsigned long *start_extent)
> >>>    {
> >>>        struct domain *d, *currd = current->domain;
> >>>        xen_mem_acquire_resource_t xmar;
> >>> +    uint32_t total_frames;
> >>>        /*
> >>>         * The mfn_list and gfn_list (below) arrays are ok on stack for the
> >>>         * moment since they are small, but if they need to grow in future
> >>> @@ -1077,8 +1079,17 @@ static int acquire_resource(
> >>>            return 0;
> >>>        }
> >>>
> >>> +    total_frames = xmar.nr_frames;
> >>
> >> On 32-bit, the start_extent would be 26-bits wide which is not enough to
> >> cover all the xmar.nr_frames. Therefore, you want that check that it is
> >> possible to encode a continuation. Something like:
> >>
> >> /* Is the size too large for us to encode a continuation? */
> >> if ( unlikely(xmar.nr_frames > (UINT_MAX >> MEMOP_EXTENT_SHIFT)) )
> >>
> >>> +
> > >>> +    if ( *start_extent )
> > >>> +    {
> >>> +        xmar.frame += *start_extent;
> >>> +        xmar.nr_frames -= *start_extent;
> >>
> >> As start_extent is exposed to the guest, you want to check if it is not
> >> bigger than xmar.nr_frames.
> >>
> >>> +        guest_handle_add_offset(xmar.frame_list, *start_extent);
> >>> +    }
> >>> +
> >>>        if ( xmar.nr_frames > ARRAY_SIZE(mfn_list) )
> >>> -        return -E2BIG;
> >>> +        xmar.nr_frames = ARRAY_SIZE(mfn_list);
> >>
> >> The documentation of the hypercall suggests that if you pass NULL, then
> >> it will return the maximum number value for nr_frames supported by the
> >> implementation. So technically a domain cannot use more than
> >> ARRAY_SIZE(mfn_list).
> >>
> >> However, you new addition conflict with the documentation. Can you
> >> clarify how a domain will know that it can use more than
> >> ARRAY_SIZE(mfn_list)?
> >
> > The domain should not need to know. It should be told the maximum number of frames of the type it
> wants. If we have to carve that up into batches inside Xen then the caller should not need to care,
> right?
> 
> In the current implementation, we tell the guest how many frames it can
> request in a batch. This number may be much smaller that the maximum
> number of frames of the type.
> 
> Furthermore this value is not tie to the xmar.type. Therefore, it is
> valid for a guest to call this hypercall only once at boot to figure out
> the maximum batch.
> 
> So while the change you suggest looks a good idea, I don't think it is
> possible to do that with the current hypercall.
> 

Oh, I was clearly misremembering what the semantics were; I thought it was the implementation max for the given type, but indeed we do just return the array size, so we expect the caller to know the individual resource type limitations.
So, as Jan says, passing back UINT_MAX >> MEMOP_EXTENT_SHIFT seems to be what we need.

  Paul
Jan Beulich July 3, 2020, 12:50 p.m. UTC | #8
On 03.07.2020 13:36, Julien Grall wrote:
> On 03/07/2020 12:22, Jan Beulich wrote:
>> On 03.07.2020 13:17, Julien Grall wrote:
>>> In the current implementation, we tell the guest how many frames it can
>>> request in a batch. This number may be much smaller that the maximum
>>> number of frames of the type.
>>>
>>> Furthermore this value is not tie to the xmar.type. Therefore, it is
>>> valid for a guest to call this hypercall only once at boot to figure out
>>> the maximum batch.
>>>
>>> So while the change you suggest looks a good idea, I don't think it is
>>> possible to do that with the current hypercall.
>>
>> Doesn't the limit simply change to UINT_MAX >> MEMOP_EXTENT_SHIFT,
>> which then is what should be reported?
> 
> Hmmm... Can you remind me whether we support migration to an older release?

I'm pretty sure we say "N -> N+1 only" somewhere, but this "somewhere"
clearly isn't SUPPORT.md.

Jan

Patch
diff mbox series

diff --git a/xen/common/memory.c b/xen/common/memory.c
index 714077c1e5..3ab06581a2 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -1046,10 +1046,12 @@  static int acquire_grant_table(struct domain *d, unsigned int id,
 }
 
 static int acquire_resource(
-    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg)
+    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg,
+    unsigned long *start_extent)
 {
     struct domain *d, *currd = current->domain;
     xen_mem_acquire_resource_t xmar;
+    uint32_t total_frames;
     /*
      * The mfn_list and gfn_list (below) arrays are ok on stack for the
      * moment since they are small, but if they need to grow in future
@@ -1077,8 +1079,17 @@  static int acquire_resource(
         return 0;
     }
 
+    total_frames = xmar.nr_frames;
+
+    if ( *start_extent )
+    {
+        xmar.frame += *start_extent;
+        xmar.nr_frames -= *start_extent;
+        guest_handle_add_offset(xmar.frame_list, *start_extent);
+    }
+
     if ( xmar.nr_frames > ARRAY_SIZE(mfn_list) )
-        return -E2BIG;
+        xmar.nr_frames = ARRAY_SIZE(mfn_list);
 
     rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
     if ( rc )
@@ -1135,6 +1146,14 @@  static int acquire_resource(
         }
     }
 
+    if ( !rc )
+    {
+        *start_extent += xmar.nr_frames;
+
+        if ( *start_extent != total_frames )
+            rc = -ERESTART;
+    }
+
  out:
     rcu_unlock_domain(d);
 
@@ -1600,7 +1619,14 @@  long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 
     case XENMEM_acquire_resource:
         rc = acquire_resource(
-            guest_handle_cast(arg, xen_mem_acquire_resource_t));
+            guest_handle_cast(arg, xen_mem_acquire_resource_t),
+            &start_extent);
+
+        if ( rc == -ERESTART )
+            return hypercall_create_continuation(
+                __HYPERVISOR_memory_op, "lh",
+                op | (start_extent << MEMOP_EXTENT_SHIFT), arg);
+
         break;
 
     default: